Notes#

Loading data

Data and software versions
--------------------------

Erwin Schulhoff – Suite dansante en jazz version v2.3
Datapackage 'schulhoff_suite_dansante_en_jazz' @ v2.3
dimcat version 3.4.0

Dataset
=======
{'inputs': {'basepath': None,
            'packages': {'schulhoff_suite_dansante_en_jazz': ["'schulhoff_suite_dansante_en_jazz.measures' "
                                                              '(MuseScoreFacetName.MuseScoreMeasures)',
                                                              "'schulhoff_suite_dansante_en_jazz.notes' "
                                                              '(MuseScoreFacetName.MuseScoreNotes)',
                                                              "'schulhoff_suite_dansante_en_jazz.expanded' "
                                                              '(MuseScoreFacetName.MuseScoreHarmonies)',
                                                              "'schulhoff_suite_dansante_en_jazz.chords' "
                                                              '(MuseScoreFacetName.MuseScoreChords)',
                                                              "'schulhoff_suite_dansante_en_jazz.metadata' "
                                                              '(FeatureName.Metadata)']}},
 'outputs': {'basepath': None, 'packages': {}},
 'pipeline': []}

Metadata#

# Run the dataset through dimcat's HasHarmonyLabelsFilter, keeping only the value True.
harmony_label_filter = filters.HasHarmonyLabelsFilter(keep_values=[True])
filtered_D = harmony_label_filter.process(D)

# Preview of the metadata: first row per value of index level 0, first 20 columns only.
all_metadata = filtered_D.get_metadata()
first_row_per_corpus = all_metadata.reset_index(level=1).groupby(level=0).nth(0)
first_row_per_corpus.iloc[:, :20]

	piece	TimeSig	KeySig	last_mc	last_mn	length_qb	last_mc_unfolded	last_mn_unfolded	length_qb_unfolded	volta_mcs	all_notes_qb	n_onsets	n_onset_positions	guitar_chord_count	form_label_count	label_count	annotated_key	harmony_version	annotators	reviewers
corpus
schulhoff_suite_dansante_en_jazz	suite_dansante_en_jazz_1_stomp	{1: '2/2'}	{1: 0}	46	46	184.0	46	46	184.0	()	505.83	706	317	0	0	97	E	2.3.0	Amelia Brey	DK

# Pair each corpus, in the order returned by the helper, with one color of the shared
# color scale; zip() stops at the shorter of the two sequences.
chronological_order = utils.chronological_corpus_order(all_metadata)
corpus_colors = {
    corpus_id: color
    for corpus_id, color in zip(chronological_order, utils.CORPUS_COLOR_SCALE)
}

notes_feature = filtered_D.get_feature("notes")
all_notes = notes_feature.df
n_notes = len(all_notes.index)
# One group per combination of the first two index levels (corpus, piece).
n_files = len(all_notes.groupby(level=[0, 1]))
print(f"{n_notes} notes over {n_files} files.")
all_notes.head()

5669 notes over 6 files.

			mc	mn	quarterbeats	quarterbeats_all_endings	duration_qb	duration	mc_onset	mn_onset	timesig	staff	voice	volta	chord_id	gracenote	midi	name	nominal_duration	octave	scalar	tied	tpc_name	tpc
corpus	piece	i
schulhoff_suite_dansante_en_jazz	suite_dansante_en_jazz_1_stomp	0	1	1	0	0	1.0	1/4	0	0	2/2	2	1	<NA>	5	<NA>	40	E2	1/4	2	1	<NA>	E	4
		1	1	1	1	1	1.0	1/4	1/4	1/4	2/2	2	1	<NA>	6	<NA>	52	E3	1/4	3	1	<NA>	E	4
		2	1	1	1	1	1.0	1/4	1/4	1/4	2/2	2	1	<NA>	6	<NA>	59	B3	1/4	3	1	<NA>	B	5
		3	1	1	1	1	0.5	1/8	1/4	1/4	2/2	1	1	<NA>	0	<NA>	68	G#4	1/8	4	1	<NA>	G#	8
		4	1	1	1	1	0.5	1/8	1/4	1/4	2/2	1	1	<NA>	0	<NA>	73	C#5	1/8	5	1	<NA>	C#	7

def weight_notes(nl, group_col="midi", precise=True):
    """Turn a note list into group labels repeated proportionally to their summed duration.

    The ``duration_qb`` values of ``nl`` are summed per value of ``group_col`` and expressed
    as multiples of the smallest positive sum, so that the least-represented group receives
    a weight of 1. The weights are then passed to ``repeat_notes_according_to_weights()``.

    Args:
        nl: DataFrame with a ``duration_qb`` column and the column(s) named by ``group_col``.
        group_col: Column whose values are grouped and repeated.
        precise: If False, all weights are additionally divided by 1.9999999, which roughly
            halves the number of repetitions (less compute, less precision).

    Returns:
        Whatever ``repeat_notes_according_to_weights()`` returns for the computed weights.
    """
    duration_per_group = nl.groupby(group_col)["duration_qb"].sum()
    positive_durations = duration_per_group[duration_per_group > 0]
    # Normalize such that the shortest non-zero total duration results in 1 occurrence.
    weights = duration_per_group / positive_durations.min()
    if precise:
        return repeat_notes_according_to_weights(weights)
    # Dividing by slightly less than 2 keeps the smallest weight slightly above 0.5;
    # at exactly 0.5 it would be rounded down by repeat_notes_according_to_weights().
    return repeat_notes_according_to_weights(weights / 1.9999999)


def repeat_notes_according_to_weights(weights):
    """Expand weights into a Series in which each index label occurs ``round(weight)`` times.

    Args:
        weights: pandas Series mapping a label (e.g. a pitch) to a numeric weight.

    Returns:
        A Series with a default index that contains every label of ``weights`` repeated
        according to its rounded weight, in the order of ``weights``. Labels whose weight
        rounds to zero or less are left out. An empty integer Series is returned when
        there is nothing to repeat or when the weights cannot be converted to integers
        (for example because they contain NaN).
    """
    try:
        counts = weights.round().astype(int)
    except (TypeError, ValueError):
        # Best-effort fallback for weights that have no integer representation
        # (NaN/inf or non-numeric values); unrelated errors are no longer hidden.
        return pd.Series(dtype=int)
    repeated_labels = [
        label for label, count in counts.items() for _ in range(count)
    ]
    if not repeated_labels:
        # Same dtype as the fallback above, instead of the default dtype of an empty list.
        return pd.Series(dtype=int)
    return pd.Series(repeated_labels)

Ambitus#

# Display name per corpus, in chronological corpus order.
corpus_names = {
    corpus_id: utils.get_corpus_display_name(corpus_id)
    for corpus_id in chronological_order
}
chronological_corpus_names = [*corpus_names.values()]
# Same colors as corpus_colors, but keyed by display name.
corpus_name_colors = {
    corpus_names[corpus_id]: corpus_color
    for corpus_id, corpus_color in corpus_colors.items()
}
# Adds the display name as a new column, derived from index level 0.
corpus_index_level = all_notes.index.get_level_values(0)
all_notes["corpus_name"] = corpus_index_level.map(corpus_names)

grouped_notes = all_notes.groupby("corpus_name")
# One duration-weighted Series of MIDI pitches per corpus (imprecise, cheaper variant).
midi_per_corpus = [
    weight_notes(corpus_notes, "midi", precise=False)
    for _, corpus_notes in grouped_notes
]
weighted_midi = pd.concat(midi_per_corpus, keys=grouped_notes.groups.keys())
# Move the concatenation keys from the index into a regular column.
weighted_midi = weighted_midi.reset_index(level=0)
weighted_midi.columns = ["dataset", "midi"]
weighted_midi

	dataset	midi
0	Schulhoff Suite Dansante En Jazz	26
1	Schulhoff Suite Dansante En Jazz	27
2	Schulhoff Suite Dansante En Jazz	28
3	Schulhoff Suite Dansante En Jazz	30
4	Schulhoff Suite Dansante En Jazz	30
...	...	...
2035	Schulhoff Suite Dansante En Jazz	91
2036	Schulhoff Suite Dansante En Jazz	93
2037	Schulhoff Suite Dansante En Jazz	93
2038	Schulhoff Suite Dansante En Jazz	93
2039	Schulhoff Suite Dansante En Jazz	94

2040 rows × 2 columns

# fig = px.violin(weighted_midi,
#                 x='dataset',
#                 y='midi',
#                 color='dataset',
#                 title="Corpus-wise distribution over registers (ambitus)",
#                 box=True,
#                 labels=dict(
#                     dataset='',
#                     midi='distribution of pitches by duration'
#                 ),
#                 category_orders=dict(dataset=chronological_corpus_names),
#                 color_discrete_map=corpus_name_colors,
#                 width=1000, height=600,
#                )
# fig.update_traces(spanmode='hard') # do not extend beyond outliers
# fig.update_layout(**utils.STD_LAYOUT,
#                  showlegend=False)
# fig.update_yaxes(
#     tickmode= 'array',
#     tickvals= [12, 24, 36, 48, 60, 72, 84, 96],
#     ticktext = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7"],
# )
# fig.update_xaxes(tickangle=45)
# save_figure_as(fig, "ambitus_corpuswise_violins")
# fig.show()

Tonal Pitch Classes (TPC)#

# One duration-weighted Series of tonal pitch classes per corpus (precise variant).
tpc_per_corpus = [weight_notes(corpus_notes, "tpc") for _, corpus_notes in grouped_notes]
weighted_tpc = pd.concat(tpc_per_corpus, keys=grouped_notes.groups.keys())
# Move the concatenation keys from the index into a regular column.
weighted_tpc = weighted_tpc.reset_index(level=0)
weighted_tpc.columns = ["dataset", "tpc"]
weighted_tpc

	dataset	tpc
0	Schulhoff Suite Dansante En Jazz	-8
1	Schulhoff Suite Dansante En Jazz	-8
2	Schulhoff Suite Dansante En Jazz	-8
3	Schulhoff Suite Dansante En Jazz	-8
4	Schulhoff Suite Dansante En Jazz	-8
...	...	...
8130	Schulhoff Suite Dansante En Jazz	14
8131	Schulhoff Suite Dansante En Jazz	14
8132	Schulhoff Suite Dansante En Jazz	14
8133	Schulhoff Suite Dansante En Jazz	14
8134	Schulhoff Suite Dansante En Jazz	15

8135 rows × 2 columns

As violin plot#

# fig = px.violin(weighted_tpc,
#                 x='dataset',
#                 y='tpc',
#                 color='dataset',
#                 title="Corpus-wise distribution over line of fifths (tonal pitch classes)",
#                 box=True,
#                 labels=dict(
#                     dataset='',
#                     tpc='distribution of tonal pitch classes by duration'
#                 ),
#                 category_orders=dict(dataset=chronological_corpus_names),
#                 color_discrete_map=corpus_name_colors,
#                 width=1000,
#                 height=600,
#                )
# fig.update_traces(spanmode='hard') # do not extend beyond outliers
# fig.update_layout(**utils.STD_LAYOUT,
#                  showlegend=False)
# fig.update_yaxes(
#     tickmode= 'array',
#     tickvals= [-12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 18],
#     ticktext = ["Dbb", "Bbb", "Gb", "Eb", "C", "A", "F#", "D#", "B#", "G##", "E##"],
#     zerolinecolor='grey',
#     zeroline=True
# )
# fig.update_xaxes(tickangle=45)
# save_figure_as(fig, "pitch_class_distributions_corpuswise_violins")
# fig.show()

# Display the notes table.
all_notes

			mc	mn	quarterbeats	quarterbeats_all_endings	duration_qb	duration	mc_onset	mn_onset	timesig	staff	voice	volta	chord_id	gracenote	midi	name	nominal_duration	octave	scalar	tied	tpc_name	tpc	corpus_name
corpus	piece	i
schulhoff_suite_dansante_en_jazz	suite_dansante_en_jazz_1_stomp	0	1	1	0	0	1.0	1/4	0	0	2/2	2	1	<NA>	5	<NA>	40	E2	1/4	2	1	<NA>	E	4	Schulhoff Suite Dansante En Jazz
		1	1	1	1	1	1.0	1/4	1/4	1/4	2/2	2	1	<NA>	6	<NA>	52	E3	1/4	3	1	<NA>	E	4	Schulhoff Suite Dansante En Jazz
		2	1	1	1	1	1.0	1/4	1/4	1/4	2/2	2	1	<NA>	6	<NA>	59	B3	1/4	3	1	<NA>	B	5	Schulhoff Suite Dansante En Jazz
		3	1	1	1	1	0.5	1/8	1/4	1/4	2/2	1	1	<NA>	0	<NA>	68	G#4	1/8	4	1	<NA>	G#	8	Schulhoff Suite Dansante En Jazz
		4	1	1	1	1	0.5	1/8	1/4	1/4	2/2	1	1	<NA>	0	<NA>	73	C#5	1/8	5	1	<NA>	C#	7	Schulhoff Suite Dansante En Jazz
	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
	suite_dansante_en_jazz_6_fox-trot	924	50	50	197	197	1.0	1/4	1/4	1/4	2/2	2	1	<NA>	510	<NA>	39	Eb2	1/4	2	1	<NA>	Eb	-3	Schulhoff Suite Dansante En Jazz
		925	50	50	198	198	0.5	1/8	1/2	1/2	2/2	1	2	<NA>	508	<NA>	61	Db4	1/8	4	1	<NA>	Db	-5	Schulhoff Suite Dansante En Jazz
		926	50	50	198	198	0.5	1/8	1/2	1/2	2/2	1	2	<NA>	508	<NA>	67	G4	1/8	4	1	<NA>	G	1	Schulhoff Suite Dansante En Jazz
		927	50	50	198	198	0.5	1/8	1/2	1/2	2/2	1	1	<NA>	507	<NA>	71	Cb5	1/8	5	1	<NA>	Cb	-7	Schulhoff Suite Dansante En Jazz
		928	50	50	198	198	0.5	1/8	1/2	1/2	2/2	1	1	<NA>	507	<NA>	76	Fb5	1/8	5	1	<NA>	Fb	-8	Schulhoff Suite Dansante En Jazz

5669 rows × 23 columns

# Figure size, used for both the plot itself and the saved file.
width = 1400
height = 800

# Stack the two weighted distributions into one long table with a common "value" column.
# The concatenation key ("distribution") tells the two apart; reset_index() turns both
# index levels (the key and the previous row index) into regular columns.
weighted_pitch_values = pd.concat(
    [
        weighted_midi.rename(columns={"midi": "value"}),
        weighted_tpc.rename(columns={"tpc": "value"}),
    ],
    keys=["MIDI pitch", "Tonal pitch class"],
    names=["distribution"],
).reset_index(level=[0, 1])

# One violin per dataset, one facet row per distribution.
# NOTE(review): the plotted column is "value"; the "tpc" entry in `labels` does not match
# any column of weighted_pitch_values and presumably has no effect — confirm.
fig = plotting.make_violin_plot(
    weighted_pitch_values,
    x_col="dataset",
    y_col="value",
    color="dataset",
    facet_row="distribution",
    box=True,
    labels=dict(dataset="", tpc="distribution of tonal pitch classes by duration"),
    category_orders=dict(dataset=chronological_corpus_names),
    # color_discrete_map=corpus_name_colors,
    color_discrete_sequence=px.colors.qualitative.Dark24,
    traces_settings=dict(
        spanmode="hard",
        width=1,
        # scalemode='width'
    ),
    layout=dict(
        showlegend=False,
        margin=dict(
            t=0,
            b=0,
            l=0,
            r=0,
        ),
    ),
    x_axis=dict(
        # tickangle=45,
        tickfont_size=15
    ),
    # A single tick specification serves both facet rows: the values -12..15 carry
    # pitch-class names, the values 24..96 carry octave names (C1..C7).
    y_axis=dict(
        tickmode="array",
        tickvals=[-12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 24, 36, 48, 60, 72, 84, 96],
        ticktext=[
            "Dbb",
            "Bbb",
            "Gb",
            "Eb",
            "C",
            "A",
            "F#",
            "D#",
            "B#",
            "G##",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
            "C6",
            "C7",
        ],
        zerolinecolor="grey",
        zeroline=True,
    ),
    width=width,
    height=height,
)
# Project helper; called here with an empty y-axis title.
utils.realign_subplot_axes(fig, y_axes=dict(title_text=""))
save_figure_as(fig, "notes_violin", width=width, height=height)
fig

# Box-plot counterpart of the violin figure: one box per dataset, one facet row per
# distribution, drawn from the same long table (weighted_pitch_values).
# NOTE(review): the plotted column is "value"; the "tpc" entry in `labels` does not match
# y_col and presumably has no effect — confirm.
fig = plotting.make_box_plot(
    weighted_pitch_values,
    x_col="dataset",
    y_col="value",
    color="dataset",
    facet_row="distribution",
    # box=True,
    labels=dict(dataset="", tpc="distribution of tonal pitch classes by duration"),
    category_orders=dict(dataset=chronological_corpus_names),
    # color_discrete_map=corpus_name_colors,
    color_discrete_sequence=px.colors.qualitative.Light24,
    # traces_settings=dict(spanmode='hard'),
    layout=dict(showlegend=False, margin=dict(t=0)),
    x_axis=dict(tickangle=45, tickfont_size=15),
    # A single tick specification serves both facet rows: the values -12..15 carry
    # pitch-class names, the values 24..96 carry octave names (C1..C7).
    y_axis=dict(
        tickmode="array",
        tickvals=[-12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 24, 36, 48, 60, 72, 84, 96],
        ticktext=[
            "Dbb",
            "Bbb",
            "Gb",
            "Eb",
            "C",
            "A",
            "F#",
            "D#",
            "B#",
            "G##",
            "C1",
            "C2",
            "C3",
            "C4",
            "C5",
            "C6",
            "C7",
        ],
        zerolinecolor="grey",
        zeroline=True,
    ),
    width=width,
    height=height,
)
# Project helper; semantics of y_axes=True are defined there.
utils.realign_subplot_axes(fig, y_axes=True)
save_figure_as(fig, "notes_box", width=width, height=height)
fig

As bar plots#

# Total duration (in quarter notes) per tonal pitch class over all notes.
bar_data = all_notes.groupby("tpc").duration_qb.sum().reset_index()
# One tick for every integer between the lowest and the highest occurring tpc value,
# including values that do not occur in the data.
x_values = list(range(bar_data.tpc.min(), bar_data.tpc.max() + 1))
# Tick labels for these values (presumably the corresponding note names — see ms3).
x_names = ms3.fifths2name(x_values)
fig = px.bar(
    bar_data,
    x="tpc",
    y="duration_qb",
    labels=dict(tpc="Named pitch class", duration_qb="Duration in quarter notes"),
    color_discrete_sequence=utils.CORPUS_COLOR_SCALE,
    width=1000,
    height=300,
)
fig.update_layout(**utils.STD_LAYOUT)
# NOTE(review): ticks are given explicitly (tickmode="array"); plotly documents dtick for
# tickmode="linear", so dtick=1 presumably has no effect here — confirm.
fig.update_xaxes(
    zerolinecolor="grey",
    tickmode="array",
    tickvals=x_values,
    ticktext=x_names,
    dtick=1,
    ticks="outside",
    tickcolor="black",
    minor=dict(dtick=6, gridcolor="grey", showgrid=True),
)
save_figure_as(fig, "pitch_class_distribution_absolute_bars")
fig.show()

# Total duration (in quarter notes) per corpus and tonal pitch class.
scatter_data = all_notes.groupby(["corpus_name", "tpc"]).duration_qb.sum().reset_index()
fig = px.bar(
    scatter_data,
    x="tpc",
    y="duration_qb",
    color="corpus_name",
    labels=dict(
        duration_qb="duration",
        tpc="named pitch class",
    ),
    # category_orders is keyed by column name; the corpus column of scatter_data is
    # "corpus_name" (a "dataset" key would match no column and be ignored).
    category_orders=dict(corpus_name=chronological_corpus_names),
    color_discrete_map=corpus_name_colors,
    width=1000,
    height=500,
)
fig.update_layout(**utils.STD_LAYOUT)
# Explicit ticks: x_values/x_names are the tpc positions and their labels computed for
# the absolute bar plot.
fig.update_xaxes(
    zerolinecolor="grey",
    tickmode="array",
    tickvals=x_values,
    ticktext=x_names,
    dtick=1,
    ticks="outside",
    tickcolor="black",
    minor=dict(dtick=6, gridcolor="grey", showgrid=True),
)
save_figure_as(fig, "pitch_class_distribution_corpuswise_absolute_bars")
fig.show()

As scatter plots#

# One facet per corpus showing the summed duration of each tonal pitch class.
fig = px.scatter(
    scatter_data,
    x="tpc",
    y="duration_qb",
    color="corpus_name",
    labels=dict(
        duration_qb="duration",
        tpc="named pitch class",
    ),
    # category_orders is keyed by column name; the corpus column of scatter_data is
    # "corpus_name" (a "dataset" key would match no column and be ignored), so this
    # is what puts the facets and colors into chronological order.
    category_orders=dict(corpus_name=chronological_corpus_names),
    color_discrete_map=corpus_name_colors,
    facet_col="corpus_name",
    facet_col_wrap=3,
    facet_col_spacing=0.03,
    width=1000,
    height=1000,
)
fig.update_traces(mode="lines+markers")
# Facet titles read "corpus_name=<name>"; keep only the part after the "=".
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.update_layout(**utils.STD_LAYOUT, showlegend=False)
fig.update_xaxes(
    zerolinecolor="grey",
    tickmode="array",
    tickvals=[-12, -6, 0, 6, 12, 18],
    ticktext=["Dbb", "Gb", "C", "F#", "B#", "E##"],
    visible=True,
)
# matches=None gives every facet its own y-range; tick labels are shown on each of them.
fig.update_yaxes(zeroline=False, matches=None, showticklabels=True)
save_figure_as(fig, "pitch_class_distribution_corpuswise_scatter")
fig.show()

# tpc values -1..5 (inclusive) are counted as "without accidental"; everything else as
# "with accidental".
is_without_accidental = bar_data.tpc.between(-1, 5)
no_accidental = bar_data[is_without_accidental].duration_qb.sum()
with_accidental = bar_data[~is_without_accidental].duration_qb.sum()

entire = no_accidental + with_accidental
(
    f"Fraction of note duration without accidental of the entire durations: {no_accidental} / {entire} = "
    f"{no_accidental / entire}"
)

'Fraction of note duration without accidental of the entire durations: 2232.666666666667 / 4068.2500000000005 = 0.5488027202523608'

Notes and staves#

print("Distribution of notes over staves:")
# Tabulate how often each value of the "staff" column occurs.
staff_column = all_notes["staff"]
utils.value_count_df(staff_column)

Distribution of notes over staves:

	counts	%
staff
1	3220	56.8
2	2449	43.2