From c6d599e39b60dc9b991bd7757123e6cbe377e75b Mon Sep 17 00:00:00 2001 From: xuanchen-liu-97 Date: Tue, 18 Nov 2025 12:32:11 +0000 Subject: [PATCH 01/14] Cleaning chords data 1. Find the segments of chords based on the label. 2. Inference the tone for each segment. 3. Transpose the chords into the Roman numeral expression. --- TDL-chords-data-cleaning.ipynb | 902 +++++++++++++++++++++++++++++++++ 1 file changed, 902 insertions(+) create mode 100644 TDL-chords-data-cleaning.ipynb diff --git a/TDL-chords-data-cleaning.ipynb b/TDL-chords-data-cleaning.ipynb new file mode 100644 index 00000000..43b7fc87 --- /dev/null +++ b/TDL-chords-data-cleaning.ipynb @@ -0,0 +1,902 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "id": "cc404196-bd66-43b2-8f3b-9602f6ccf58f", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import re\n", + "from collections import defaultdict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aeaa2ed4-6d9e-4cd7-90f3-402cd3e92440", + "metadata": {}, + "outputs": [], + "source": [ + "## Uun this code when using it for the first time for loading the dataset\n", + "# pip install huggingface_hub" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "94d6aced-7399-4917-bd60-a3997d5831af", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\liu.xuanc\\AppData\\Local\\Temp\\ipykernel_17284\\2761146993.py:1: DtypeWarning: Columns (2,3,5,6,7,8,9) have mixed types. 
# ==========================================
# Global configuration and auxiliary functions
# ==========================================

# The twelve pitch classes spelled with sharps; list index == semitones above C.
NOTES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# Enharmonic conversion table used to normalise flat roots to the sharp
# spelling used by NOTES (lower-case spellings are accepted for flats only).
FLAT_TO_SHARP = {
    'Db': 'C#', 'Eb': 'D#', 'Gb': 'F#', 'Ab': 'G#', 'Bb': 'A#',
    'db': 'C#', 'eb': 'D#', 'gb': 'F#', 'ab': 'G#', 'bb': 'A#'
}

# Root letter, optional sharp marker, remainder.  A sharp is written '#' or
# 's' ("Fs7"); the negative lookahead keeps the 's' of "sus" from being read
# as a sharp, so "Dsus4" stays on D while "Fssus4" is still F#.
_ROOT_PATTERN = re.compile(r'([A-G])(#|s(?!us))?(.*)')

# Structural labels such as <verse_1>; the capture group keeps them in the
# output of split().
_LABEL_PATTERN = re.compile(r'(<[^>]+>)')


def parse_chord_root_quality(chord_str):
    """
    Parse a single chord symbol into (root, quality).

    Parameters:
        chord_str: chord symbol such as "Am", "G7", "Fs7", "Bbmaj7", "D/Fs"

    Returns:
        (root, quality) where root is one of NOTES (flats and E#/B# are
        normalised to their enharmonic spelling in NOTES) and quality is one
        of 'maj', 'min', 'dim', '7'.  (None, None) is returned when the input
        is not a string, is blank, or does not start with a note name.

    For example: "Am" -> ("A", "min"), "G7" -> ("G", "7"), "Dsus4" -> ("D", "maj")
    """
    if not isinstance(chord_str, str):
        return None, None
    chord_str = chord_str.strip()
    if not chord_str:
        return None, None

    flat_root = FLAT_TO_SHARP.get(chord_str[:2])
    if flat_root is not None:
        root, rest = flat_root, chord_str[2:]
    else:
        match = _ROOT_PATTERN.match(chord_str)
        if not match:
            return None, None
        letter, sharp, rest = match.groups()
        # Raising by one semitone inside NOTES also maps E# -> F and B# -> C,
        # so the returned root can always be looked up in NOTES.
        root = NOTES[(NOTES.index(letter) + 1) % 12] if sharp else letter

    # Drop a slash-chord bass note ("C/G7") so it cannot leak into the quality.
    body = rest.split('/', 1)[0]
    lowered = body.lower()

    if 'dim' in lowered:
        quality = 'dim'
    elif 'min' in lowered or (body.startswith('m') and not body.startswith('maj')):
        # "min", "min7", and the short forms "m", "m7", ...
        quality = 'min'
    elif body.startswith('M'):
        # Upper-case "M" / "Maj" denotes a major chord ("CM7").
        quality = 'maj'
    elif '7' in lowered and 'maj' not in lowered:
        quality = '7'
    else:
        quality = 'maj'  # Default to major triad / major seventh chord

    return root, quality


# ==========================================
# Segmentation
# ==========================================

def segment_music_string(raw_string):
    """
    Split a labelled chord string into per-section chord lists.

    Parameters:
        raw_string: string mixing <label> tags and whitespace-separated chords

    Returns:
        list of {'label': str, 'chords': list[str]} in order of appearance.
        Chords that appear before the first label get the label "Unknown";
        labels that are not followed by any chord produce no segment.
        Non-string input (e.g. NaN from a DataFrame) yields [].

    Input: '<intro_1> C D <verse_1> F G'
    Output: [{'label': 'intro_1', 'chords': ['C', 'D']}, {'label': 'verse_1', 'chords': ['F', 'G']}]
    """
    if not isinstance(raw_string, str):
        return []

    segments = []
    current_label = "Unknown"

    for part in _LABEL_PATTERN.split(raw_string):
        part = part.strip()
        if not part:
            continue

        if part.startswith('<') and part.endswith('>'):
            # Label: remember it for the chords that follow.
            current_label = part.strip('<>')
        else:
            # Content (chords)
            segments.append({
                'label': current_label,
                'chords': part.split()
            })

    return segments


# ==========================================
# Key Inference
# ==========================================

# key name -> (template, tonic, dominant); template maps a chord root to the
# list of chord qualities considered diatonic on that root.
KEY_TEMPLATES = {}

_MAJOR_STEPS = (0, 2, 4, 5, 7, 9, 11)
_NATURAL_MINOR_STEPS = (0, 2, 3, 5, 7, 8, 10)


def _build_templates():
    """Fill KEY_TEMPLATES with the 12 major and 12 minor key templates."""
    for i, root in enumerate(NOTES):
        # Major key template
        s_maj = [NOTES[(i + n) % 12] for n in _MAJOR_STEPS]
        maj_chords = {
            s_maj[0]: ['maj'], s_maj[1]: ['min'], s_maj[2]: ['min'],
            s_maj[3]: ['maj'], s_maj[4]: ['maj', '7'], s_maj[5]: ['min'],
            s_maj[6]: ['dim'],
        }
        KEY_TEMPLATES[f"{root} Major"] = (maj_chords, s_maj[0], s_maj[4])

        # Minor key template (natural + harmonic).  The fifth degree is a
        # single dict key, so the natural-minor v ('min') and the harmonic
        # V / V7 ('maj', '7') have to share one entry.
        s_min = [NOTES[(i + n) % 12] for n in _NATURAL_MINOR_STEPS]
        min_chords = {
            s_min[0]: ['min'], s_min[1]: ['dim'], s_min[2]: ['maj'],
            s_min[3]: ['min'], s_min[4]: ['min', 'maj', '7'],
            s_min[5]: ['maj'], s_min[6]: ['maj', '7'],
        }
        KEY_TEMPLATES[f"{root} Minor"] = (min_chords, s_min[0], s_min[4])


_build_templates()


def infer_key_from_list(chord_list):
    """
    Guess the key of a chord progression by scoring it against KEY_TEMPLATES.

    Scoring per chord and candidate key: +1 if the root is in the key, then
    +2 if the quality is diatonic on that root or -1 if it is not, +5 for a
    dominant-seventh chord that moves to the tonic, +1 if the root is the
    tonic.  The best-scoring key wins; ties go to the key that was inserted
    first into KEY_TEMPLATES.

    Parameters:
        chord_list: list of chord symbols

    Returns:
        key name such as 'C Major' / 'A Minor', or "Unknown" when the list is
        empty or none of its chords can be parsed.

    input: ['C', 'F', 'G7', 'C']
    output: 'C Major'
    """
    if not chord_list:
        return "Unknown"

    parsed = [parse_chord_root_quality(c_str) for c_str in chord_list]

    # (root, quality, root of the following chord) for every parseable chord.
    progression = []
    for idx, (root, quality) in enumerate(parsed):
        if not root:
            continue
        next_root = parsed[idx + 1][0] if idx + 1 < len(parsed) else None
        progression.append((root, quality, next_root))

    # Without any evidence every key scores 0 and the first template would
    # win arbitrarily, so report "Unknown" instead.
    if not progression:
        return "Unknown"

    scores = {}
    for key_name, (template, tonic, dom) in KEY_TEMPLATES.items():
        score = 0
        for root, quality, next_root in progression:
            # Basic match
            if root in template:
                score += 1
                if quality in template[root]:
                    score += 2
                else:
                    score -= 1

            # Dominant seventh resolving to the tonic
            if root == dom and quality == '7' and next_root == tonic:
                score += 5

            if root == tonic:
                score += 1

        scores[key_name] = score

    return max(scores, key=scores.get)


def patch_short_segments(analyzed_segments, min_chords=2):
    """
    Replace the key of very short segments with the key of a neighbour.

    The segment dicts are modified in place and the same list is returned.
    A patched segment also gets a 'key_source' entry ('borrowed_prev' or
    'borrowed_next').  The backward pass runs last, so a short segment that
    has a following segment with a known key ends up with that key.

    Parameters:
        analyzed_segments: list of {'label':..., 'chords':..., 'key':...}
        min_chords: threshold; segments with fewer chords than this are
            corrected (default is 2)

    Returns:
        analyzed_segments (the same list object)
    """
    neighbours = list(zip(analyzed_segments, analyzed_segments[1:]))

    # Forward pass: borrow from the previous segment.
    for prev_seg, current_seg in neighbours:
        if len(current_seg['chords']) < min_chords and prev_seg['key'] != "Unknown":
            current_seg['key'] = prev_seg['key']
            current_seg['key_source'] = 'borrowed_prev'

    # Backward pass: borrow from the next segment
    # (usually an intro should follow the key of the verse).
    for current_seg, next_seg in reversed(neighbours):
        if len(current_seg['chords']) < min_chords and next_seg['key'] != "Unknown":
            current_seg['key'] = next_seg['key']
            current_seg['key_source'] = 'borrowed_next'

    return analyzed_segments


# ==========================================
# Roman Numeral Conversion
# ==========================================
ROMAN_MAP = {
    0: 'I', 1: 'bII', 2: 'II', 3: 'bIII', 4: 'III', 5: 'IV',
    6: 'bV', 7: 'V', 8: 'bVI', 9: 'VI', 10: 'bVII', 11: 'VII'
}


def convert_to_roman(chord_list, key_str):
    """
    Express chords as Roman numerals relative to the tonic of a key.

    Numerals are measured in semitones from the tonic (ROMAN_MAP), lower-cased
    for 'min' and 'dim' chords, with '7' appended for dominant sevenths and
    '°' for diminished chords.  Chords that cannot be parsed become "?".

    Parameters:
        chord_list: list of chord symbols
        key_str: key name such as 'C Major' (only the first word, the tonic,
            is used)

    Returns:
        list of Roman-numeral strings; chord_list itself is returned
        unchanged when key_str is "Unknown", empty, or its tonic is not one
        of NOTES.

    input: chord_list=['C', 'F', 'G'], key_str='C Major'
    output: ['I', 'IV', 'V']
    """
    if key_str == "Unknown":
        return chord_list

    # Tonic of the key: "C Major" -> "C"
    key_parts = key_str.split() if isinstance(key_str, str) else []
    if not key_parts or key_parts[0] not in NOTES:
        return chord_list
    key_root_idx = NOTES.index(key_parts[0])

    roman_output = []

    for chord_str in chord_list:
        root, quality = parse_chord_root_quality(chord_str)
        if not root:
            roman_output.append("?")
            continue

        # Interval in semitones: (chord root - tonic) % 12
        interval = (NOTES.index(root) - key_root_idx) % 12
        final_roman = ROMAN_MAP.get(interval, "?")

        # Minor and diminished chords -> lowercase numeral
        if quality in ('min', 'dim'):
            final_roman = final_roman.lower()

        # Suffix
        if quality == '7':
            final_roman += '7'
        elif quality == 'dim':
            final_roman += '°'

        roman_output.append(final_roman)

    return roman_output


if __name__ == "__main__":
    # Load the Chordonomicon dataset only when run as a notebook/script, so
    # the helpers above can be imported without network access.
    # low_memory=False parses the file in one pass, which avoids the
    # mixed-type DtypeWarning raised by the chunked default.
    df = pd.read_csv(
        "hf://datasets/ailsntua/Chordonomicon/chordonomicon_v2.csv",
        low_memory=False,
    )
# Walk the first song of the dataset through the whole pipeline.
# Bare expressions are kept so the notebook still displays each
# intermediate result.

# Raw labelled chord string of the first song.
df['chords'][0]

# 1. Split the string into labelled sections.
segments = segment_music_string(df['chords'][0])
segments

# 2. Infer a key for every section independently.
analyzed_records = [
    {
        'label': seg['label'],
        'chords': seg['chords'],
        'key': infer_key_from_list(seg['chords']),
    }
    for seg in segments
]
analyzed_records

# 3. Sections with fewer than two chords borrow the key of a neighbour.
analyzed_records = patch_short_segments(analyzed_records, min_chords=2)
analyzed_records

# 4. Rewrite each section's chords as Roman numerals in its key.
chords_output = [
    {
        'section': record['label'],
        'key': record['key'],
        'roman': convert_to_roman(record['chords'], record['key']),
    }
    for record in analyzed_records
]
"cell_type": "code", + "execution_count": 40, + "id": "c5413c18-a6a0-4daa-a607-7aa607c43010", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'section': 'intro_1', 'key': 'C Major', 'roman': ['I']},\n", + " {'section': 'verse_1',\n", + " 'key': 'C Major',\n", + " 'roman': ['IV',\n", + " 'I',\n", + " 'III7',\n", + " 'vi',\n", + " 'I',\n", + " 'IV',\n", + " 'I',\n", + " 'V7',\n", + " 'I',\n", + " 'IV',\n", + " 'I',\n", + " 'III7',\n", + " 'vi',\n", + " 'I',\n", + " 'IV',\n", + " 'V7',\n", + " 'I']},\n", + " {'section': 'verse_2',\n", + " 'key': 'C Major',\n", + " 'roman': ['IV',\n", + " 'I',\n", + " 'III7',\n", + " 'vi',\n", + " 'I',\n", + " 'IV',\n", + " 'I',\n", + " 'V7',\n", + " 'I',\n", + " 'IV',\n", + " 'I',\n", + " 'III7',\n", + " 'vi',\n", + " 'I',\n", + " 'IV',\n", + " 'V7',\n", + " 'I']},\n", + " {'section': 'chorus_1',\n", + " 'key': 'C Major',\n", + " 'roman': ['IV',\n", + " 'I',\n", + " 'IV',\n", + " 'I',\n", + " 'V',\n", + " 'I',\n", + " 'IV',\n", + " 'I',\n", + " 'III7',\n", + " 'vi',\n", + " 'I',\n", + " 'IV',\n", + " 'V7',\n", + " 'I']},\n", + " {'section': 'solo_1', 'key': 'D Major', 'roman': ['I']},\n", + " {'section': 'chorus_2',\n", + " 'key': 'D Major',\n", + " 'roman': ['IV',\n", + " 'I',\n", + " 'IV',\n", + " 'I',\n", + " 'V',\n", + " 'I',\n", + " 'IV',\n", + " 'I',\n", + " 'III7',\n", + " 'vi',\n", + " 'I',\n", + " 'IV',\n", + " 'V7',\n", + " 'I',\n", + " 'IV',\n", + " 'V7',\n", + " 'I']}]" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chords_output" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "96efd397-4835-41db-9692-73a86d371f2d", + "metadata": {}, + "outputs": [], + "source": [ + "def extract_and_format_roman(analyzed_records, include_tags=True):\n", + " \"\"\"\n", + " Parameters:\n", + " analyzed_records: chords_output in the former step\n", + " include_tags: \n", + " - True: Keep \" I IV I V\" (Structure + Sequence)\n", + " - 
False: Only \"I IV I V\" (Sequence)\n", + " \n", + " Return:\n", + " str: Roman numerals string\n", + " \"\"\"\n", + " output_parts = []\n", + " \n", + " for record in analyzed_records:\n", + " roman_seq = \" \".join(record['roman'])\n", + " \n", + " if include_tags:\n", + " # \"