forked from lucaspauker/music-matcher
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmusic_matcher.py
111 lines (95 loc) · 3.89 KB
/
music_matcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import IPython.display as ipd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import sklearn as skl
#import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
#import librosa
#import librosa.display
import ast
#import utils
# Directory prefix for the metadata CSV files read by this script
# ("tracks.csv" in load_data, "features.csv" in extract_features).
METADATA_DIR = "./fma_metadata/"
def load(filepath):
    """
    Read one metadata CSV file, choosing the parser from its file name.

    Based off code from the fma github.

    The base name of ``filepath`` is checked for these substrings, in order:

    * "features" or "echonest": three header rows, first column as index.
    * "genres": a single header row, first column as index.
    * "tracks": two header rows, first column as index; the list-like
      columns are parsed into Python objects, the date columns become
      datetimes, ("set", "subset") becomes an ordered categorical
      (small < medium < large) and a few text columns become categoricals.

    Returns the resulting DataFrame, or None when the file name matches
    none of the substrings above.
    """
    filename = os.path.basename(filepath)

    # Both files share the same three-level column header layout.
    if "features" in filename or "echonest" in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if "genres" in filename:
        return pd.read_csv(filepath, index_col=0)

    if "tracks" in filename:
        tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

        # These columns hold Python literals stored as text (e.g. "[21, 5]").
        # ast.literal_eval only evaluates literals, never arbitrary code.
        literal_columns = [
            ("track", "tags"), ("album", "tags"), ("artist", "tags"),
            ("track", "genres"), ("track", "genres_all"),
        ]
        for column in literal_columns:
            tracks[column] = tracks[column].map(ast.literal_eval)

        date_columns = [
            ("track", "date_created"), ("track", "date_recorded"),
            ("album", "date_created"), ("album", "date_released"),
            ("artist", "date_created"), ("artist", "active_year_begin"),
            ("artist", "active_year_end"),
        ]
        for column in date_columns:
            tracks[column] = pd.to_datetime(tracks[column])

        # astype("category", categories=..., ordered=...) is no longer
        # accepted by pandas; the ordering has to be carried by an explicit
        # CategoricalDtype instead.
        subset_dtype = pd.CategoricalDtype(
            categories=["small", "medium", "large"], ordered=True)
        tracks["set", "subset"] = tracks["set", "subset"].astype(subset_dtype)

        category_columns = [
            ("track", "genre_top"), ("track", "license"),
            ("album", "type"), ("album", "information"),
            ("artist", "bio"),
        ]
        for column in category_columns:
            tracks[column] = tracks[column].astype("category")

        return tracks

    # Unrecognised file name: keep the historical "no result" behaviour.
    return None
def extract_features(data, features=None):
    """
    Look up a fixed set of audio features for every track of every composer.

    data maps a composer name to a list of track rows. Each row is indexed
    with ``row["track", "title"]`` and its ``name`` attribute is used as the
    track id (the shape produced by ``DataFrame.iterrows`` in load_data).

    features is an optional, already loaded feature table. When it is None
    the table is read from METADATA_DIR + "features.csv" via load().

    Returns a dict mapping each composer to a list with one dict per track.
    Every track dict has a "title" entry plus one entry per
    (feature, statistic) pair, holding the value of the "01" sub-column for
    that track, or None when the track or feature is not in the table.
    """
    if features is None:
        features = load(METADATA_DIR + "features.csv")

    keywords = [
        ("spectral_centroid", "mean"), ("spectral_centroid", "std"),
        ("chroma_stft", "mean"), ("chroma_stft", "std"),
    ]

    feature_table = {}
    for composer, tracks in data.items():
        rows = []
        for track in tracks:
            tid = track.name
            track_dict = {"title": track["track", "title"]}
            for keyword in keywords:
                try:
                    track_dict[keyword] = (
                        features[keyword].loc[[tid]]["01"].item())
                except KeyError:
                    # Missing track id or feature column: record the gap
                    # under the same key instead of aborting the whole run.
                    track_dict[keyword] = None
            rows.append(track_dict)
        feature_table[composer] = rows
    return feature_table
def load_data(composers_to_learn=None):
    """
    Group the rows of tracks.csv by composer.

    Only rows whose ("track", "genre_top") equals "Classical" and whose
    ("track", "composer") is not null are considered.

    When composers_to_learn is a non-empty list, a composer string that
    contains one of the listed names is replaced by that name, and rows
    whose (possibly replaced) composer is not in the list are skipped.
    When it is None or empty, every composer is kept as written.

    Returns a dict mapping composer name to the list of matching rows.
    """
    tracks = load(METADATA_DIR + "tracks.csv")

    is_classical = tracks["track", "genre_top"] == "Classical"
    has_composer = tracks["track", "composer"].notnull()
    tracks = tracks[is_classical & has_composer]

    by_composer = {}
    for _, row in tracks.iterrows():
        name = row["track", "composer"]
        if composers_to_learn:
            # Collapse free-form credits onto the requested name. Later
            # names are tested against the already-replaced value.
            for wanted in composers_to_learn:
                if wanted in name:
                    name = wanted
            if name not in composers_to_learn:
                continue
        by_composer.setdefault(name, []).append(row)
    return by_composer
def count_data(composer_dict):
    """
    Return a dict mapping each composer to the number of entries it has.
    """
    return {name: len(entries) for name, entries in composer_dict.items()}
if __name__ == "__main__":
    # Script entry point: group the tracks of the listed composers, print
    # how many tracks each one has, then print their extracted features.
    composers_to_learn = ["Bach", "Haydn", "Alkan", "Orff"]
    composer_data = load_data(composers_to_learn)
    print(count_data(composer_data))
    print(extract_features(composer_data))