-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
344 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
1.408367168897572741e+00;1.757964546987063059e+00;1.412795836484065815e+00;1.539532365180545703e+00;4.848876374569938141e-01 | ||
1.054976144967554541e+00;1.757964546987063059e+00;4.597192801257678485e-01;1.539532365180545703e+00;1.390721685453574885e+00 | ||
1.231671656932563641e+00;6.645963531292558013e-01;4.597192801257678485e-01;1.699118037180968210e+00;1.571888495052891077e+00 | ||
3.481940971075180302e-01;1.029052417748524961e+00;2.556487704114023707e+00;1.858703709181390717e+00;-4.209464105395876454e-01 | ||
1.761758192827591163e+00;-4.287718407285517341e-01;1.031565213940746961e+00;5.820183331780111047e-01;8.472212566556260871e-01 | ||
3.481940971075180302e-01;1.029052417748524961e+00;2.691039688541081443e-01;-3.754956988245231608e-01;1.209554875854258693e+00 | ||
-5.352834627175273585e-01;-6.431577610928257416e-02;1.222180525212406499e+00;7.416040051784336118e-01;1.028388066254942279e+00 | ||
3.481940971075180302e-01;6.645963531292558013e-01;4.597192801257678485e-01;-5.632435482367830620e-02;8.472212566556260871e-01 | ||
7.015851210375363411e-01;-1.157683969967089999e+00;7.848865758244849555e-02;1.539532365180545703e+00;8.472212566556260871e-01 | ||
7.015851210375363411e-01;8.468243854388903813e-01;4.597192801257678485e-01;-3.754956988245231608e-01;4.848876374569938141e-01 | ||
-1.818924387875090753e-01;-6.431577610928257416e-02;6.503345913974275527e-01;5.820183331780111047e-01;6.660544470563101171e-01 | ||
1.054976144967554541e+00;-1.157683969967089999e+00;-1.121266536892111809e-01;9.011896771788560079e-01;8.472212566556260871e-01 | ||
7.015851210375363411e-01;-6.431577610928257416e-02;6.503345913974275527e-01;-5.632435482367830620e-02;3.037208278576775111e-01 | ||
-5.196926822499936365e-03;1.393508482367794121e+00;8.409499026690872014e-01;-3.754956988245231608e-01;-4.209464105395876454e-01 | ||
-1.818924387875090753e-01;-1.157683969967089999e+00;8.409499026690872014e-01;4.224326611775887086e-01;6.660544470563101171e-01 | ||
1.408367168897572741e+00;6.645963531292558013e-01;-1.446433832590828805e+00;4.224326611775887086e-01;-6.021132201389038929e-01 | ||
1.054976144967554541e+00;6.645963531292558013e-01;-8.745878987758498591e-01;-5.350813708249456679e-01;1.225540182583612220e-01 | ||
1.714985851425088748e-01;-1.157683969967089999e+00;6.503345913974275527e-01;1.032613171767441246e-01;4.848876374569938141e-01 | ||
-1.818924387875090753e-01;-1.157683969967089999e+00;-4.933572762325305061e-01;7.416040051784336118e-01;1.209554875854258693e+00 | ||
-7.119789746825364585e-01;6.645963531292558013e-01;1.222180525212406499e+00;-6.946670428253680640e-01;-7.832800297382201959e-01 | ||
-1.818924387875090753e-01;-1.157683969967089999e+00;7.848865758244849555e-02;-3.754956988245231608e-01;1.209554875854258693e+00 | ||
-3.585879507525182031e-01;6.645963531292558013e-01;-6.839725875041902103e-01;-5.632435482367830620e-02;-2.397796009402713424e-01 | ||
1.714985851425088748e-01;-7.932279053478209496e-01;-4.933572762325305061e-01;5.820183331780111047e-01;-7.832800297382201959e-01 | ||
3.481940971075180302e-01;-1.157683969967089999e+00;-1.255818521319169268e+00;5.820183331780111047e-01;-5.861279134095506022e-02 | ||
7.015851210375363411e-01;-1.157683969967089999e+00;1.031565213940746961e+00;-1.492595402827480155e+00;-7.832800297382201959e-01 | ||
-1.818924387875090753e-01;-6.431577610928257416e-02;-6.839725875041902103e-01;-1.333009730827057870e+00;3.037208278576775111e-01 | ||
-1.948847558437600380e+00;8.468243854388903813e-01;-6.839725875041902103e-01;-3.754956988245231608e-01;-6.021132201389038929e-01 | ||
-3.585879507525182031e-01;-1.157683969967089999e+00;-1.446433832590828805e+00;-3.754956988245231608e-01;-6.021132201389038929e-01 | ||
-1.418761022542573080e+00;-1.157683969967089999e+00;-3.027419649608708574e-01;-5.350813708249456679e-01;-7.832800297382201959e-01 | ||
-2.125543070402609480e+00;1.393508482367794121e+00;-1.065203210047509508e+00;-1.173424058826635363e+00;-1.326780458536169105e+00 | ||
-1.242065510577563980e+00;6.645963531292558013e-01;-1.446433832590828805e+00;-1.333009730827057870e+00;-1.870280887334117903e+00 | ||
-8.886744866475456694e-01;-1.157683969967089999e+00;-8.745878987758498591e-01;-8.542527148257904601e-01;-1.507947268135485297e+00 | ||
-1.772152046472591280e+00;3.001402885099865858e-01;-1.065203210047509508e+00;-1.971352418828747455e+00;-1.870280887334117903e+00 | ||
-1.242065510577563980e+00;-7.932279053478209496e-01;-1.446433832590828805e+00;-1.492595402827480155e+00;-1.870280887334117903e+00 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,309 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Sascha Spors,\n", | ||
"Professorship Signal Theory and Digital Signal Processing,\n", | ||
"Institute of Communications Engineering (INT),\n", | ||
"Faculty of Computer Science and Electrical Engineering (IEF),\n", | ||
"University of Rostock,\n", | ||
"Germany\n", | ||
"\n", | ||
"# Data Driven Audio Signal Processing - A Tutorial with Computational Examples\n", | ||
"\n", | ||
"Master Course #24512\n", | ||
"\n", | ||
"- lecture: https://github.com/spatialaudio/data-driven-audio-signal-processing-lecture\n", | ||
"- tutorial: https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise\n", | ||
"\n", | ||
"Feel free to contact lecturer [email protected]\n", | ||
"\n", | ||
"# PCA on Achieved Points of Written Examination " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import scipy\n", | ||
"from scipy.linalg import svd, diagsvd\n", | ||
"import matplotlib as mpl\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"\n", | ||
"np.set_printoptions(precision=3, sign=' ', suppress=True)\n", | ||
"\n", | ||
"print(np.__version__) # tested with 1.26.4\n", | ||
"print(scipy.__version__) # tested with 1.13.1\n", | ||
"print(mpl.__version__) # tested with 3.9.2" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"X = np.loadtxt(open(\"exam_points_meanfree_unitvar.csv\", \"rb\"), delimiter=\";\", skiprows=0)\n", | ||
"N, F = X.shape\n", | ||
"print(N, F) # 34 students, 5 tasks for exam on signals & systems, a typical course in electrical engineering bachelor studies\n", | ||
"# columns correspond to theses tasks\n", | ||
"task_label = ['Task 1: Convolution', 'Task 2: Fourier', 'Task 3: Sampling', 'Task 4: Laplace Domain', 'Task 5: z-Domain']" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# data in exam_points_meanfree_unitvar.csv is already mean-free and columns have var=1\n", | ||
"# so the numbers in X do not represent points or percentage,\n", | ||
"# but rather encode the performance of the students per task in a normalised way\n", | ||
"# X is however sorted: first row belongs to best grade, last row to worst grade\n", | ||
"np.mean(X, axis=0), np.std(X, axis=0, ddof=1), np.var(X, axis=0, ddof=1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# for completeness of PCA algorithm ->\n", | ||
"# make X zscore (altough it is already)\n", | ||
"mu = np.mean(X, axis=0)\n", | ||
"X = X - mu # de-mean\n", | ||
"sigma = np.sqrt(np.sum(X**2, axis=0) / (N-1))\n", | ||
"X = X / sigma # normalise to std=1\n", | ||
"np.mean(X, axis=0), np.std(X, axis=0, ddof=1), np.var(X, axis=0, ddof=1) # check" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"X # print mean=0 / var=1 data matrix" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# get SVD / CovMatrix stuff\n", | ||
"[U, s, Vh] = svd(X)\n", | ||
"V = Vh.T # we don't use Vh later on!\n", | ||
"S = diagsvd(s, N, F) # sing vals matrix\n", | ||
"D, _ = np.linalg.eig(X.T @ X / (N-1)) # eig vals\n", | ||
"D = -np.sort(-D) # sort them, then ==\n", | ||
"d = s**2 / (N-1)\n", | ||
"print(np.allclose(d, D)) # so we go for d later on\n", | ||
"\n", | ||
"# switch polarities for nicer interpretation ot the\n", | ||
"# exam data\n", | ||
"V[:,0] *= -1\n", | ||
"U[:,0] *= -1\n", | ||
"\n", | ||
"V[:,2] *= -1\n", | ||
"U[:,2] *= -1\n", | ||
"\n", | ||
"V[:,3] *= -1\n", | ||
"U[:,3] *= -1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# PCA\n", | ||
"US = U @ S\n", | ||
"PC_Features = US @ diagsvd(1 / np.sqrt(d), F, F) # normalised such that columns have var 1, aka (normalised) PC scores\n", | ||
"print(np.var(PC_Features, axis=0, ddof=1))\n", | ||
"#PC_Loadings = (diagsvd(np.sqrt(d), F, F) @ V.T).T # ==\n", | ||
"PC_Loadings = V @ diagsvd(np.sqrt(d), F, F) # aka PC coeff, not unit-length anymore, but normalised such that it shows correlation between PC_Features and X" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"np.allclose(X, PC_Features @ PC_Loadings.T) # check correct matrix factorisation" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# project an x column vector to a pc feature column -> do this for all options -> get all weights for linear comb of pc features\n", | ||
"# correlation uses unit-length vectors\n", | ||
"PC_Loadings_manual = np.zeros((F, F))\n", | ||
"for row in range(F):\n", | ||
" tmp_x = X[:, row] / np.linalg.norm(X[:, row])\n", | ||
" for column in range(F):\n", | ||
" tmp_pc = PC_Features[:, column] / np.linalg.norm(PC_Features[:, column])\n", | ||
" PC_Loadings_manual[row, column] = np.inner(tmp_pc, tmp_x)\n", | ||
"np.allclose(PC_Loadings_manual, PC_Loadings) # we get the PC_Loadings matrix" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# explained variance\n", | ||
"d, np.var(US, axis=0, ddof=1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# explained cum variance in %\n", | ||
"cum_var = np.cumsum(d) / np.sum(d) * 100\n", | ||
"cum_var" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Check via Plots" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"plt.figure(figsize=(12,8))\n", | ||
"\n", | ||
"plt.subplot(2,1,1)\n", | ||
"for f in range(F):\n", | ||
" plt.plot(X[:, f], 'o-', color='C'+str(f), label='Task '+str(f+1), ms=3)\n", | ||
"plt.legend(loc='lower left')\n", | ||
"plt.xticks([0, N-1], labels=['best grade', 'worst grade'])\n", | ||
"plt.ylabel('normalised points (mean-free, var=1)')\n", | ||
"plt.grid(True)\n", | ||
"plt.title(task_label)\n", | ||
"\n", | ||
"plt.subplot(2,1,2)\n", | ||
"for f in range(F):\n", | ||
" plt.plot(US[:, f], 'o-', color='C'+str(f), label='PCA v '+str(f+1), lw=(F-f)*2/3, ms=(F-f)*3/2)\n", | ||
"plt.legend(loc='lower left')\n", | ||
"plt.xticks([0, N-1], labels=['best grade', 'worst grade'])\n", | ||
"plt.ylabel('PC features (mean-free, sorted var)')\n", | ||
"plt.xlabel('student index (sorted grade)')\n", | ||
"plt.grid(True)\n", | ||
"plt.title(['cum var in %:', cum_var])\n", | ||
"plt.tight_layout()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# correlation between task and pc\n", | ||
"pc_label = ['PC 1', 'PC 2', 'PC 3', 'PC 4', 'PC 5']\n", | ||
"cmap = plt.get_cmap('Spectral_r', 8)\n", | ||
"fig = plt.figure(figsize=(6,4))\n", | ||
"ax = fig.add_subplot(111)\n", | ||
"cax = ax.matshow(PC_Loadings, cmap=cmap, vmin=-1, vmax=+1)\n", | ||
"fig.colorbar(cax)\n", | ||
"ax.set_xticks(np.arange(len(pc_label)))\n", | ||
"ax.set_yticks(np.arange(len(task_label)))\n", | ||
"ax.set_xticklabels(pc_label)\n", | ||
"ax.set_yticklabels(task_label)\n", | ||
"ax.set_title('Loading Matrix = PC x contributes to Task y')\n", | ||
"plt.tight_layout()\n", | ||
"\n", | ||
"# a rank 3 approximation of the data,\n", | ||
"# i.e. using only PC1, PC2 and PC3 in the linear combination to reconstruct X\n", | ||
"# would only change one grade by a 1/3 grade step\n", | ||
"# so 85.899 % explained variance would be enough to figure the actual grading\n", | ||
"\n", | ||
"# PC1 and PC2 might allow an intuitive interpretation:\n", | ||
"# students are very well prepared to convolution, Laplace and z-Domain tasks\n", | ||
"# as theses tasks are always very similar and definitiely will be queried in the exam\n", | ||
"# so, PC1 indidcates the performance on 'fulfilled' expectations and is\n", | ||
"# highly correlated with the achieved grade\n", | ||
"# the Fourier task and the sampling task were chosen out of a wide range of options\n", | ||
"# here students have rather 'unknown' expectations, which is why we need PC 2 to cover this\n", | ||
"#\n", | ||
"# PC 3 to 5 show positive vs. negative correlations, i.e. mostly one good task vs. one bad task performance\n", | ||
"# some of these results are intuitive: we know that students sometimes have preferences for Laplace vs. z-Domain " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"np.sum(PC_Loadings**2, axis=0) # that's again the explained variance of the PCs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"np.sum(PC_Loadings**2, axis=1) # communalities, must sum to 1 in our normalised handling" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Copyright\n", | ||
"\n", | ||
"- the notebooks are provided as [Open Educational Resources](https://en.wikipedia.org/wiki/Open_educational_resources)\n", | ||
"- feel free to use the notebooks for your own purposes\n", | ||
"- the text is licensed under [Creative Commons Attribution 4.0](https://creativecommons.org/licenses/by/4.0/)\n", | ||
"- the code of the IPython examples is licensed under the [MIT license](https://opensource.org/licenses/MIT)\n", | ||
"- please attribute the work as follows: *Frank Schultz, Data Driven Audio Signal Processing - A Tutorial Featuring Computational Examples, University of Rostock* ideally with relevant file(s), github URL https://github.com/spatialaudio/data-driven-audio-signal-processing-exercise, commit number and/or version tag, year." | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "myddasp", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |