forked from masterdatascience-UIMP-UC/introduccion-a-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lyrics.py
62 lines (56 loc) · 1.79 KB
/
lyrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def split_into_words(lyrics):
    """
    Split a string into lowercase words with surrounding punctuation removed.

    The text is lowercased and split on whitespace; punctuation is then
    stripped from both ends of every token. Punctuation inside a word
    (e.g. the apostrophe in "don't") is kept. Tokens made up only of
    punctuation (e.g. "..." or "-") are dropped instead of being returned
    as empty strings.

    Returns a list of words in their original order.
    """
    import string  # local import keeps this block self-contained

    # ASCII punctuation plus the Spanish opening marks, which are not part
    # of string.punctuation.
    strip_chars = string.punctuation + "¡¿"
    # strip() only removes characters from the start and end of each token.
    stripped = (token.strip(strip_chars) for token in lyrics.lower().split())
    return [word for word in stripped if word]
def words_to_frequencies(lyrics):
    """
    Count how many times each word occurs.

    ``lyrics`` is an iterable of words. Returns a dictionary mapping every
    distinct word to its number of occurrences; keys appear in the order in
    which the words were first seen.
    """
    counts = {}
    for token in lyrics:
        # get() yields 0 for a word not seen yet, so no membership test is needed.
        counts[token] = counts.get(token, 0) + 1
    return counts
def most_common_words(frequencies):
    """
    Find the word(s) with the highest frequency.

    Returns a tuple ``(count, words)`` where ``count`` is the largest value
    in ``frequencies`` and ``words`` lists every key holding that value, in
    the dictionary's iteration order.

    Raises ValueError if ``frequencies`` is empty, since there is no maximum.
    """
    highest = max(frequencies.values())
    leaders = [word for word, count in frequencies.items() if count == highest]
    return (highest, leaders)
def get_more_often_user_words(frequencies,threshold=10):
    """
    Return the most frequently used words, grouped by frequency.

    ``frequencies`` maps each word to its number of occurrences and is not
    modified. Only words whose count is at least ``threshold`` (10 when
    omitted) are kept.

    Returns a list of ``(count, words)`` tuples sorted by descending count,
    where ``words`` lists every word with that count in the dictionary's
    iteration order. The list is empty when no word reaches the threshold,
    including when ``frequencies`` is empty.
    """
    # Group the qualifying words by count in one pass. The previous
    # "take the max, delete it, repeat" loop raised ValueError once every
    # word had been consumed, i.e. whenever all words met the threshold.
    words_by_count = {}
    for word, count in frequencies.items():
        if count >= threshold:
            words_by_count.setdefault(count, []).append(word)
    return [
        (count, words_by_count[count])
        for count in sorted(words_by_count, reverse=True)
    ]