-
Notifications
You must be signed in to change notification settings - Fork 44
/
Copy pathmost-common-word.py
133 lines (127 loc) · 4.46 KB
/
most-common-word.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Time: O(m + n), m is the size of banned, n is the size of paragraph
# Space: O(m + n)
# Given a paragraph and a list of banned words,
# return the most frequent word that is not in the list of banned words.
# It is guaranteed there is at least one word that isn't banned, and that the answer is unique.
#
# Words in the list of banned words are given in lowercase, and free of
# punctuation.
# Words in the paragraph are not case sensitive.
# The answer is in lowercase.
#
# Example:
# Input:
# paragraph = "Bob hit a ball, the hit BALL flew far after it was hit."
# banned = ["hit"]
# Output: "ball"
# Explanation:
# "hit" occurs 3 times, but it is a banned word.
# "ball" occurs twice (and no other word does), so it is the most frequent
# non-banned word in the paragraph.
# Note that words in the paragraph are not case sensitive,
# that punctuation is ignored (even if adjacent to words, such as "ball,"),
# and that "hit" isn't the answer even though it occurs more because it is
# banned.
#
# Note:
# - 1 <= paragraph.length <= 1000.
# - 1 <= banned.length <= 100.
# - 1 <= banned[i].length <= 10.
# - The answer is unique, and written in lowercase
# (even if its occurrences in paragraph may have uppercase symbols,
# and even if it is a proper noun.)
# - paragraph only consists of letters, spaces,
# or the punctuation symbols !?',;.
# - Different words in paragraph are always separated by a space.
# - There are no hyphens or hyphenated words.
# - Words only consist of letters, never apostrophes or
# other punctuation symbols.
# V0
# IDEA : REGULAR EXPRESSION + COLLECTION
import collections
import re
class Solution:
    """V0: delete punctuation with a regex, then count words with a Counter."""

    # The punctuation symbols the problem statement allows in a paragraph.
    # Compiled once at class creation instead of on every call.
    _PUNCTUATION = re.compile(r"[!?',;.]")

    def mostCommonWord(self, paragraph, banned):
        """
        Return the most frequent word in ``paragraph`` that is not banned.

        The paragraph is lowercased, the punctuation symbols ``!?',;.`` are
        removed, and the remainder is split on whitespace.  Ties are resolved
        in favour of the word that appears first.

        :type paragraph: str
        :type banned: List[str]
        :rtype: str -- the winning word in lowercase, or '' when no
            non-banned word exists
        """
        # A set gives O(1) membership tests, keeping the whole pass O(m + n).
        banned_words = set(banned)
        cleaned = self._PUNCTUATION.sub('', paragraph.lower())
        # split() with no argument collapses runs of whitespace, so repeated,
        # leading or trailing spaces never produce '' tokens that would be
        # counted as words.
        words = [word for word in cleaned.split() if word not in banned_words]
        top = collections.Counter(words).most_common(1)
        return top[0][0] if top else ''
# V1
# https://blog.csdn.net/fuxuemingzhu/article/details/80472079
import collections
class Solution:
    """V1: regex punctuation removal followed by a Counter over the words."""

    # Punctuation permitted by the problem statement; compiled once and
    # shared by every call.
    _PUNCTUATION = re.compile(r"[!?',;.]")

    def mostCommonWord(self, paragraph, banned):
        """
        Find the most common non-banned word of ``paragraph``.

        Matching is case-insensitive (the paragraph is lowercased) and the
        symbols ``!?',;.`` are dropped before the text is split on
        whitespace.  When several words share the top count, the one seen
        first wins.

        :type paragraph: str
        :type banned: List[str]
        :rtype: str -- lowercase word, or '' if every word is banned or the
            paragraph holds no words
        """
        # Hash-based lookup instead of scanning the banned list per word.
        excluded = set(banned)
        without_punctuation = self._PUNCTUATION.sub('', paragraph.lower())
        # No-argument split() skips empty tokens that split(' ') would emit
        # for consecutive/leading/trailing spaces.
        count = collections.Counter(
            word for word in without_punctuation.split()
            if word not in excluded
        )
        best = count.most_common(1)
        if not best:
            # Nothing eligible: return '' rather than raising IndexError.
            return ''
        return best[0][0]
### Test case
# Smoke tests run at import time against whichever ``Solution`` class is in
# scope at this point of the file.  Each row is (paragraph, banned, expected).
s = Solution()
_CASES = (
    ("Bob hit a ball, the hit BALL flew far after it was hit.", ["hit"], "ball"),
    # Digits are not punctuation, so the token survives untouched.
    ("123", [""], "123"),
    # A banned word that is only a prefix of the token must not ban it.
    ("aaa", ["a"], "aaa"),
    # "in" and "the" tie on count; the earlier word is expected.
    ("Words in the list of banned words are given in the lowercase, and free o", ["words"], "in"),
)
for paragraph, banned, _expected in _CASES:
    assert s.mostCommonWord(paragraph, banned) == _expected
# V1'
# https://blog.csdn.net/fuxuemingzhu/article/details/80472079
# http://bookshadow.com/weblog/2018/04/15/leetcode-most-common-word/
class Solution(object):
    """V1': pull word tokens out with a regex and count the allowed ones."""

    def mostCommonWord(self, paragraph, banned):
        """
        Return the most frequent word of ``paragraph`` that is not banned.

        The paragraph is lowercased and tokenized into runs of word
        characters, so any punctuation or whitespace acts as a separator.
        Ties go to the word encountered first.

        :type paragraph: str
        :type banned: List[str]
        :rtype: str -- lowercase word, or '' when no non-banned word exists
        """
        # Set membership is O(1); the original scanned the list per token.
        banned_words = set(banned)
        # \w+ matches maximal runs of letters, digits and underscores.
        tokens = re.findall(r"\w+", paragraph.lower())
        count = collections.Counter(
            token for token in tokens if token not in banned_words
        )
        top = count.most_common(1)
        # Guard against an empty counter instead of raising IndexError.
        return top[0][0] if top else ''
# V1''
# http://bookshadow.com/weblog/2018/04/15/leetcode-most-common-word/
class Solution(object):
    """V1' (re.sub variant): count every word, then discard the banned ones."""

    def mostCommonWord(self, paragraph, banned):
        """
        Return the most frequent word of ``paragraph`` that is not banned.

        The paragraph is lowercased, the symbols ``!?',;.`` are removed, and
        the result is split on whitespace.  All tokens are counted first;
        banned entries are then removed from the tally.  Ties go to the word
        seen first.

        :type paragraph: str
        :type banned: List[str]
        :rtype: str -- lowercase word, or '' when no non-banned word exists
        """
        # Raw string: inside a character class none of these symbols need a
        # backslash, and the former non-raw '\!', '\?', '\,', '\.' were
        # invalid string escape sequences.
        tokens = re.sub(r"[!?',;.]", '', paragraph.lower()).split()
        cnt = collections.Counter(tokens)
        for ban in banned:
            # pop with a default is a no-op for words that never occurred.
            cnt.pop(ban, None)
        top = cnt.most_common(1)
        # Everything banned / empty paragraph: return '' instead of raising.
        return top[0][0] if top else ''
# V2
import collections
class Solution(object):
    """V2: strip punctuation per token and pick the best word with one scan."""

    def mostCommonWord(self, paragraph, banned):
        """
        Return the most frequent word of ``paragraph`` that is not banned.

        The paragraph is lowercased and split on whitespace; the symbols
        ``!?',;.`` are stripped from both ends of every token.  Ties go to
        the word seen first.

        :type paragraph: str
        :type banned: List[str]
        :rtype: str -- lowercase word, or '' when no non-banned word exists
        """
        lookup = set(banned)
        stripped = (
            word.strip("!?',;.")
            for word in paragraph.lower().split()
        )
        # Drop tokens that were pure punctuation.  Counting '' let it become
        # the current best, after which the next word replaced it regardless
        # of count and the real leader was lost.
        counts = collections.Counter(word for word in stripped if word)
        result = ''
        for word in counts:
            if word in lookup:
                continue
            # Strict '>' keeps the earliest word among equal counts.
            if not result or counts[word] > counts[result]:
                result = word
        return result