-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathNewsData.py
80 lines (71 loc) · 1.89 KB
/
NewsData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Coded By Gowtham on 30/05/2020
# Coded Using Vim Text Editor
import requests
import json
from bs4 import BeautifulSoup as bs
from genUrl import getUrl
def paraParse(newsContent):
    """Return the text of every <p> element in *newsContent*, one per line.

    Args:
        newsContent: HTML markup to parse; it is handed to BeautifulSoup
            with the "lxml" parser.

    Returns:
        The paragraph texts joined with newlines, with leading and trailing
        whitespace stripped. An empty string when no <p> tags are found.
    """
    soup = bs(newsContent, "lxml")
    # Build the result with a single join instead of growing a string inside
    # an index loop; the final strip() makes a trailing newline irrelevant,
    # so the output matches the concatenating version exactly.
    return "\n".join(tag.text for tag in soup.find_all("p")).strip()
def _lookup(container, key, default=""):
    """Return container[key], or *default* when the lookup is not possible."""
    try:
        return container[key]
    except (KeyError, IndexError, TypeError):
        # KeyError: key absent; TypeError/IndexError: container is not a
        # mapping (e.g. None, "" or a list coming from the decoded JSON).
        return default


def newsData(query, language, timeout=30):
    """Fetch news rows for a query and flatten them into plain dictionaries.

    The URL is produced by ``getUrl(query, language)``; the response body is
    decoded as JSON and ``["data"]["rows"]`` is iterated. Every field that is
    missing from a row is reported as an empty string.

    Args:
        query: Passed through to ``getUrl``.
        language: Passed through to ``getUrl``.
        timeout: Seconds to wait for the HTTP server before giving up
            (forwarded to ``requests.get``). Defaults to 30.

    Returns:
        A dict ``{'success': True, 'data': [...]}`` where each entry of
        ``data`` has the keys ``title``, ``imageUrl``, ``url``, ``content``,
        ``PublishedTime``, ``viewCount`` and ``publisherStory``.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
        ValueError: If the response body is not valid JSON.
        KeyError, TypeError: If the decoded JSON has no ``data.rows``.
    """
    newsDictionary = {
        'success': True,
        'data': []
    }
    URL = getUrl(query, language)
    # A timeout keeps a stalled server from blocking the caller forever.
    response = requests.get(URL, timeout=timeout)
    responseDict = json.loads(response.content)
    rows = responseDict["data"]["rows"]
    for row in rows:
        # Nested objects fall back to "" so the inner lookups below also
        # resolve to "" when the parent object is absent.
        content_image = _lookup(row, "contentImage")
        counter = _lookup(row, "counter")

        try:
            content = paraParse(row["content"])
        except Exception:
            # Best effort: a missing field or any HTML-parsing failure yields
            # empty content rather than aborting the whole result set.
            content = ""

        newsDictionary['data'].append({
            'title': _lookup(row, "title"),
            'imageUrl': _lookup(content_image, "url"),
            'url': _lookup(row, "shareUrl"),
            'content': content,
            'PublishedTime': _lookup(counter, "ingestionDate"),
            'viewCount': _lookup(counter, "viewsCount"),
            'publisherStory': _lookup(row, "publisherStoryUrl")
        })
    return newsDictionary