-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnews_page_objects.py
51 lines (36 loc) · 1.33 KB
/
news_page_objects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import bs4
import requests
from common import config
class NewsPage:
    """Base page object: download one URL and parse the response as HTML.

    The settings for the site are read from
    ``config()['news_sites'][news_site_uid]``; the ``'queries'`` entry of
    those settings is kept on the instance so subclasses can look up the
    query strings they hand to :meth:`_select`.
    """

    # Seconds to wait for the server before giving up. ``requests`` applies
    # no timeout by default, so without this a stalled server would block
    # the caller indefinitely.
    _REQUEST_TIMEOUT = 30

    def __init__(self, news_site_uid, url):
        """Load the configuration for ``news_site_uid`` and fetch ``url``.

        Args:
            news_site_uid: Key of the site inside the ``'news_sites'``
                section of the configuration.
            url: Address of the page to download and parse.

        Raises:
            requests.exceptions.RequestException: If the download fails,
                times out, or the server answers with an error status.
        """
        self._config = config()['news_sites'][news_site_uid]
        self._queries = self._config['queries']
        self._html = None
        self._visit(url)

    def _select(self, query_string):
        """Return what the parsed document's ``select`` yields for ``query_string``."""
        return self._html.select(query_string)

    def _visit(self, url):
        """Download ``url`` and store the parsed document in ``self._html``.

        Raises:
            requests.exceptions.RequestException: On connection problems,
                on timeout, or (via ``raise_for_status``) on an HTTP error
                status.
        """
        response = requests.get(url, timeout=self._REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        response.raise_for_status()
        self._html = bs4.BeautifulSoup(response.text, 'html.parser')
class HomePage(NewsPage):
    """Page object exposing the article links found on a fetched page."""

    def __init__(self, news_site_uid, url):
        """Fetch and parse ``url`` using the settings of ``news_site_uid``."""
        super().__init__(news_site_uid, url)

    @property
    def article_links(self):
        """Return the set of ``href`` values of the matched link elements.

        Elements are selected with the ``'homepage_article_links'`` query
        from the site configuration; matches that are falsy or carry no
        ``href`` attribute are skipped. Duplicates collapse because the
        result is a set.
        """
        selector = self._queries['homepage_article_links']
        return {
            anchor['href']
            for anchor in self._select(selector)
            if anchor and anchor.has_attr('href')
        }
class ArticlePage(NewsPage):
    """Page object exposing the body and title text of a fetched page."""

    def __init__(self, news_site_uid, url):
        """Fetch and parse ``url`` using the settings of ``news_site_uid``."""
        super().__init__(news_site_uid, url)

    def _first_text(self, query_key):
        """Return the ``text`` of the first match for the configured query.

        ``query_key`` names an entry of the site's ``'queries'`` settings.
        An empty string is returned when the query matches nothing.
        """
        matches = self._select(self._queries[query_key])
        if not matches:
            return ''
        return matches[0].text

    @property
    def body(self):
        """Text of the first ``'article_body'`` match, or ``''`` if none."""
        return self._first_text('article_body')

    @property
    def title(self):
        """Text of the first ``'article_title'`` match, or ``''`` if none."""
        return self._first_text('article_title')