Skip to content

Commit

Permalink
Fix -s site list for adapter_hpfanficarchivecom.
Browse files Browse the repository at this point in the history
  • Loading branch information
JimmXinu committed Sep 14, 2020
1 parent f09a76f commit c57470e
Showing 1 changed file with 10 additions and 8 deletions.
18 changes: 10 additions & 8 deletions fanficfare/adapters/adapter_hpfanficarchivecom.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,8 @@ def __init__(self, config, url):
# get storyId from url--url validation guarantees query is only sid=1234
self.story.setMetadata('storyId',self.parsedUrl.query.split('=',)[1])

# The site has changed from http to https and back to http again.
self.protocol='http://'

# normalized story URL.
self._setURL( self.protocol + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))
self._setURL( self.getProtocol() + self.getSiteDomain() + '/stories/viewstory.php?sid='+self.story.getMetadata('storyId'))

# Each adapter needs to have a unique site abbreviation.
self.story.setMetadata('siteabbrev','hpffa')
Expand All @@ -65,9 +62,14 @@ def getSiteDomain():
# The site domain. Includes www here if the site uses it.
return 'hpfanficarchive.com'

@classmethod
def getProtocol(cls):
# The site has changed from http to https and back to http again.
return "http://"

@classmethod
def getSiteExampleURLs(cls):
return self.protocol+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234"
return cls.getProtocol()+cls.getSiteDomain()+"/stories/viewstory.php?sid=1234"

def getSiteURLPattern(self):
return r"https?:"+re.escape("//"+self.getSiteDomain()+"/stories/viewstory.php?sid=")+r"\d+$"
Expand Down Expand Up @@ -107,13 +109,13 @@ def extractChapterUrlsAndMetadata(self):
# Find authorid and URL from... author url.
a = soup.find('div', id="mainpage").find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
self.story.setMetadata('authorId',a['href'].split('=')[1])
self.story.setMetadata('authorUrl',self.protocol+self.host+'/stories/'+a['href'])
self.story.setMetadata('authorUrl',self.getProtocol()+self.host+'/stories/'+a['href'])
self.story.setMetadata('author',a.string)

# Find the chapters:
for chapter in soup.findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+r"&chapter=\d+$")):
# just in case there are tags, like <i>, in chapter titles.
self.add_chapter(chapter,self.protocol+self.host+'/stories/'+chapter['href'])
self.add_chapter(chapter,self.getProtocol()+self.host+'/stories/'+chapter['href'])


# eFiction sites don't help us out a lot with their meta data
Expand Down Expand Up @@ -189,7 +191,7 @@ def defaultGetattr(d,k):
# Find Series name from series URL.
a = soup.find('a', href=re.compile(r"viewseries.php\?seriesid=\d+"))
series_name = a.string
series_url = self.protocol+self.host+'/stories/'+a['href']
series_url = self.getProtocol()+self.host+'/stories/'+a['href']

# use BeautifulSoup HTML parser to make everything easier to find.
seriessoup = self.make_soup(self._fetchUrl(series_url))
Expand Down

0 comments on commit c57470e

Please sign in to comment.