Files
NovelFetch/novelfetch/main.py
CodyZhang e0656674ee init
2020-05-21 18:26:22 +08:00

33 lines
831 B
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Fetch one chapter page, print the URL of the next chapter (if any), and save
# the chapter title and body text to "fetched.txt".

# Chapter page to download; the next-chapter link is resolved against it.
first_page = 'https://tw.piaotian.cc/read/285398/66476921.html'

# Scheme + host of the site. The pattern itself ends on "/", so the match
# always carries the trailing slash and needs no further normalisation.
# No longer used to build the next-chapter link; kept as a module-level name.
domain = re.search(r'https://.+?/', first_page).group()

res = requests.get(first_page, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
res.raise_for_status()
# Force Big5 decoding of the body rather than relying on what requests infers.
res.encoding = 'big5'
soup = BeautifulSoup(res.text, 'lxml')

novelTitle = soup.find("h1", class_="novel_title")
novelContent = soup.find("div", class_="novel_content")
if novelTitle is None or novelContent is None:
    raise RuntimeError(
        "chapter title or content element not found on " + first_page
    )

# Anchor whose text is exactly "下一章" (next chapter). It can be absent, e.g.
# on the last chapter, so look it up defensively instead of indexing the
# result of find() directly.
nextPageAnchor = soup.find("a", string="下一章")
nextPageTag = nextPageAnchor.get("href") if nextPageAnchor is not None else None
if nextPageTag:
    # urljoin handles root-relative ("/read/..."), page-relative and absolute
    # hrefs alike; plain string concatenation only got the first case right.
    nextPageLink = urljoin(first_page, nextPageTag)
    print(nextPageLink)
else:
    nextPageLink = None

# Drop the run of blanks used as paragraph indentation.
# NOTE(review): the file viewer flags invisible Unicode characters in this
# file; the literal below may hold non-breaking spaces rather than ASCII
# spaces - confirm before retyping it.
modifiedContent = novelContent.text.replace("    ", "")

with open("fetched.txt", "w", encoding='utf-8') as f:
    # get_text() always yields a str; .string is None when the heading has
    # more than one child node, which would make write() raise TypeError.
    f.write(novelTitle.get_text())
    f.write(modifiedContent)