This commit is contained in:
CodyZhang
2020-05-21 18:26:22 +08:00
commit e0656674ee
4 changed files with 255 additions and 0 deletions

0
novelfetch/README.md Normal file
View File

0
novelfetch/__init__.py Normal file
View File

32
novelfetch/main.py Normal file
View File

@@ -0,0 +1,32 @@
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Fetch a single chapter page, print the URL of the following chapter (when the
# page links to one), and save the chapter title and text to fetched.txt.
first_page = 'https://tw.piaotian.cc/read/285398/66476921.html'

res = requests.get(first_page, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page.
res.raise_for_status()
# Force Big5 decoding of the response body before reading res.text.
res.encoding = 'big5'
soup = BeautifulSoup(res.text, 'lxml')

novelTitle = soup.find("h1", class_="novel_title")
novelContent = soup.find("div", class_="novel_content")
if novelTitle is None or novelContent is None:
    # find() returns None on a miss; report that instead of letting a later
    # attribute access fail with an unrelated-looking AttributeError.
    raise RuntimeError(
        f"chapter title or content element not found in {first_page}"
    )

# The "next chapter" anchor may be absent (e.g. on the last chapter), so look
# it up defensively rather than indexing the result of find() directly.
nextPageAnchor = soup.find("a", text="下一章")
nextPageTag = nextPageAnchor.get("href") if nextPageAnchor is not None else None
# urljoin resolves root-absolute, page-relative and fully-qualified hrefs
# against the page URL; plain domain + href is only right for the first kind.
nextPageLink = urljoin(first_page, nextPageTag) if nextPageTag else None
if nextPageLink:
    print(nextPageLink)

# Drop the four-space runs from the chapter text.
modifiedContent = novelContent.text.replace("    ", "")

with open("fetched.txt", "w", encoding='utf-8') as f:
    # get_text() always returns a str, whereas .string is None when the tag
    # has more than one child, which would make write() raise TypeError.
    f.write(novelTitle.get_text())
    f.write(modifiedContent)