# NovelFetch/novelfetch/main.py

import re
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# Fetch one chapter page, print the URL of the following chapter (when the
# page links to one) and save the chapter title + body text to fetched.txt.

# URL of the chapter page to download.
first_page = 'https://tw.piaotian.cc/read/285398/66476921.html'

# Site root ("scheme://host/"). Derived with a URL parser instead of a
# hard-coded "https://...?/" regex so it also works for http:// URLs and for
# URLs without a path component (the regex returned None for both).
_url_parts = urlsplit(first_page)
domain = _url_parts.scheme + "://" + _url_parts.netloc + "/"

res = requests.get(first_page, timeout=30)
# Fail fast on 4xx/5xx responses instead of parsing and saving an error page.
res.raise_for_status()
# Force Big5 decoding of the response body before reading res.text.
res.encoding = 'big5'

soup = BeautifulSoup(res.text, 'lxml')

novelTitle = soup.find("h1", class_="novel_title")
novelContent = soup.find("div", class_="novel_content")
if novelTitle is None or novelContent is None:
    # Without this check the failure would surface later as an opaque
    # AttributeError on None.
    raise RuntimeError(
        "could not find h1.novel_title / div.novel_content in " + first_page
    )

# The "下一章" anchor may be missing; in that case there is simply no next
# link, and the chapter that was fetched is still written out below.
nextPageAnchor = soup.find("a", string="下一章")
nextPageTag = nextPageAnchor.get("href") if nextPageAnchor is not None else None

# urljoin resolves root-relative ("/read/..."), page-relative ("123.html") and
# absolute hrefs against the current page; plain concatenation with the site
# root is only correct for the root-relative form.
nextPageLink = urljoin(first_page, nextPageTag) if nextPageTag else None
if nextPageLink is not None:
    print(nextPageLink)

# Remove every occurrence of the four-character whitespace run below
# (presumably the paragraph indentation used by the site - confirm).
modifiedContent = novelContent.text.replace("    ", "")

# get_text() always returns a str, whereas .string is None as soon as the <h1>
# contains nested markup. Computed before the file is opened so that a failure
# here cannot leave a truncated fetched.txt behind.
titleText = novelTitle.get_text()

with open("fetched.txt", "w", encoding='utf-8') as f:
    f.write(titleText)
    f.write(modifiedContent)