33 lines
831 B
Python
33 lines
831 B
Python
import re
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# Fetch one chapter page, save its title and text to "fetched.txt", and print
# the absolute URL of the next chapter when the page links to one.

first_page = 'https://tw.piaotian.cc/read/285398/66476921.html'

# Site root (scheme + host, always with a trailing slash). Link resolution
# below goes through urljoin; this name is kept for anything that reads it.
_parts = urlsplit(first_page)
domain = _parts.scheme + "://" + _parts.netloc + "/"

res = requests.get(first_page, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
res.raise_for_status()
# Force Big5 decoding of the body instead of the encoding requests guesses.
res.encoding = 'big5'

soup = BeautifulSoup(res.text, 'lxml')

novelTitle = soup.find("h1", class_="novel_title")
novelContent = soup.find("div", class_="novel_content")
if novelTitle is None or novelContent is None:
    # Without both elements there is nothing meaningful to save.
    raise RuntimeError(
        "chapter title or content element not found in " + first_page
    )

# The next-chapter link is the anchor whose text is exactly "下一章".
# It can be absent, so look the anchor up before touching its href.
nextPageAnchor = soup.find("a", string="下一章")
nextPageTag = nextPageAnchor.get("href") if nextPageAnchor is not None else None

# urljoin resolves root-relative ("/read/..."), document-relative
# ("66476922.html") and already-absolute hrefs against the current page.
nextPageLink = urljoin(first_page, nextPageTag) if nextPageTag else None
if nextPageLink:
    print(nextPageLink)

# Strip the space characters from the chapter body.
modifiedContent = novelContent.text.replace(" ", "")

with open("fetched.txt", "w", encoding='utf-8') as f:
    # get_text() always returns a str; .string is None when the heading
    # contains nested markup, which would make write() raise TypeError.
    f.write(novelTitle.get_text())
    f.write(modifiedContent)