init
This commit is contained in:
32
novelfetch/main.py
Normal file
32
novelfetch/main.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""Fetch one novel chapter and save its title and text to ``fetched.txt``.

The script downloads ``first_page``, decodes it as Big5, extracts the
chapter title and body, prints the absolute URL of the next chapter (when
the page links to one) and writes title + body to ``fetched.txt`` as UTF-8.
"""
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

first_page = 'https://tw.piaotian.cc/read/285398/66476921.html'

# Scheme + host of the start page, always ending in "/".
# The next-chapter link is now resolved with urljoin(), so this value is no
# longer needed for that; it is kept because it is a module-level name.
domain_match = re.match(r'https?://[^/]+', first_page)
if domain_match is None:
    raise ValueError("first_page is not an absolute http(s) URL: " + first_page)
domain = domain_match.group() + "/"

res = requests.get(first_page, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing and saving an error page.
res.raise_for_status()
# Force Big5 decoding of the body rather than relying on the response headers.
res.encoding = 'big5'

soup = BeautifulSoup(res.text, 'lxml')

novelTitle = soup.find("h1", class_="novel_title")
novelContent = soup.find("div", class_="novel_content")
# Validate before opening the output file: mode "w" truncates it, so failing
# later would leave an empty fetched.txt behind.
if novelTitle is None or novelContent is None:
    raise RuntimeError("chapter title or content not found in " + first_page)

# "下一章" is the anchor text of the next-chapter link. The last chapter may
# not have one, so a missing anchor/href is not treated as an error.
nextPageAnchor = soup.find("a", string="下一章")
nextPageTag = nextPageAnchor.get("href") if nextPageAnchor is not None else None

# urljoin handles root-relative ("/read/..."), directory-relative
# ("123.html") and already-absolute hrefs, resolved against the current page.
nextPageLink = urljoin(first_page, nextPageTag) if nextPageTag else None
if nextPageLink:
    print(nextPageLink)

# NOTE(review): only plain spaces are removed here; if the site indents
# paragraphs with non-breaking spaces (U+00A0) they are kept — confirm.
modifiedContent = novelContent.text.replace(" ", "")

with open("fetched.txt", "w", encoding='utf-8') as f:
    # get_text() instead of .string: .string is None when the <h1> contains
    # nested markup, which would make f.write() raise TypeError.
    f.write(novelTitle.get_text())
    f.write(modifiedContent)
|
||||
|
||||
Reference in New Issue
Block a user