init

2020-05-21 18:26:22 +08:00
commit e0656674ee
4 changed files with 255 additions and 0 deletions
--- a/novelfetch/README.md
+++ b/novelfetch/README.md
--- a/novelfetch/init.py
+++ b/novelfetch/init.py
--- a/novelfetch/main.py
+++ b/novelfetch/main.py
@@ -0,0 +1,32 @@
+from bs4 import BeautifulSoup
+import requests
+import re
+
+first_page = 'https://tw.piaotian.cc/read/285398/66476921.html'
+
+domain = re.search('https://.+?/', first_page).group()
+if domain[-1:] != "/":
+    domain += "/"
+
+res = requests.get(first_page, timeout=30)
+res.encoding = 'big5'
+
+soup = BeautifulSoup(res.text, 'lxml')
+
+novelTitle = soup.find("h1", class_="novel_title")
+novelContent = soup.find("div", class_="novel_content")
+
+nextPageTag = soup.find("a", text="下一章")["href"]
+if nextPageTag[:1] == "/":
+    nextPageTag = nextPageTag[1:]
+
+nextPageLink = domain+nextPageTag
+print(nextPageLink)
+modifiedContent = novelContent.text.replace("    ", "")
+
+# print(novelTitle)
+# print(novelContent.contents)
+with open("fetched.txt", "w", encoding='utf-8') as f:
+    f.write(novelTitle.string)
+    f.write(modifiedContent)
+