"""Fetch one novel chapter page and save its title and text to ``fetched.txt``.

The script downloads ``first_page``, prints the chapter text and the URL of
the next chapter, copies that URL to the Windows clipboard, and finally
writes the chapter title followed by the chapter text to ``fetched.txt``.
"""
import re
from urllib.parse import urljoin

import requests
import win32clipboard
import win32con
from bs4 import BeautifulSoup

first_page = 'https://tw.piaotian.cc/read/285398/98006676.html'

# Scheme-and-host prefix of ``first_page``. The pattern ends in "/", so the
# match always carries the trailing slash. Kept as a module-level name; the
# next-chapter link itself is resolved with ``urljoin`` further down.
domain = re.search(r'https://.+?/', first_page).group()

res = requests.get(first_page, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
res.raise_for_status()
# Force Big5 decoding of the body before ``res.text`` is read.
res.encoding = 'big5'
soup = BeautifulSoup(res.text, 'lxml')

novelTitle = soup.find("h1", class_="novel_title")
novelContent = soup.find("div", class_="novel_content")
if novelTitle is None or novelContent is None:
    raise RuntimeError(
        f"chapter title or content not found in {first_page}"
    )

# The "next chapter" anchor may be absent (e.g. on the last chapter); treat
# the link as optional so the chapter text is still saved.
nextPageAnchor = soup.find("a", string="下一章")
nextPageTag = nextPageAnchor.get("href") if nextPageAnchor is not None else None
# urljoin resolves root-relative, document-relative and absolute hrefs
# against the page that contained them.
nextPageLink = urljoin(first_page, nextPageTag) if nextPageTag else None

modifiedContent = novelContent.text.replace("    ", "")
print(modifiedContent)

if nextPageLink is not None:
    print(nextPageLink)
    win32clipboard.OpenClipboard()
    try:
        win32clipboard.EmptyClipboard()
        win32clipboard.SetClipboardData(win32con.CF_UNICODETEXT, nextPageLink)
    finally:
        # Always release the clipboard, even if setting the data failed.
        win32clipboard.CloseClipboard()
else:
    print("No next-chapter link found; clipboard left unchanged.")

with open("fetched.txt", "w", encoding='utf-8') as f:
    # get_text() always returns a str, whereas .string is None when the
    # heading has more than one child node.
    f.write(novelTitle.get_text())
    f.write(modifiedContent)