From 795732b0aee161339f6174332be00488dfcafc10 Mon Sep 17 00:00:00 2001 From: yjhmelody <465402634@qq.com> Date: Sun, 2 Apr 2017 22:46:57 +0800 Subject: [PATCH 1/3] Adds html.parser to BeautifulSoup([markup], html.parser) calls --- chapter1/2-beautifulSoup.py | 2 +- chapter1/3-exceptionHandling.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/chapter1/2-beautifulSoup.py b/chapter1/2-beautifulSoup.py index 1911093..9b159fc 100644 --- a/chapter1/2-beautifulSoup.py +++ b/chapter1/2-beautifulSoup.py @@ -2,5 +2,5 @@ from bs4 import BeautifulSoup html = urlopen("http://www.pythonscraping.com/exercises/exercise1.html") -bsObj = BeautifulSoup(html.read()) +bsObj = BeautifulSoup(html.read(), "html.parser") print(bsObj.h1) diff --git a/chapter1/3-exceptionHandling.py b/chapter1/3-exceptionHandling.py index 331a7ee..a580526 100644 --- a/chapter1/3-exceptionHandling.py +++ b/chapter1/3-exceptionHandling.py @@ -1,8 +1,6 @@ from urllib.request import urlopen from urllib.error import HTTPError from bs4 import BeautifulSoup -import sys - def getTitle(url): try: @@ -11,7 +9,7 @@ def getTitle(url): print(e) return None try: - bsObj = BeautifulSoup(html.read()) + bsObj = BeautifulSoup(html.read(), "html.parser") title = bsObj.body.h1 except AttributeError as e: return None From aa2e66900905387a0d8288987c67d8a8980ab44a Mon Sep 17 00:00:00 2001 From: yjh <465402634@qq.com> Date: Sun, 2 Apr 2017 23:33:08 +0800 Subject: [PATCH 2/3] fix path --- chapter5/3-scrapeCsv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter5/3-scrapeCsv.py b/chapter5/3-scrapeCsv.py index 4b68abe..607dbf4 100644 --- a/chapter5/3-scrapeCsv.py +++ b/chapter5/3-scrapeCsv.py @@ -8,7 +8,7 @@ table = bsObj.findAll("table",{"class":"wikitable"})[0] rows = table.findAll("tr") -csvFile = open("files/editors.csv", 'wt', newline='', encoding='utf-8') +csvFile = open("../files/editors.csv", 'wt', newline='', encoding='utf-8') writer = csv.writer(csvFile) try: for row in rows: From 903b09e04f0c432897ca205961609e53ada7aa7f Mon Sep 17 00:00:00 2001 From: yjh <465402634@qq.com> Date: Sun, 2 Apr 2017 23:36:29 +0800 Subject: [PATCH 3/3] fix TabError --- chapter5/8-sendEmailWhenChristmas.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chapter5/8-sendEmailWhenChristmas.py b/chapter5/8-sendEmailWhenChristmas.py index d738ec3..3943ad0 100644 --- a/chapter5/8-sendEmailWhenChristmas.py +++ b/chapter5/8-sendEmailWhenChristmas.py @@ -10,9 +10,9 @@ def sendMail(subject, body): msg['From'] = "christmas_alerts@pythonscraping.com" msg['To'] = "ryan@pythonscraping.com" - s = smtplib.SMTP('localhost') - s.send_message(msg) - s.quit() + s = smtplib.SMTP('localhost') + s.send_message(msg) + s.quit() bsObj = BeautifulSoup(urlopen("https://isitchristmas.com/")) while(bsObj.find("a", {"id":"answer"}).attrs['title'] == "NO"):