diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6241f04
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+cli
+chapter5/downloaded/
diff --git a/chapter1/2-beautifulSoup.py b/chapter1/2-beautifulSoup.py
index 1911093..9b159fc 100644
--- a/chapter1/2-beautifulSoup.py
+++ b/chapter1/2-beautifulSoup.py
@@ -2,5 +2,5 @@
 from bs4 import BeautifulSoup
 html = urlopen("http://www.pythonscraping.com/exercises/exercise1.html")
-bsObj = BeautifulSoup(html.read())
+bsObj = BeautifulSoup(html.read(), "html.parser")
 print(bsObj.h1)
diff --git a/chapter1/3-exceptionHandling.py b/chapter1/3-exceptionHandling.py
index 331a7ee..65b482f 100644
--- a/chapter1/3-exceptionHandling.py
+++ b/chapter1/3-exceptionHandling.py
@@ -11,7 +11,7 @@ def getTitle(url):
         print(e)
         return None
     try:
-        bsObj = BeautifulSoup(html.read())
+        bsObj = BeautifulSoup(html.read(), "html.parser")
         title = bsObj.body.h1
     except AttributeError as e:
         return None
@@ -22,5 +22,5 @@ def getTitle(url):
     print("Title could not be found")
 else:
     print(title)
-
-
\ No newline at end of file
+
+
diff --git a/chapter2/1-selectByClass.py b/chapter2/1-selectByClass.py
index 7f4c489..2a90755 100644
--- a/chapter2/1-selectByClass.py
+++ b/chapter2/1-selectByClass.py
@@ -2,7 +2,7 @@
 from bs4 import BeautifulSoup
 html = urlopen("http://www.pythonscraping.com/pages/warandpeace.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 nameList = bsObj.findAll("span", {"class":"green"})
 for name in nameList:
-    print(name.get_text())
\ No newline at end of file
+    print(name.get_text())
diff --git a/chapter2/2-selectByAttribute.py b/chapter2/2-selectByAttribute.py
index 01d9c90..e63426d 100644
--- a/chapter2/2-selectByAttribute.py
+++ b/chapter2/2-selectByAttribute.py
@@ -2,6 +2,6 @@
 from bs4 import BeautifulSoup
 html = urlopen("http://www.pythonscraping.com/pages/warandpeace.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 allText = bsObj.findAll(id="text")
-print(allText[0].get_text())
\ No newline at end of file
+print(allText[0].get_text())
diff --git a/chapter2/3-findDescendants.py b/chapter2/3-findDescendants.py
index 7b127de..2f4616b 100644
--- a/chapter2/3-findDescendants.py
+++ b/chapter2/3-findDescendants.py
@@ -2,7 +2,7 @@
 from bs4 import BeautifulSoup
 html = urlopen("http://www.pythonscraping.com/pages/page3.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 for child in bsObj.find("table",{"id":"giftList"}).children:
-    print(child)
\ No newline at end of file
+    print(child)
diff --git a/chapter2/4-findSiblings.py b/chapter2/4-findSiblings.py
index c850ef1..427b4ee 100644
--- a/chapter2/4-findSiblings.py
+++ b/chapter2/4-findSiblings.py
@@ -1,7 +1,7 @@
 from urllib.request import urlopen
 from bs4 import BeautifulSoup
 html = urlopen("http://www.pythonscraping.com/pages/page3.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 for sibling in bsObj.find("table",{"id":"giftList"}).tr.next_siblings:
-    print(sibling)
\ No newline at end of file
+    print(sibling)
diff --git a/chapter2/5-findParents.py b/chapter2/5-findParents.py
index d0e4593..50ec5ee 100644
--- a/chapter2/5-findParents.py
+++ b/chapter2/5-findParents.py
@@ -2,5 +2,5 @@
 from bs4 import BeautifulSoup
 html = urlopen("http://www.pythonscraping.com/pages/page3.html")
-bsObj = BeautifulSoup(html)
-print(bsObj.find("img",{"src":"../img/gifts/img1.jpg"}).parent.previous_sibling.get_text())
\ No newline at end of file
+bsObj = BeautifulSoup(html, "html.parser")
+print(bsObj.find("img",{"src":"../img/gifts/img1.jpg"}).parent.previous_sibling.get_text())
diff --git a/chapter2/6-regularExpressions.py b/chapter2/6-regularExpressions.py
index ef12761..285ed98 100644
--- a/chapter2/6-regularExpressions.py
+++ b/chapter2/6-regularExpressions.py
@@ -3,7 +3,7 @@
 import re
 html = urlopen("http://www.pythonscraping.com/pages/page3.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 images = bsObj.findAll("img", {"src":re.compile("\.\.\/img\/gifts/img.*\.jpg")})
-for image in images: 
+for image in images:
     print(image["src"])
diff --git a/chapter2/7-lambdaExpressions.py b/chapter2/7-lambdaExpressions.py
index 1704fa9..97a12e6 100644
--- a/chapter2/7-lambdaExpressions.py
+++ b/chapter2/7-lambdaExpressions.py
@@ -1,7 +1,7 @@
 from urllib.request import urlopen
 from bs4 import BeautifulSoup
 html = urlopen("http://www.pythonscraping.com/pages/page2.html")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 tags = bsObj.findAll(lambda tag: len(tag.attrs) == 2)
 for tag in tags:
-    print(tag)
\ No newline at end of file
+    print(tag)
diff --git a/chapter3/1-getWikiLinks.py b/chapter3/1-getWikiLinks.py
index 96ca211..313832a 100644
--- a/chapter3/1-getWikiLinks.py
+++ b/chapter3/1-getWikiLinks.py
@@ -7,10 +7,10 @@
 random.seed(datetime.datetime.now())
 def getLinks(articleUrl):
     html = urlopen("http://en.wikipedia.org"+articleUrl)
-    bsObj = BeautifulSoup(html)
+    bsObj = BeautifulSoup(html, "html.parser")
     return bsObj.find("div", {"id":"bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))
 links = getLinks("/wiki/Kevin_Bacon")
 while len(links) > 0:
     newArticle = links[random.randint(0, len(links)-1)].attrs["href"]
     print(newArticle)
-    links = getLinks(newArticle)
\ No newline at end of file
+    links = getLinks(newArticle)
diff --git a/chapter3/2-crawlWikipedia.py b/chapter3/2-crawlWikipedia.py
index ec62d06..ecdcf87 100644
--- a/chapter3/2-crawlWikipedia.py
+++ b/chapter3/2-crawlWikipedia.py
@@ -6,14 +6,14 @@
 def getLinks(pageUrl):
     global pages
     html = urlopen("http://en.wikipedia.org"+pageUrl)
-    bsObj = BeautifulSoup(html)
+    bsObj = BeautifulSoup(html, "html.parser")
    try:
         print(bsObj.h1.get_text())
         print(bsObj.find(id ="mw-content-text").findAll("p")[0])
         print(bsObj.find(id="ca-edit").find("span").find("a").attrs['href'])
     except AttributeError:
         print("This page is missing something! No worries though!")
-    
+
     for link in bsObj.findAll("a", href=re.compile("^(/wiki/)")):
         if 'href' in link.attrs:
             if link.attrs['href'] not in pages:
@@ -22,4 +22,4 @@ def getLinks(pageUrl):
                 print("----------------\n"+newPage)
                 pages.add(newPage)
                 getLinks(newPage)
-getLinks("")
\ No newline at end of file
+getLinks("")
diff --git a/chapter3/3-crawlSite.py b/chapter3/3-crawlSite.py
index f34cc47..3615342 100644
--- a/chapter3/3-crawlSite.py
+++ b/chapter3/3-crawlSite.py
@@ -16,7 +16,7 @@ def getInternalLinks(bsObj, includeUrl):
             if link.attrs['href'] not in internalLinks:
                 internalLinks.append(link.attrs['href'])
     return internalLinks
-    
+
 #Retrieves a list of all external links found on a page
 def getExternalLinks(bsObj, excludeUrl):
     externalLinks = []
@@ -34,18 +34,18 @@ def splitAddress(address):
 def getRandomExternalLink(startingPage):
     html = urlopen(startingPage)
-    bsObj = BeautifulSoup(html)
+    bsObj = BeautifulSoup(html, "html.parser")
     externalLinks = getExternalLinks(bsObj, splitAddress(startingPage)[0])
     if len(externalLinks) == 0:
-        internalLinks = getInternalLinks(startingPage)
-        return getNextExternalLink(internalLinks[random.randint(0,
+        internalLinks = getInternalLinks(bsObj, startingPage)
+        return getExternalLinks(bsObj, internalLinks[random.randint(0,
                                    len(internalLinks)-1)])
     else:
         return externalLinks[random.randint(0, len(externalLinks)-1)]
-    
+
 def followExternalOnly(startingSite):
-    externalLink = getRandomExternalLink("http://oreilly.com")
+    externalLink = getRandomExternalLink(startingSite)
     print("Random external link is: "+externalLink)
     followExternalOnly(externalLink)
-    
-followExternalOnly("http://oreilly.com")
\ No newline at end of file
+
+followExternalOnly("http://oreilly.com")
diff --git a/chapter3/4-getExternalLinks.py b/chapter3/4-getExternalLinks.py
index 54fb854..b4fb296 100644
--- a/chapter3/4-getExternalLinks.py
+++ b/chapter3/4-getExternalLinks.py
@@ -21,7 +21,7 @@ def getInternalLinks(bsObj, includeUrl):
                 else:
                     internalLinks.append(link.attrs['href'])
     return internalLinks
-    
+
 #Retrieves a list of all external links found on a page
 def getExternalLinks(bsObj, excludeUrl):
     externalLinks = []
@@ -36,7 +36,7 @@ def getExternalLinks(bsObj, excludeUrl):
 def getRandomExternalLink(startingPage):
     html = urlopen(startingPage)
-    bsObj = BeautifulSoup(html)
+    bsObj = BeautifulSoup(html, "html.parser")
     externalLinks = getExternalLinks(bsObj, urlparse(startingPage).netloc)
     if len(externalLinks) == 0:
         print("No external links, looking around the site for one")
@@ -45,7 +45,7 @@ def getRandomExternalLink(startingPage):
         return getRandomExternalLink(internalLinks[random.randint(0,len(internalLinks)-1)])
     else:
         return externalLinks[random.randint(0, len(externalLinks)-1)]
-    
+
 def followExternalOnly(startingSite):
     externalLink = getRandomExternalLink(startingSite)
     print("Random external link is: "+externalLink)
diff --git a/chapter3/5-getAllExternalLinks.py b/chapter3/5-getAllExternalLinks.py
index c08b555..80e40f9 100644
--- a/chapter3/5-getAllExternalLinks.py
+++ b/chapter3/5-getAllExternalLinks.py
@@ -21,7 +21,7 @@ def getInternalLinks(bsObj, includeUrl):
                 else:
                     internalLinks.append(link.attrs['href'])
     return internalLinks
-    
+
 #Retrieves a list of all external links found on a page
 def getExternalLinks(bsObj, excludeUrl):
     externalLinks = []
@@ -36,7 +36,7 @@ def getExternalLinks(bsObj, excludeUrl):
 def getRandomExternalLink(startingPage):
     html = urlopen(startingPage)
-    bsObj = BeautifulSoup(html)
+    bsObj = BeautifulSoup(html, "html.parser")
     externalLinks = getExternalLinks(bsObj, urlparse(startingPage).netloc)
     if len(externalLinks) == 0:
         print("No external links, looking around the site for one")
@@ -45,12 +45,12 @@ def getRandomExternalLink(startingPage):
         return getRandomExternalLink(internalLinks[random.randint(0,len(internalLinks)-1)])
     else:
         return externalLinks[random.randint(0, len(externalLinks)-1)]
-    
+
 def followExternalOnly(startingSite):
     externalLink = getRandomExternalLink(startingSite)
     print("Random external link is: "+externalLink)
     followExternalOnly(externalLink)
-    
+
 #Collects a list of all external URLs found on the site
 allExtLinks = set()
 allIntLinks = set()
diff --git a/chapter3/scrapy/wikiSpider/wiki.log b/chapter3/scrapy/wikiSpider/wiki.log
deleted file mode 100644
index 999a381..0000000
--- a/chapter3/scrapy/wikiSpider/wiki.log
+++ /dev/null
@@ -1,18 +0,0 @@
-2015-03-09 00:11:36-0400 [scrapy] INFO: Scrapy 0.24.4 started (bot: wikiSpider)
-2015-03-09 00:11:36-0400 [scrapy] INFO: Optional features available: ssl, http11
-2015-03-09 00:11:36-0400 [scrapy] INFO: Overridden settings: {'NEWSPIDER_MODULE': 'wikiSpider.spiders', 'SPIDER_MODULES': ['wikiSpider.spiders'], 'LOG_FILE': 'wiki.log', 'BOT_NAME': 'wikiSpider'}
-2015-03-09 00:11:36-0400 [scrapy] INFO: Enabled extensions: LogStats, TelnetConsole, CloseSpider, WebService, CoreStats, SpiderState
-2015-03-09 00:11:37-0400 [scrapy] INFO: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats
-2015-03-09 00:11:37-0400 [scrapy] INFO: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware
-2015-03-09 00:11:37-0400 [scrapy] INFO: Enabled item pipelines:
-2015-03-09 00:11:37-0400 [article] INFO: Spider opened
-2015-03-09 00:11:37-0400 [article] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
-2015-03-09 00:11:37-0400 [scrapy] DEBUG: Telnet console listening on 127.0.0.1:6023
-2015-03-09 00:11:37-0400 [scrapy] DEBUG: Web service listening on 127.0.0.1:6080
-2015-03-09 00:11:37-0400 [article] DEBUG: Crawled (200) (referer: None)
-2015-03-09 00:11:37-0400 [scrapy] INFO: Received SIGINT, shutting down gracefully. Send again to force
-2015-03-09 00:11:37-0400 [article] INFO: Closing spider (shutdown)
-2015-03-09 00:11:37-0400 [article] DEBUG: Filtered offsite request to 'en.wikibooks.org':
-2015-03-09 00:11:37-0400 [article] DEBUG: Filtered offsite request to 'code.google.com':
-2015-03-09 00:11:37-0400 [article] DEBUG: Filtered offsite request to 'en.wikiquote.org':
-2015-03-09 00:11:37-0400 [scrapy] INFO: Received SIGINT twice, forcing unclean shutdown
diff --git a/chapter3/scrapy/wikiSpider/wikiSpider/__init__.pyc b/chapter3/scrapy/wikiSpider/wikiSpider/__init__.pyc
index 2aea624..1c09f02 100644
Binary files a/chapter3/scrapy/wikiSpider/wikiSpider/__init__.pyc and b/chapter3/scrapy/wikiSpider/wikiSpider/__init__.pyc differ
diff --git a/chapter3/scrapy/wikiSpider/wikiSpider/items.pyc b/chapter3/scrapy/wikiSpider/wikiSpider/items.pyc
index d80c209..eef6e2b 100644
Binary files a/chapter3/scrapy/wikiSpider/wikiSpider/items.pyc and b/chapter3/scrapy/wikiSpider/wikiSpider/items.pyc differ
diff --git a/chapter3/scrapy/wikiSpider/wikiSpider/settings.pyc b/chapter3/scrapy/wikiSpider/wikiSpider/settings.pyc
index c69c338..12ab1a3 100644
Binary files a/chapter3/scrapy/wikiSpider/wikiSpider/settings.pyc and b/chapter3/scrapy/wikiSpider/wikiSpider/settings.pyc differ
diff --git a/chapter3/scrapy/wikiSpider/wikiSpider/spiders/__init__.pyc b/chapter3/scrapy/wikiSpider/wikiSpider/spiders/__init__.pyc
index 78b171c..164d22b 100644
Binary files a/chapter3/scrapy/wikiSpider/wikiSpider/spiders/__init__.pyc and b/chapter3/scrapy/wikiSpider/wikiSpider/spiders/__init__.pyc differ
diff --git a/chapter3/scrapy/wikiSpider/wikiSpider/spiders/articleSpider.pyc b/chapter3/scrapy/wikiSpider/wikiSpider/spiders/articleSpider.pyc
index 39fe60f..fe50d8c 100644
Binary files a/chapter3/scrapy/wikiSpider/wikiSpider/spiders/articleSpider.pyc and b/chapter3/scrapy/wikiSpider/wikiSpider/spiders/articleSpider.pyc differ
diff --git a/chapter4/6-wikiHistories-Chinese.py b/chapter4/6-wikiHistories-Chinese.py
new file mode 100644
index 0000000..89169c6
--- /dev/null
+++ b/chapter4/6-wikiHistories-Chinese.py
@@ -0,0 +1,61 @@
+from urllib.request import urlopen
+from urllib.request import HTTPError
+from bs4 import BeautifulSoup
+import datetime
+import json
+import random
+import re
+
+random.seed(datetime.datetime.now())
+def getLinks(articleUrl):
+    html = urlopen("http://en.wikipedia.org"+articleUrl)
+    bsObj = BeautifulSoup(html, "html.parser")
+    return bsObj.find("div", {"id":"bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))
+
+def getHistoryIPs(pageUrl):
+    #Format of revision history pages is:
+    #http://en.wikipedia.org/w/index.php?title=Title_in_URL&action=history
+    pageUrl = pageUrl.replace("/wiki/", "")
+    historyUrl = "http://en.wikipedia.org/w/index.php?title="+pageUrl+"&action=history"
+    print("history url is: "+historyUrl)
+    html = urlopen(historyUrl)
+    bsObj = BeautifulSoup(html, "html.parser")
+    #finds only the links with class "mw-anonuserlink", which have IP addresses
+    #instead of usernames
+    ipAddresses = bsObj.findAll("a", {"class":"mw-anonuserlink"})
+    addressList = set()
+    for ipAddress in ipAddresses:
+        addressList.add(ipAddress.get_text())
+    return addressList
+
+
+def getCountry(ipAddress):
+    try:
+        html = urlopen("http://www.ip138.com/ips1388.asp?action=2&ip="+ipAddress).read().decode('gb2312')
+    except HTTPError:
+        return None
+    try:
+        bsObj = BeautifulSoup(html, "html.parser")
+        try:
+            response = bsObj.findAll(text=re.compile(":"))[0].split(":")[2]
+        except IndexError:
+            response = bsObj.findAll(text=re.compile("数据"))[0:2]
+    except AttributeError:
+        return None
+
+    return str(response)
+
+links = getLinks("/wiki/Python_(programming_language)")
+
+
+while(len(links) > 0):
+    for link in links:
+        print("-------------------")
+        historyIPs = getHistoryIPs(link.attrs["href"])
+        for historyIP in historyIPs:
+            country = getCountry(historyIP)
+            if country is not None:
+                print(historyIP+" is from "+country)
+
+    newLink = links[random.randint(0, len(links)-1)].attrs["href"]
+    links = getLinks(newLink)
diff --git a/chapter4/6-wikiHistories-no-locations.py b/chapter4/6-wikiHistories-no-locations.py
new file mode 100644
index 0000000..c727f04
--- /dev/null
+++ b/chapter4/6-wikiHistories-no-locations.py
@@ -0,0 +1,44 @@
+from urllib.request import urlopen
+from urllib.request import HTTPError
+from bs4 import BeautifulSoup
+import datetime
+import json
+import random
+import re
+
+random.seed(datetime.datetime.now())
+def getLinks(articleUrl):
+    html = urlopen("http://en.wikipedia.org"+articleUrl)
+    bsObj = BeautifulSoup(html, "html.parser")
+    return bsObj.find("div", {"id":"bodyContent"}).findAll("a", href=re.compile("^(/wiki/)((?!:).)*$"))
+
+def getHistoryIPs(pageUrl):
+    #Format of revision history pages is:
+    #http://en.wikipedia.org/w/index.php?title=Title_in_URL&action=history
+    pageUrl = pageUrl.replace("/wiki/", "")
+    historyUrl = "http://en.wikipedia.org/w/index.php?title="+pageUrl+"&action=history"
+    print("history url is: "+historyUrl)
+    html = urlopen(historyUrl)
+    bsObj = BeautifulSoup(html, "html.parser")
+    #finds only the links with class "mw-anonuserlink", which have IP addresses
+    #instead of usernames
+    ipAddresses = bsObj.findAll("a", {"class":"mw-anonuserlink"})
+    addressList = set()
+    for ipAddress in ipAddresses:
+        addressList.add(ipAddress.get_text())
+    return addressList
+
+
+
+links = getLinks("/wiki/Python_(programming_language)")
+
+
+while(len(links) > 0):
+    for link in links:
+        print("-------------------")
+        historyIPs = getHistoryIPs(link.attrs["href"])
+        for historyIP in historyIPs:
+            print(historyIP)
+
+    newLink = links[random.randint(0, len(links)-1)].attrs["href"]
+    links = getLinks(newLink)
diff --git a/chapter4/google-api-key.txt b/chapter4/google-api-key.txt
new file mode 100644
index 0000000..399d605
--- /dev/null
+++ b/chapter4/google-api-key.txt
@@ -0,0 +1,6 @@
+AIzaSyD9Dns12MuQ0ZtLFh-fvjdlpRSavXw6lRM
+
+Usage:
+curl -d @google-maps-geoapi-example.json -H "Content-Type: application/json" -i "https://www.googleapis.com/geolocation/v1/geolocate?key=AIzaSyD9Dns12MuQ0ZtLFh-fvjdlpRSavXw6lRM"
+
+Warning: Replace key=[Your Key Above]
diff --git a/chapter4/google-maps-geoapi-example.json b/chapter4/google-maps-geoapi-example.json
new file mode 100644
index 0000000..da3ab23
--- /dev/null
+++ b/chapter4/google-maps-geoapi-example.json
@@ -0,0 +1,31 @@
+{
+  "homeMobileCountryCode": 310,
+  "homeMobileNetworkCode": 260,
+  "radioType": "gsm",
+  "carrier": "T-Mobile",
+  "cellTowers": [
+    {
+      "cellId": 39627456,
+      "locationAreaCode": 40495,
+      "mobileCountryCode": 310,
+      "mobileNetworkCode": 260,
+      "age": 0,
+      "signalStrength": -95
+    }
+  ],
+  "wifiAccessPoints": [
+    {
+      "macAddress": "01:23:45:67:89:AB",
+      "signalStrength": 8,
+      "age": 0,
+      "signalToNoiseRatio": -65,
+      "channel": 8
+    },
+    {
+      "macAddress": "01:23:45:67:89:AC",
+      "signalStrength": 4,
+      "age": 0
+    }
+  ]
+}
+
diff --git a/chapter5/1-getPageMedia.py b/chapter5/1-getPageMedia.py
index 02869a8..db03cbf 100644
--- a/chapter5/1-getPageMedia.py
+++ b/chapter5/1-getPageMedia.py
@@ -12,7 +12,7 @@ def getAbsoluteURL(baseUrl, source):
     elif source.startswith("http://"):
         url = source
     elif source.startswith("www."):
-        url = source[4:]
+        source = source[4:]
         url = "http://"+source
     else:
         url = baseUrl+"/"+source
@@ -32,11 +32,11 @@ def getDownloadPath(baseUrl, absoluteUrl, downloadDirectory):
     return path
 html = urlopen("http://www.pythonscraping.com")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 downloadList = bsObj.findAll(src=True)
 for download in downloadList:
     fileUrl = getAbsoluteURL(baseUrl, download["src"])
     if fileUrl is not None:
         print(fileUrl)
-    urlretrieve(fileUrl, getDownloadPath(baseUrl, fileUrl, downloadDirectory))
\ No newline at end of file
+    urlretrieve(fileUrl, getDownloadPath(baseUrl, fileUrl, downloadDirectory))
diff --git a/chapter5/3-scrapeCsv.py b/chapter5/3-scrapeCsv.py
index 2c57942..607dbf4 100644
--- a/chapter5/3-scrapeCsv.py
+++ b/chapter5/3-scrapeCsv.py
@@ -3,12 +3,12 @@
 from bs4 import BeautifulSoup
 html = urlopen("http://en.wikipedia.org/wiki/Comparison_of_text_editors")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 #The main comparison table is currently the first table on the page
 table = bsObj.findAll("table",{"class":"wikitable"})[0]
 rows = table.findAll("tr")
-csvFile = open("files/editors.csv", 'wt', newline='', encoding='utf-8')
+csvFile = open("../files/editors.csv", 'wt', newline='', encoding='utf-8')
 writer = csv.writer(csvFile)
 try:
     for row in rows:
diff --git a/chapter5/4-mysqlBasicExample.py b/chapter5/4-mysqlBasicExample.py
index 9c3a28d..20aa769 100644
--- a/chapter5/4-mysqlBasicExample.py
+++ b/chapter5/4-mysqlBasicExample.py
@@ -1,9 +1,10 @@
 import pymysql
-conn = pymysql.connect(host='127.0.0.1', unix_socket='/tmp/mysql.sock',
-                       user='root', passwd=None, db='mysql')
+conn = pymysql.connect(host='127.0.0.1', unix_socket='/run/mysqld/mysqld.sock',user='root', passwd=None, db='scraping')
+###conn = pymysql.connect(host='127.0.0.1', unix_socket='/tmp/mysql.sock', ### invalid socket location; you can omit this parameter or refer to the docs
+### user='root', passwd=None, db='mysql') ### invalid db name; check your existing databases before selecting one
 cur = conn.cursor()
-cur.execute("USE scraping")
+###cur.execute("USE scraping") ### no need to specify db name twice
 cur.execute("SELECT * FROM pages WHERE id=1")
 print(cur.fetchone())
 cur.close()
-conn.close()
\ No newline at end of file
+conn.close()
diff --git a/chapter5/5-storeWikiLinks.py b/chapter5/5-storeWikiLinks.py
index 55440be..7b8157d 100644
--- a/chapter5/5-storeWikiLinks.py
+++ b/chapter5/5-storeWikiLinks.py
@@ -5,9 +5,9 @@
 import random
 import pymysql
-conn = pymysql.connect(host='127.0.0.1', unix_socket='/tmp/mysql.sock', user='root', passwd=None, db='mysql', charset='utf8')
+conn = pymysql.connect(host='127.0.0.1', unix_socket='/run/mysqld/mysqld.sock', user='root', passwd=None, db='scraping', charset='utf8')
 cur = conn.cursor()
-cur.execute("USE scraping")
+### cur.execute("USE scraping")
 random.seed(datetime.datetime.now())
@@ -17,7 +17,7 @@ def store(title, content):
 def getLinks(articleUrl):
     html = urlopen("http://en.wikipedia.org"+articleUrl)
-    bsObj = BeautifulSoup(html)
+    bsObj = BeautifulSoup(html, "html.parser")
     title = bsObj.find("h1").get_text()
     content = bsObj.find("div", {"id":"mw-content-text"}).find("p").get_text()
     store(title, content)
diff --git a/chapter5/6-6DegreesCrawlWiki.py b/chapter5/6-6DegreesCrawlWiki.py
index 8af29db..f1656d1 100644
--- a/chapter5/6-6DegreesCrawlWiki.py
+++ b/chapter5/6-6DegreesCrawlWiki.py
@@ -3,16 +3,16 @@
 import pymysql
 from urllib.request import urlopen
-conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='root', db='mysql', charset='utf8')
+conn = pymysql.connect(host='127.0.0.1', user='root', passwd=None, db='wikipedia', charset='utf8')
 cur = conn.cursor()
-cur.execute("USE wikipedia")
+### cur.execute("USE wikipedia")
 def pageScraped(url):
     cur.execute("SELECT * FROM pages WHERE url = %s", (url))
     if cur.rowcount == 0:
         return False
     page = cur.fetchone()
-    
+
     cur.execute("SELECT * FROM links WHERE fromPageId = %s", (int(page[0])))
     if cur.rowcount == 0:
         return False
@@ -39,7 +39,7 @@ def getLinks(pageUrl, recursionLevel):
         return
     pageId = insertPageIfNotExists(pageUrl)
     html = urlopen("http://en.wikipedia.org"+pageUrl)
-    bsObj = BeautifulSoup(html)
+    bsObj = BeautifulSoup(html, "html.parser")
     for link in bsObj.findAll("a", href=re.compile("^(/wiki/)((?!:).)*$")):
         insertLink(pageId, insertPageIfNotExists(link.attrs['href']))
         if not pageScraped(link.attrs['href']):
@@ -47,8 +47,8 @@ def getLinks(pageUrl, recursionLevel):
             newPage = link.attrs['href']
             print(newPage)
             getLinks(newPage, recursionLevel+1)
-        else: 
+        else:
             print("Skipping: "+str(link.attrs['href'])+" found on "+pageUrl)
-getLinks("/wiki/Kevin_Bacon", 0) 
+getLinks("/wiki/Kevin_Bacon", 0)
 cur.close()
 conn.close()
diff --git a/chapter5/8-sendEmailWhenChristmas.py b/chapter5/8-sendEmailWhenChristmas.py
index d738ec3..037662c 100644
--- a/chapter5/8-sendEmailWhenChristmas.py
+++ b/chapter5/8-sendEmailWhenChristmas.py
@@ -14,7 +14,7 @@ def sendMail(subject, body):
     s.send_message(msg)
     s.quit()
-bsObj = BeautifulSoup(urlopen("https://isitchristmas.com/"))
+bsObj = BeautifulSoup(urlopen("https://isitchristmas.com/"), "html.parser")
 while(bsObj.find("a", {"id":"answer"}).attrs['title'] == "NO"):
     print("It is not Christmas yet.")
     time.sleep(3600)
diff --git a/chapter6/2-getUtf8Text.py b/chapter6/2-getUtf8Text.py
index 5764c3c..c47c7f6 100644
--- a/chapter6/2-getUtf8Text.py
+++ b/chapter6/2-getUtf8Text.py
@@ -2,8 +2,8 @@
 from bs4 import BeautifulSoup
 html = urlopen("http://en.wikipedia.org/wiki/Python_(programming_language)")
-bsObj = BeautifulSoup(html)
+bsObj = BeautifulSoup(html, "html.parser")
 content = bsObj.find("div", {"id":"mw-content-text"}).get_text()
 content = bytes(content, "UTF-8")
 content = content.decode("UTF-8")
-print(content)
\ No newline at end of file
+print(content)
diff --git a/chapter6/6-readDocx.py b/chapter6/6-readDocx.py
index 203a9bd..2368852 100644
--- a/chapter6/6-readDocx.py
+++ b/chapter6/6-readDocx.py
@@ -8,7 +8,7 @@
 document = ZipFile(wordFile)
 xml_content = document.read('word/document.xml')
-wordObj = BeautifulSoup(xml_content.decode('utf-8'))
+wordObj = BeautifulSoup(xml_content.decode('utf-8'), "html.parser")
 textStrings = wordObj.findAll("w:t")
 for textElem in textStrings:
-    print(textElem.text)
\ No newline at end of file
+    print(textElem.text)
diff --git a/chapter6/from urllib.request import urlopen b/chapter6/from urllib.request import urlopen
deleted file mode 100644
index 52fe6b8..0000000
--- a/chapter6/from urllib.request import urlopen
+++ /dev/null
@@ -1,10 +0,0 @@
-from urllib.request import urlopen
-from io import StringIO
-import csv
-
-data = urlopen("http://pythonscraping.com/files/MontyPythonAlbums.csv").read().decode('ascii', 'ignore')
-dataFile = StringIO(data)
-csvReader = csv.reader(dataFile)
-
-for row in csvReader:
-print(row)
\ No newline at end of file
diff --git a/chapter6/readPdf.py b/chapter6/readPdf.py
deleted file mode 100644
index c4ecee8..0000000
--- a/chapter6/readPdf.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from pdfminer.pdfinterp import PDFResourceManager, process_pdf
-from pdfminer.converter import TextConverter
-from pdfminer.layout import LAParams
-from io import StringIO
-from io import open
-from urllib.request import urlopen
-
-def readPDF(pdfFile):
-    rsrcmgr = PDFResourceManager()
-    retstr = StringIO()
-    laparams = LAParams()
-    device = TextConverter(rsrcmgr, retstr, laparams=laparams)
-
-    process_pdf(rsrcmgr, device, pdfFile)
-    device.close()
-
-    content = retstr.getvalue()
-    retstr.close()
-    return content
-
-pdfFile = urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf")
-outputString = readPDF(pdfFile)
-print(outputString)
-pdfFile.close()
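
Note (editorial, not part of the patch): most hunks above make the same change, passing an explicit parser name as the second argument to the BeautifulSoup constructor. A minimal sketch of the pattern, assuming bs4 is installed and using one of the book's example pages; the commented-out "lxml" line is an optional alternative that only works if the third-party lxml package is installed:

from urllib.request import urlopen
from bs4 import BeautifulSoup

html = urlopen("http://www.pythonscraping.com/pages/page3.html")
# Naming the parser explicitly avoids bs4's "No parser was explicitly specified"
# warning and keeps parsing behavior consistent across machines.
bsObj = BeautifulSoup(html.read(), "html.parser")
# bsObj = BeautifulSoup(html.read(), "lxml")  # alternative parser, requires lxml
print(bsObj.h1)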