I am stuck on Beginning to Extract Data in the Crawl the Web with Python course.
Here is the code…
from lxml import html
import requests
class AppCrawler:
def __init__(self, starting_url, depth):
self.starting_url = starting_url
self.depth = depth
self.apps = []
def crawl(self):
self.get_app_from_link(self.starting_url)
return
def get_app_from_link(self, link):
start_page = requests.get(link)
print start_page.text
return
class App:
def __init__(self, name, developer, price, links):
self.name = name
self.developer = developer
self.price = price
self.links = links
def __str__():
return("Name: " + self.name.encode('UTF-8') +
"\r\nDeveloper: " + self.developer.encode('UTF-8') +
"\r\nPrice: " + self.price.encode('UTF-8') + "\r\n")
crawler = AppCrawler('http://itunes.apple.com/us/app/candy-crush-saga/id553834731', 0)
crawler.crawl()
for app in crawler.apps:
print app
And the console log…
C:\Users\HP\Documents\Web Crawling>python spider.py
C:\Python27\lib\site-packages\requests\packages\urllib3\util\ssl_.py:315: SNIMissingWarning: An HTTPS request has been made, but the SNI (Subject Name Indication) extension to TLS is not available on this platform. This may cause the server to present an incorrect TLS certificate, which can cause validation failures. For more information, see https://urllib3.readthedocs.org/en/latest/security.html#snimissingwarning.
SNIMissingWarning
C:\Python27\lib\site-packages\requests\packages\urllib3\util\ssl_.py:120: InsecurePlatformWarning: A true SSLContext object is not available. This prevents urllib3 from configuring SSL appropriately and may cause certain SSL connections to fail. For more information, see https://urllib3.readthedocs.org/en/latest/security.html#insecureplatformwarning.
InsecurePlatformWarning
Traceback (most recent call last):
File “spider.py”, line 32, in
crawler.crawl()
File “spider.py”, line 11, in crawl
self.get_app_from_link(self.starting_url)
File “spider.py”, line 16, in get_app_from_link
print start_page.text
File “C:\Python27\lib\encodings\cp437.py”, line 12, in encode
return codecs.charmap_encode(input,errors,encoding_map)
UnicodeEncodeError: ‘charmap’ codec can’t encode character u’\u2019’ in position 43280: character maps to
Please help…
Anyhow, I resolved it by searching for SNIMIssingWarning and then installed something via pip.