import urllib
import urllib2
import requests
def getHtml_urllib(url):
    """Fetch *url* with ``urllib.urlopen`` and print the raw response body.

    The body is printed exactly as it was received; no character-set
    conversion is applied.  A GBK page can be transcoded first with
    ``unicode(html, 'GBK').encode('UTF-8')``.

    Args:
        url: Address of the page to download.
    """
    page = urllib.urlopen(url)
    try:
        html = page.read()
    finally:
        # Always release the underlying connection, even when the read
        # fails part-way through.
        page.close()
    print(html)
# Method 1: fetching a non-GBK site
def getHtml_GBK_urllib2(url):
    """Fetch *url* with ``urllib2``, transcode it from GBK to UTF-8, and print it.

    Args:
        url: Address of the page to download.

    Raises:
        UnicodeDecodeError: If the downloaded body is not valid GBK.
    """
    req = urllib2.Request(url)
    conn = urllib2.urlopen(req)
    try:
        raw = conn.read()
    finally:
        # Always release the underlying connection, even when the read
        # fails part-way through.
        conn.close()
    # Decode the GBK bytes to unicode, then re-encode as UTF-8 for output.
    response = unicode(raw, 'GBK').encode('UTF-8')
    print(response)
# Method 2: fetching a GBK site
def getHtml_header(url):
    """Download *url* via ``requests`` with browser-like headers and print it.

    The request carries a Firefox ``User-Agent`` string and an ``Accept``
    header; the text of the response (``Response.text``) is printed.

    Args:
        url: Address of the page to download.
    """
    # Request headers sent along with the GET.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0',
        'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    }
    reply = requests.get(url, headers=request_headers)
    print(reply.text)
# Script driver: these statements sit at module level, so they run whenever
# the file is executed (and also if it is imported).
# Fetch the sample page with the plain urllib helper.
url= 'http://www.w3school.com.cn/h.asp'
getHtml_urllib(url)
# Alternative fetchers, currently disabled:
# getHtml_GBK_urllib2(url)
# getHtml_header(url)
# Other URLs left by the author (presumably for manual testing -- confirm):
#https://www.baidu.com/?tn=98012088_5_dg&ch=16'
#http://www.dmm.co.jp/digital/videoa/-/ranking/=/term=daily/