import urllib
import urllib2
import requests
def getHtml_urllib(url):
    """Fetch ``url`` with ``urllib.urlopen`` and print the raw response body.

    The body is printed exactly as received; no character-set conversion
    is applied here (``getHtml_GBK_urllib2`` handles GBK-encoded pages).

    :param url: address of the page to download.
    :returns: ``None``; the page content is written to standard output.
    """
    page = urllib.urlopen(url)
    try:
        html = page.read()
    finally:
        # Always release the connection, even when read() fails.
        page.close()
    print(html)
# Method 1 (function above): fetch sites that are not GBK-encoded

def getHtml_GBK_urllib2(url):
    """Fetch a GBK-encoded page with ``urllib2`` and print it as UTF-8.

    :param url: address of the page to download.
    :returns: ``None``; the transcoded page is written to standard output.
    """
    req = urllib2.Request(url)
    response = urllib2.urlopen(req)
    try:
        raw = response.read()
    finally:
        # Always release the connection, even when read() fails.
        response.close()
    # Decode the bytes as GBK, then re-encode so the printed output is UTF-8.
    print(unicode(raw, 'GBK').encode('UTF-8'))
# Method 2 (function above): fetch GBK-encoded sites

def getHtml_header(url, timeout=30):
    """Fetch ``url`` with ``requests`` using browser-like headers and print the text.

    :param url: address of the page to download.
    :param timeout: seconds ``requests`` waits on the server before giving
        up. Pass ``None`` to wait indefinitely, which is what happens when
        no timeout is supplied to ``requests.get``.
    :returns: ``None``; the decoded page text is written to standard output.
    """
    # Build the request headers.
    agent = 'Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0'
    headers = {
        'User-Agent': agent,
        'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    }

    # Without a timeout an unresponsive server would block this call forever.
    r = requests.get(url, headers=headers, timeout=timeout)
    print(r.text)



# Page downloaded when this script runs.
url = 'http://www.w3school.com.cn/h.asp'

# Fetch and print the page using the plain urllib variant.
getHtml_urllib(url)

# getHtml_GBK_urllib2(url)
# getHtml_header(url)


#https://www.baidu.com/?tn=98012088_5_dg&ch=16'
#http://www.dmm.co.jp/digital/videoa/-/ranking/=/term=daily/
3.python code

results matching ""

No results matching ""