MyFunc函數抓取你指定的url,並提取其中的href鏈接。圖片的獲取類似,一般是<img src=xxx>這樣的形式;其他的功能應該也不難,去網上搜一下應該有些例子。
import re
from urllib import FancyURLopener
from random import choice
# Pool of browser identification strings; one entry is picked at random
# wherever this list is consumed.
user_agents = [
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11",
    "Opera/9.25 (Windows NT 5.1; U; en)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12",
    "Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9",
]
class MyOpener(FancyURLopener, object):
    """FancyURLopener subclass that presents a randomly chosen User-Agent.

    Per the urllib documentation, the ``version`` attribute is the string
    the opener uses to identify itself as a user agent.

    NOTE: ``choice`` is evaluated once, when the class body executes, so
    every instance created in the same process shares the same agent string.
    """

    version = choice(user_agents)
def MyFunc(url):
    """Fetch ``url`` and return the anchor elements that carry an ``href``.

    Args:
        url: Address of the page to download; passed unchanged to
            ``MyOpener().open``.

    Returns:
        A list with one string per match, each spanning from the opening
        ``<a ... href=...>`` tag through its closing ``</a>``, in document
        order. Matching is case-insensitive.

    Raises:
        Whatever ``MyOpener().open`` or ``read`` raise on failure (typically
        ``IOError``); nothing is caught here.
    """
    myopener = MyOpener()
    response = myopener.open(url)
    try:
        s = response.read()
    finally:
        # Release the connection even when read() fails.
        response.close()

    # '.' does not match a newline, so flatten the page first; this lets an
    # anchor that is split over several lines still be found.
    ss = s.replace("\n", " ")

    # Find href links. The attribute part is limited to [^>] so that 'href='
    # must belong to the same opening tag; the looser '<a.*?href=' could
    # start at '<abbr'/'<article' or at an <a> without href and swallow
    # everything up to a later link.
    urls = re.findall(r"<a\s[^>]*?href=.*?</a>", ss, re.I)

    for i in urls:
        # TODO: placeholder kept from the original snippet ("do sth.") --
        # put per-link processing here.
        pass

    return urls