python环境:python3
运行环境:win10和linux都可以,其他系统没测
1 安装依赖
pip install requests
pip install lxml
pip install feedparser
2 创建一个新文件夹
3 运行该脚本
python mzitu.py
源码如下:
# -*- coding: UTF-8 -*-
import os
import threading

import feedparser
import requests
from lxml import etree

# Browser-like headers: the image host rejects requests without an
# mzitu.com referer (hotlink protection).
HEADERS = {
    'referer': 'https://www.mzitu.com/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
}


def get_url():
    """Return the links of the 20 newest galleries from the mzitu RSS feed."""
    rss_url = 'https://www.mzitu.com/feed/'
    feeds = feedparser.parse(rss_url)
    # Slicing is safe even if the feed ever returns fewer than 20 entries
    # (the original indexed 0..19 blindly and would raise IndexError).
    return [entry['link'] for entry in feeds.entries[:20]]


def download(dirname, imgurl):
    """Download one image into directory *dirname* (created if missing).

    The local file name is the last path component of *imgurl*.
    """
    filename = imgurl.split('/')[-1]
    r = requests.get(imgurl, headers=HEADERS, stream=True)
    # exist_ok replaces the race-prone "check then mkdir" and removes the
    # duplicated write branch of the original.
    os.makedirs(dirname, exist_ok=True)
    with open(os.path.join(dirname, filename), 'wb') as f:
        # 32-byte chunks meant one write call per 32 bytes; 8 KiB is sane.
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)
    print('下载:%s中' % filename)


def get_img(url):
    """Scrape every image page of the gallery at *url* and download each image."""
    r = requests.get(url)
    page = etree.HTML(r.text)
    spans = page.xpath('/html/body/div[2]/div[1]/div[4]/a[5]/span')
    titles = page.xpath('//h2[@class="main-title"]')
    if not spans or not titles:
        return  # page failed to load or the site layout changed
    title = titles[-1].text
    pages = int(spans[-1].text)
    try:
        # Page 1 is the gallery URL itself; pages 2..N append "/<n>".
        # (The original requested the nonexistent "<url>/0" and never
        # scraped page 1 at all.)
        for i in range(1, pages + 1):
            imgpage = url if i == 1 else url + '/' + str(i)
            r1 = requests.get(imgpage)
            page1 = etree.HTML(r1.text)
            for img in page1.xpath('/html/body/div[2]/div[1]/div[3]/p/a/img'):
                download(title, img.get('src'))
    except KeyboardInterrupt:
        pass  # best-effort abort of this gallery on Ctrl-C
    except Exception as e:
        # Best-effort: skip a broken gallery, but report why instead of
        # silently swallowing every error as the original did.
        print('skip %s: %s' % (url, e))


def main():
    """Fetch the gallery list and scrape each gallery in its own thread."""
    urls = get_url()
    threads = [threading.Thread(target=get_img, args=(u,)) for u in urls]
    for t in threads:
        t.start()
    for t in threads:
        t.join()


if __name__ == '__main__':
    main()
如果遇到问题,源码请到百度网盘下载; 提取码:7pv8
4 升级版(可下载所有组图)
源码如下:
# -*- coding: UTF-8 -*-
# NOTE(review): the published source was mojibake-corrupted (UTF-8 read as
# GBK); the coding cookie and the broken string literals below have been
# restored from the intact first-version script.
import os
from concurrent.futures import ThreadPoolExecutor

import requests
from lxml import etree

# Browser-like headers: the image host rejects requests without an
# mzitu.com referer (hotlink protection).
HEADERS = {
    'referer': 'https://www.mzitu.com/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
}


def get_url2():
    """Return the link of every gallery listed on the /all/ archive page."""
    r = requests.get('https://www.mzitu.com/all/')
    page = etree.HTML(r.text)
    result = page.xpath('/html/body/div[2]/div[1]/div[2]/ul/li/p[2]/a')
    print('有%d组图' % len(result))
    return [a.get('href') for a in result]


def download(dirname, imgurl):
    """Download one image into directory *dirname* (created if missing).

    The local file name is the last path component of *imgurl*.
    """
    filename = imgurl.split('/')[-1]
    r = requests.get(imgurl, headers=HEADERS, stream=True)
    # exist_ok replaces the race-prone "check then mkdir" and removes the
    # duplicated write branch of the original.
    os.makedirs(dirname, exist_ok=True)
    with open(os.path.join(dirname, filename), 'wb') as f:
        # 32-byte chunks meant one write call per 32 bytes; 8 KiB is sane.
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)
    print('下载:%s中' % filename)


def get_img(url):
    """Scrape every image page of the gallery at *url* and download each image."""
    r = requests.get(url)
    page = etree.HTML(r.text)
    spans = page.xpath('/html/body/div[2]/div[1]/div[4]/a[5]/span')
    titles = page.xpath('//h2[@class="main-title"]')
    if not spans or not titles:
        return  # page failed to load or the site layout changed
    title = titles[-1].text
    pages = int(spans[-1].text)
    try:
        # Page 1 is the gallery URL itself; pages 2..N append "/<n>".
        # (The original requested the nonexistent "<url>/0" and never
        # scraped page 1 at all.)
        for i in range(1, pages + 1):
            imgpage = url if i == 1 else url + '/' + str(i)
            r1 = requests.get(imgpage)
            page1 = etree.HTML(r1.text)
            for img in page1.xpath('/html/body/div[2]/div[1]/div[3]/p/a/img'):
                download(title, img.get('src'))
    except KeyboardInterrupt:
        pass  # best-effort abort of this gallery on Ctrl-C
    except Exception as e:
        # Best-effort: skip a broken gallery, but report why instead of
        # silently swallowing every error as the original did.
        print('skip %s: %s' % (url, e))


def main():
    """Scrape every archived gallery with a bounded worker pool.

    The original spawned one thread per gallery (thousands at once),
    which is the memory blow-up the post warns about; a small fixed
    pool downloads the same set with bounded resources.
    """
    urls = get_url2()
    with ThreadPoolExecutor(max_workers=8) as pool:
        # list() forces iteration so worker exceptions are not dropped.
        list(pool.map(get_img, urls))


if __name__ == '__main__':
    main()
如果遇到问题,源码请到百度网盘下载; 提取码:nxoo
注意:经测试,4 升级版在运行时,会大量占用内存,内存小的电脑估计抗不住。。