python抓取图片示例
2018-07-20 来源：open-open
#!/usr/bin/python
# -*- coding:utf-8 -*-
import re
import os
import urllib, urllib2, cookielib
import shutil
from BeautifulSoup import BeautifulSoup
# ---- utils ----
def normalize_url(url):
    """Return *url* with an ``http://`` scheme prepended when it has none.

    URLs that already start with ``http://`` or ``https://`` are returned
    unchanged; anything else (including the empty string) gets ``http://``
    put in front of it.
    """
    # str.startswith replaces the Python-2-only cmp() on a slice, and the
    # tuple form keeps https URLs from being turned into "http://https://...".
    if url.startswith(("http://", "https://")):
        return url
    return "http://" + url
def safeDir(dir):
    """Return *dir* with every ``/`` removed.

    The result contains no forward slash, so it names a single directory
    level rather than a nested path.
    """
    # The parameter name shadows the builtin ``dir`` but is kept so existing
    # keyword callers keep working.
    return dir.replace('/', '')
# ---- variable ----
# Index pages are addressed as <homepagePrefix><page number><homepageSuffix>.
homepagePrefix = "http://60dxw.comww1.baisex.me/forum-47-"
homepageSuffix = ".html"
# Thread links found on an index page are appended to this prefix.
threadPrefix = "http://60dxw.comww1.baisex.me/"
# Root directory; it receives one sub-directory per index page.
homedir = "baixingge"
# ---- login ----
# NOTE(review): this cookie-aware opener is built but every download below
# goes through urllib directly -- confirm whether it is still needed.
cookie = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
opener = urllib2.build_opener(cookie)
# ---- patterns ----
# Compiled once here instead of on every loop iteration.
urlPattern = re.compile(r'href="([^"]*)"')
titlePattern = re.compile(r'>([^<]*)</a>')
imgPattern = re.compile(r'img src="([^"]*)" onload')
# ---- file ----
if not os.path.exists(homedir):
    os.mkdir(homedir)
os.chdir(homedir)
# ---- crawl ----
# Layout on disk: <homedir>/<page number>/<thread title>/<index><suffix>.
# The working directory is moved down and back up as the crawl descends.
for page in range(1, 25):
    pageUrl = '{0}{1}{2}'.format(homepagePrefix, page, homepageSuffix)
    # ---- mkdir ----
    pageDir = str(page)
    if not os.path.exists(pageDir):
        os.mkdir(pageDir)
    os.chdir(pageDir)
    # Single-argument parenthesised print emits the same text under the
    # Python 2 print statement.
    print(pageUrl)
    # ---- download ----
    html_body = urllib.urlopen(pageUrl).read()
    soup = BeautifulSoup(html_body)
    # ---- extract ----
    # Each thread is listed in a <th> cell whose class is 'new' or 'common'.
    urlRaws = soup.findAll('th', attrs = {'class' : ['new', 'common']})
    for urlRaw in urlRaws:
        cellHtml = str(urlRaw)
        h = urlPattern.search(cellHtml)
        t = titlePattern.search(cellHtml)
        if h is None or t is None:
            # Cell without a link or a title: nothing to download.
            continue
        threadUrl = h.group(1)
        threadTitle = t.group(1)
        # Check for the same sanitised name that is created below; checking
        # the raw title missed existing directories whenever it held a '/'.
        threadDir = safeDir(threadTitle)
        if not threadDir or os.path.exists(threadDir):
            # Already fetched on an earlier run, or no usable directory name.
            continue
        os.mkdir(threadDir)
        os.chdir(threadDir)
        page_url = threadPrefix + threadUrl
        print("---->{0}".format(page_url))
        print("---->{0}".format(threadDir))
        page_body = urllib.urlopen(page_url).read()
        page_soup = BeautifulSoup(page_body)
        i = imgPattern.findall(str(page_soup))
        for index, img in enumerate(i):
            print("-------->{0}".format(img))
            # splitext yields '' when the last path component has no dot,
            # where rindex('.') raised ValueError or returned text with a '/'.
            imgSuffix = os.path.splitext(img)[1]
            imgName = "{0}{1}".format(index, imgSuffix)
            urllib.urlretrieve(img, imgName, None)
        os.chdir("../")
    os.chdir("../")
标签：
版权申明：本站文章部分自网络，如有侵权，请联系：west999com@outlook.com
特别注意：本站所有转载文章言论不代表本站观点！
本站所提供的图片等素材，版权归原作者所有，如需使用，请与原作者联系。
上一篇:截取中文字符串PHP代码
下一篇:图片缩放水印PHP类
最新资讯
- SEO的用户互动：论在线评论内容的重
- 3个被忽视策略帮你提升电商网站流量
- 如何找到并改进那些效果不佳的入口
- 详细说说目标关键词
- 说说那些能够在三个月做到10万IP的
- 流量为王是SEO思维的毒药
- 透析网站跳出率：页面质量很重要！
- 百度零位置排名，如何利用其快速获
- 干货：4大标题优化技巧原理引爆自然
- 网站百度快照时间不更新或倒退怎么
热门推荐