"); //-->
import urllib2
import re


def download(url):
    # Fetch a page and return its HTML, or None on failure.
    print "Downloading:", url
    try:
        html = urllib2.urlopen(url).read()
    except urllib2.URLError as e:
        print "Download error:", e.reason
        html = None
    return html


def crawl_sitemap(url):
    # Download the group page, collect the linked post URLs, then save the
    # <p> text of every linked page into 123.txt.
    sitemap = download(url)
    if sitemap is None:
        return
    links = re.findall('<a href="(.*?)" title', sitemap)
    print links
    txt = open("123.txt", "w")
    try:
        for link in links:
            html = download(link)
            if html is None:
                continue  # skip pages that failed to download
            page = re.findall('<p>(.*?)</p>', html)
            for i in page:
                txt.write(i)
                txt.write("\n")
    except Exception as e:
        print "Download error:", e
    finally:
        txt.close()


url = "https://www.douban.com/group/shanghaizufang/"
crawl_sitemap(url)
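
The script above is Python 2 code (urllib2, print statements). Below is a minimal sketch of the same crawler for Python 3, assuming only the standard library; urllib2 is split into urllib.request and urllib.error there. The browser-like User-Agent header is an added assumption, since some sites (Douban included) may refuse requests from the default Python client; it is not part of the original script.

import re
import urllib.error
import urllib.request


def download(url):
    # Fetch a page and return its decoded HTML, or None on failure.
    print("Downloading:", url)
    # Assumed header so the request looks like it comes from a browser.
    req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
    try:
        return urllib.request.urlopen(req).read().decode("utf-8", "ignore")
    except urllib.error.URLError as e:
        print("Download error:", e.reason)
        return None


def crawl_sitemap(url):
    # Same flow as above: collect post links, then write each page's <p> text to 123.txt.
    sitemap = download(url)
    if sitemap is None:
        return
    links = re.findall('<a href="(.*?)" title', sitemap)
    with open("123.txt", "w", encoding="utf-8") as txt:
        for link in links:
            html = download(link)
            if html is None:
                continue
            for paragraph in re.findall('<p>(.*?)</p>', html):
                txt.write(paragraph + "\n")


if __name__ == "__main__":
    crawl_sitemap("https://www.douban.com/group/shanghaizufang/")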