使用requests库和正则表达式爬取段子并保存到.txt文件
链接:
# Fetch a joke page with requests, extract text with a regular expression,
# echo every match to stdout and save all matches to neihan.txt.
import re
import sys

import requests

# Python 2 only: make implicit unicode <-> str conversions use UTF-8 instead
# of ASCII. Neither the `reload` builtin nor `sys.setdefaultencoding` is
# available on Python 3, so the hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

url = "http://hahahahhaahah.com/"
# Send a desktop-browser User-Agent instead of the requests default.
header = {'User-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'}

# A timeout keeps the script from blocking forever on an unresponsive server.
haha = requests.get(url, headers=header, timeout=10)
# Decode the response body as UTF-8 when reading `haha.text`.
haha.encoding = 'utf-8'

# NOTE(review): as written, the pattern captures the text preceding each
# newline in the response. It looks like the surrounding HTML tags of the
# original pattern were lost when this script was pasted - confirm the
# intended pattern against the target page.
heihei = re.findall('(.*?)\n', haha.text, re.S)

# 'wb' truncates any existing neihan.txt; the `with` block guarantees the
# file is closed even if printing or writing raises part-way through.
with open('neihan.txt', 'wb') as fp:
    for each in heihei:
        print(each)
        print('-' * 100)
        # The file is opened in binary mode, so encode explicitly instead of
        # relying on the interpreter's default encoding.
        fp.write(each.encode('utf-8'))
        # Original note (translated): "prevent being overwritten" - the blank
        # line keeps consecutive entries visually separated in the file.
        fp.write(b"\n\n")