&&&&&
# -*- coding: utf-8 -*-
# Scrape a company's automatic-monitoring data tables, one request per
# day and page, and store all rows in a single .xls workbook.
from bs4 import BeautifulSoup
import requests
from xlwt import Workbook
import re
import time
import datetime
import sys

# Python 2 only: make UTF-8 the implicit str<->unicode codec, as the
# original script did. Python 3 has no ``reload`` builtin and does not
# need the hack, so the call is skipped there.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

# Spreadsheet header row; one title per scraped table column.
_HEADERS = (
    u'序号',
    u'监测点位',
    u'监测时间',
    u'监测项目',
    u'监测结果',
    u'标准限值',
    u'单位',
    u'是否达标',
    u'超标倍数',
    u'评价标准',
    u'排放去向',
    u'排放方式',
    u'备注',
)
_COLUMN_COUNT = len(_HEADERS)
# Columns that may be blank or missing in a row; they default to ''.
_OPTIONAL_COLUMNS = (8, 12)
_DATE_FORMAT = '%Y-%m-%d'
# Pause (seconds) used to throttle requests between pages.
_PAGE_DELAY = 0.8


def _post_with_retry(url, form, delay):
    """POST *form* to *url*; on a request failure wait *delay* s and retry once."""
    try:
        return requests.post(url, data=form)
    except requests.RequestException:
        time.sleep(delay)
        return requests.post(url, data=form)


def _first_token(cells, index):
    """Return the first whitespace-separated token of cells[index], or None."""
    if index >= len(cells):
        return None
    tokens = cells[index].text.split()
    return tokens[0] if tokens else None


def _parse_row(cells):
    """Turn the <td> cells of one table row into a 13-item record.

    Returns None when a required column is missing or empty (header rows
    and layout rows fall into this case), so the caller can skip the row.
    """
    record = []
    for index in range(_COLUMN_COUNT):
        token = _first_token(cells, index)
        if token is None:
            if index not in _OPTIONAL_COLUMNS:
                return None
            token = ''
        record.append(token)
    return record


def shuju(url, date, page):
    """Fetch the monitoring rows shown on one page for one date.

    Args:
        url: address the query form is POSTed to.
        date: day to query, sent as the ``startTime`` form field.
        page: page index, sent as the ``pageIndex`` form field.

    Returns:
        A list of records; each record is a list of 13 strings holding the
        first token of every table cell (optional cells become '').
    """
    fromdata = {
        "startTime": date,
        "pageIndex": page,
    }
    response = _post_with_retry(url, fromdata, 2)
    soup = BeautifulSoup(response.text, 'lxml')
    datalist = []
    for row in soup.find_all('tr'):
        record = _parse_row(row.find_all('td'))
        if record is not None:
            datalist.append(record)
    return datalist


def pageNumber(url, date):
    """Return ``(page_count, company_name)`` for the given date.

    The page count is read from the pager text after its '/' separator
    (presumably of the form "current/total" - verify against the site).
    The complete run of digits is returned as a string; the previous code
    kept only the first character, so 12 pages were reported as 1.

    Raises:
        ValueError: if the pager or company-title element is missing, or
            no page count can be read from the pager text.
    """
    fromdata = {
        "startTime": date,
        "pageIndex": "",
    }
    response = _post_with_retry(url, fromdata, 1)
    soup = BeautifulSoup(response.text, 'lxml')
    pager = soup.find('span', class_="clr_b ver_mid")
    title = soup.find('div', class_="com_tit_new f_22 clr_3")
    if pager is None or title is None:
        raise ValueError("pager or company title not found for %s" % date)
    parts = (pager.string or '').split('/')
    match = re.match(r'\s*([0-9]+)', parts[1]) if len(parts) > 1 else None
    if match is None:
        raise ValueError("cannot read page count for %s" % date)
    number = match.group(1)
    compname = title.string
    return number, compname


def Date_list_generation(start, end):
    """Return every date from *start* to *end*, both inclusive.

    Both arguments are 'YYYY-MM-DD' strings (or objects whose ``str()`` is
    one); the result is a list of strings in the same format. An empty
    list is returned when *start* is later than *end*.

    The previous version advanced the date before recording it, which
    silently dropped the start date from the range.
    """
    current = datetime.datetime.strptime(str(start), _DATE_FORMAT)
    last = datetime.datetime.strptime(str(end), _DATE_FORMAT)
    one_day = datetime.timedelta(days=1)
    datelist = []
    while current <= last:
        datelist.append(current.strftime(_DATE_FORMAT))
        current += one_day
    return datelist


def pao(start, end, url):
    """Scrape all pages for every date in [start, end] and save an .xls file.

    The workbook is written to "<company>_<start>_<end>.xls" in the
    current directory. Progress is printed as "<date> <page>" for each
    page fetched; a page that fails is reported by its number alone and
    skipped.

    Raises:
        ValueError: if the date range is empty (start later than end).
    """
    dates = Date_list_generation(start, end)
    if not dates:
        raise ValueError("empty date range: %s to %s" % (start, end))

    book = Workbook(encoding='utf-8')
    sheet1 = book.add_sheet('Sheet 1')
    for column, header in enumerate(_HEADERS):
        sheet1.write(0, column, header)

    records = []
    compname = None
    for date in dates:
        pagenumber, compname = pageNumber(url, date)
        for page in range(1, int(pagenumber) + 1):
            try:
                page_records = shuju(url, date, page)
            except Exception:
                # Best effort: report the failed page and keep going.
                # Nothing is appended, so an earlier page's rows are not
                # duplicated in its place.
                sys.stdout.write("%s\n" % page)
            else:
                sys.stdout.write("%s %s\n" % (date, page))
                records.extend(page_records)
                time.sleep(_PAGE_DELAY)
            # Same throttling as before: one extra pause after every page.
            time.sleep(_PAGE_DELAY)

    # Row 0 holds the headers, so data rows start at 1.
    for row_index, record in enumerate(records, 1):
        for column, value in enumerate(record):
            sheet1.write(row_index, column, value)

    tablename = "%s_%s_%s.xls" % (compname, start, end)
    book.save(tablename)


if __name__ == "__main__":
    start = "2017-05-01"
    end = "2017-06-01"
    url = ""
    pao(start, end, url)
免责声明:本站发布的内容(图片、视频和文字)以原创、来自互联网转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:ts@56dr.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。
Copyright © 2009-2021 56dr.com. All Rights Reserved. 特网科技 版权所有 珠海市特网科技有限公司 粤ICP备16109289号
域名注册服务机构:阿里云计算有限公司(万网) 域名服务机构:烟台帝思普网络科技有限公司(DNSPod) CDN服务:阿里云计算有限公司 中国互联网举报中心 增值电信业务经营许可证B2 建议您使用Chrome、Firefox、Edge、IE10及以上版本和360等主流浏览器浏览本网站