python--文本处理

发布时间:2021-08-01 00:15 来源:https://blog.51cto.com/zhengbi 阅读:192 作者:woshizb110 栏目: 云计算 欢迎投稿:712375056

#!/usr/bin/python
# coding=utf-8
"""Incrementally aggregate a request log into per-second metric lines.

Usage: ``script.py LOGFILE``

Every log line is comma separated::

    time,metric:broker:partition:topic,requests,elapsed_ms,ok

* ``time`` is formatted ``%Y%m%d%H%M%S``.
* ``ok`` is ``0`` for a failed request; any other integer is not counted
  as an error.

The byte offset reached in LOGFILE is persisted in ``LOGFILE_1/temp`` so the
next run resumes where this one stopped.  For every run of consecutive lines
sharing a timestamp, and every (metric, broker, partition, topic) seen in that
run, five lines are printed::

    <metric> <epoch> <value> type=<avg|max|min|count|error> broker=.. partition=.. topic=..

Only syntax accepted by both Python 2.7 and Python 3 is used.
"""
import datetime
import itertools
import os
import re  # unused here; kept because the original script imported it
import sys
import time

# Head of a batch holds BATCH_SIZE + 1 lines before the tail rule kicks in.
BATCH_SIZE = 2
# Upper bound handed to readline(); longer lines are returned in pieces.
MAX_LINE_BYTES = 1024
TIME_FORMAT = '%Y%m%d%H%M%S'


def _timestamp_of(line):
    """Return the first comma-separated field of *line* ('' for an empty line)."""
    return line.strip('\n').split(',')[0]


def _read_line(handle):
    """Read one line (at most MAX_LINE_BYTES) from a binary handle as text."""
    raw = handle.readline(MAX_LINE_BYTES)
    if isinstance(raw, str):
        # Python 2: bytes is str, nothing to decode.
        return raw
    return raw.decode('utf-8', 'replace')


def _save_offset(offset_path, offset):
    """Overwrite the offset file with *offset*."""
    with open(offset_path, 'w') as handle:
        handle.write(str(offset))


def _load_offset(offset_path):
    """Return the stored byte offset, creating the offset file (as 0) if absent."""
    if not os.path.isfile(offset_path):
        parent = os.path.dirname(offset_path)
        # Only create the directory when it is really missing; an existing
        # directory without the offset file must not abort the run.
        if parent and not os.path.isdir(parent):
            os.mkdir(parent)
        _save_offset(offset_path, 0)
        return 0
    with open(offset_path, 'r') as handle:
        text = handle.readline().strip()
    # An empty file (e.g. an interrupted earlier write) restarts from 0.
    return int(text) if text else 0


def _guard_timestamp(line, now):
    """Print ``1`` and exit when the line's first field, as an int, is < *now*.

    NOTE(review): *now* is epoch seconds while the documented first field is
    a %Y%m%d%H%M%S number, so for well-formed lines this never fires.  The
    check is carried over unchanged from the original script; confirm intent.
    """
    if int(_timestamp_of(line)) < now:
        print(1)
        sys.exit()


def read_batch(log_path, offset_path, batch_size=BATCH_SIZE, now=None):
    """Read the next batch of lines from *log_path* and persist the new offset.

    The batch consists of the first ``batch_size + 1`` lines after the stored
    offset.  If more data follows, two further lines are taken
    unconditionally, and then every following line whose timestamp equals
    that of the second extra line.  The offset written to *offset_path*
    always points just past the last line returned, so the line that ended
    the batch is read again on the next call.

    Returns the list of lines (each still carrying its newline); the list is
    empty when nothing new is available.
    """
    if now is None:
        now = int(time.time())
    batch = []
    # Binary mode keeps tell()/seek() in plain byte offsets.
    with open(log_path, 'rb') as log:
        log.seek(_load_offset(offset_path))
        line = _read_line(log)

        while line and len(batch) <= batch_size:
            _guard_timestamp(line, now)
            batch.append(line)
            line = _read_line(log)
            _save_offset(offset_path, log.tell())

        if line:
            _guard_timestamp(line, now)
            batch.append(line)
            line = _read_line(log)
            # The line just read anchors the tail: keep going while the
            # following lines carry the same timestamp.
            anchor = _timestamp_of(line)
            _save_offset(offset_path, log.tell())
            while line:
                batch.append(line)
                line = _read_line(log)
                if _timestamp_of(line) != anchor:
                    break
                _save_offset(offset_path, log.tell())
    return batch


def parse_record(line):
    """Split one log line into ``(key, requests, elapsed_ms, ok)``.

    *key* is the tuple ``(metric, broker, partition, topic)`` taken from the
    first four colon-separated parts of the second field.

    Raises:
        ValueError: if a field is missing or a numeric field is not an int.
    """
    fields = line.strip('\n').split(',')
    try:
        labels = fields[1].split(':')
        key = (labels[0], labels[1], labels[2], labels[3])
        return key, int(fields[2]), int(fields[3]), int(fields[4])
    except (IndexError, ValueError):
        raise ValueError('malformed log line: %r' % (line,))


class _GroupStats(object):
    """Running totals for one key within one timestamp group."""

    __slots__ = ('samples', 'total_ms', 'max_ms', 'min_ms', 'requests',
                 'errors')

    def __init__(self):
        self.samples = 0
        self.total_ms = 0
        # None (not 0) marks "no sample yet", so a genuine 0 ms sample is
        # neither mistaken for an empty group nor allowed to reset min/max.
        self.max_ms = None
        self.min_ms = None
        self.requests = 0
        self.errors = 0

    def add(self, requests, elapsed_ms, ok):
        """Fold one log record into the totals."""
        self.samples += 1
        self.total_ms += elapsed_ms
        if self.max_ms is None or elapsed_ms > self.max_ms:
            self.max_ms = elapsed_ms
        if self.min_ms is None or elapsed_ms < self.min_ms:
            self.min_ms = elapsed_ms
        self.requests += requests
        if ok == 0:
            self.errors += 1

    def average_ms(self):
        """Mean elapsed time per log line, rounded to one decimal place."""
        return round(self.total_ms / float(self.samples), 1)


def summarize(lines):
    """Yield the output lines for *lines*, one timestamp group at a time.

    Consecutive lines with the same first field form a group.  Within a
    group, results are ordered by first appearance of metric, then
    partition, then topic, then broker.  Statistics are keyed by the label
    tuple itself, so distinct label combinations can never share a slot.

    Raises:
        ValueError: for an unparsable timestamp or a malformed line.
    """
    for stamp, group in itertools.groupby(lines, key=_timestamp_of):
        epoch = int(time.mktime(
            datetime.datetime.strptime(stamp, TIME_FORMAT).timetuple()))

        stats = {}
        # First-seen position of each label, per dimension
        # (metric, broker, partition, topic), used only for output order.
        ranks = ({}, {}, {}, {})
        for line in group:
            key, requests, elapsed_ms, ok = parse_record(line)
            for rank, label in zip(ranks, key):
                rank.setdefault(label, len(rank))
            if key not in stats:
                stats[key] = _GroupStats()
            stats[key].add(requests, elapsed_ms, ok)

        metric_rank, broker_rank, partition_rank, topic_rank = ranks
        ordered = sorted(stats, key=lambda k: (metric_rank[k[0]],
                                               partition_rank[k[2]],
                                               topic_rank[k[3]],
                                               broker_rank[k[1]]))
        for key in ordered:
            metric, broker, partition, topic = key
            entry = stats[key]
            tags = 'broker=%s partition=%s topic=%s' % (broker, partition,
                                                        topic)
            values = (('avg', entry.average_ms()),
                      ('max', entry.max_ms),
                      ('min', entry.min_ms),
                      ('count', entry.requests),
                      ('error', entry.errors))
            for kind, value in values:
                yield '%s %s %s type=%s %s' % (metric, epoch, value, kind,
                                               tags)


def main(argv=None):
    """Process the next batch of the log named by the first argument.

    Exits with status 1 when there is nothing new to report or when a line
    cannot be parsed, and with status 2 when no log file is given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.stderr.write('usage: %s LOGFILE\n' % sys.argv[0])
        sys.exit(2)

    log_path = args[0]
    offset_path = os.path.join(log_path + '_1', 'temp')
    batch = read_batch(log_path, offset_path)
    if not batch:
        # The original script also ended with status 1 on an empty batch.
        sys.exit(1)
    try:
        for output in summarize(batch):
            print(output)
    except (ValueError, OverflowError) as err:
        sys.stderr.write('%s\n' % (err,))
        sys.exit(1)


if __name__ == '__main__':
    main()



每行以逗号分隔,含义分别为:时间,指标,请求次数,请求时间(毫秒),请求成功还是失败(1表示成功,0表示失败)

q-send-succ 1376873699 8 type=max broker=0 partition=2 topic=PAY_DEAD

处理文件的结构如下:

20130823085449,q-send-succ:0:2:PAY_DEAD,1,50,1

20130823085448,q-send-fail:0:0:PAY_A1,1,20,1

q-send-succ 1376873699 2 type=count broker=0 partition=2 topic=PAY_DEAD

指标字段以冒号分隔,各部分依次为:metric,broker,partition,topic

q-send-succ 1376873699 8 type=min broker=0 partition=2 topic=PAY_DEAD

python代码如下(主要是熟悉文件的打开与关闭,以及tell、seek、split和strip的使用):


20130823085448,q-send-succ:0:2:PAY_DEAD,1,8,1

要求输出为:

q-send-succ 1376873699 8.0 type=avg broker=0 partition=2 topic=PAY_DEAD


q-send-succ 1376873699 0 type=error broker=0 partition=2 topic=PAY_DEAD

20130823085449,q-send-succ:0:2:PAY_DEAD,1,200,1

免责声明:本站发布的内容(图片、视频和文字)以原创、来自本网站内容采集于网络互联网转载等其它媒体和分享为主,内容观点不代表本网站立场,如侵犯了原作者的版权,请告知一经查实,将立刻删除涉嫌侵权内容,联系我们QQ:712375056,同时欢迎投稿传递力量。