python--文本处理
发布时间:2021-08-01 00:15
来源:https://blog.51cto.com/zhengbi
阅读:192
作者:woshizb110
栏目: 云计算
#!/usr/bin/python
#coding=utf-8
import os,re
import time,datetime,sys
# Incremental per-second aggregator for a comma-separated request log.
#
# Usage: <script> LOGFILE
#
# Every log line is parsed as
#     time,metric:broker:partition:topic,count,elapsed,success
# where `time` is formatted as YYYYmmddHHMMSS.  Each run resumes from the
# offset stored in "<LOGFILE>_1/temp", reads a small batch of lines, and for
# every run of consecutive lines sharing one timestamp prints avg/max/min of
# `elapsed`, the sum of `count`, and the number of lines whose `success`
# field is 0, per (metric, broker, partition, topic) combination.

# Number of lines collected before the batch is extended to a timestamp
# boundary (see _read_batch).
BATCH_LINES = 2
# Size limit handed to readline(); longer lines come back in pieces.
MAX_LINE_CHARS = 1024
# Layout of the first field of every log line.
TIME_FORMAT = '%Y%m%d%H%M%S'


def _first_field(line):
    """Return the text before the first comma of *line* (its timestamp)."""
    return line.strip('\n').split(',')[0]


def _ensure_offset_file(log_path):
    """Return the path of the offset file, creating it with '0' if absent."""
    state_dir = log_path + '_1'
    offset_path = os.path.join(state_dir, 'temp')
    if not os.path.isfile(offset_path):
        # The directory may survive even if the offset file was removed;
        # only create it when it is really missing.
        if not os.path.isdir(state_dir):
            os.mkdir(state_dir)
        with open(offset_path, 'w') as handle:
            handle.write('0')
    return offset_path


def _save_offset(offset_path, position):
    """Overwrite the offset file with *position*."""
    with open(offset_path, 'w') as handle:
        handle.write(str(position))


def _read_batch(log, offset_path):
    """Read the next batch of lines from *log*, persisting the resume offset.

    The offset is saved right after each read, i.e. it already points past
    the line that will be appended on the following iteration.  Once more
    than BATCH_LINES lines have been collected, reading continues until a
    line's timestamp differs from that of the first line read in this
    extension phase (or until end of file); that differing line is not part
    of the batch and the saved offset still points in front of it.
    """
    lines = []
    tail_stamps = []
    now = int(time.time())
    line = log.readline(MAX_LINE_CHARS)
    while line:
        # NOTE(review): this compares a YYYYmmddHHMMSS number with epoch
        # seconds, so it cannot trigger for 14-digit timestamps.  Kept
        # unchanged because the intended condition is not evident.
        if int(_first_field(line)) < now:
            print('1')
            sys.exit()
        if len(lines) > BATCH_LINES:
            tail_stamps.append(_first_field(line))
            while line:
                lines.append(line)
                line = log.readline(MAX_LINE_CHARS)
                tail_stamps.append(_first_field(line))
                if tail_stamps[1] != tail_stamps[-1]:
                    break
                _save_offset(offset_path, log.tell())
            break
        lines.append(line)
        line = log.readline(MAX_LINE_CHARS)
        _save_offset(offset_path, log.tell())
    return lines


def _group_by_timestamp(lines):
    """Split *lines* into (timestamp, lines) runs of equal consecutive timestamps."""
    groups = []
    for line in lines:
        stamp = _first_field(line)
        if not groups or groups[-1][0] != stamp:
            groups.append((stamp, []))
        groups[-1][1].append(line)
    return groups


def _report(epoch, records):
    """Print the statistics of one same-timestamp group of log lines.

    Output order follows the first appearance of each metric, partition,
    topic and broker value (nested in that order); only combinations that
    actually occur in *records* are printed.
    """
    metrics, partitions, topics, brokers = [], [], [], []
    stats = {}
    for record in records:
        fields = record.strip('\n').split(',')
        parts = fields[1].split(':')
        metric, broker, partition, topic = parts[0], parts[1], parts[2], parts[3]
        requests = int(fields[2])
        elapsed = int(fields[3])
        succeeded = int(fields[4])

        for seen, value in ((metrics, metric), (partitions, partition),
                            (topics, topic), (brokers, broker)):
            if value not in seen:
                seen.append(value)

        # A tuple key keeps combinations distinct; concatenated index
        # strings would collide as soon as one dimension reaches 10 values.
        key = (metric, partition, topic, broker)
        entry = stats.get(key)
        if entry is None:
            # Seed min/max from the first sample so that 0 is a valid value.
            entry = stats[key] = {'num': 0, 'total': 0.0, 'max': elapsed,
                                  'min': elapsed, 'count': 0, 'error': 0}
        entry['num'] += 1
        entry['total'] += elapsed
        entry['max'] = max(entry['max'], elapsed)
        entry['min'] = min(entry['min'], elapsed)
        entry['count'] += requests
        if succeeded == 0:
            entry['error'] += 1

    for metric in metrics:
        for partition in partitions:
            for topic in topics:
                for broker in brokers:
                    entry = stats.get((metric, partition, topic, broker))
                    if entry is None:
                        continue
                    tags = 'broker=%s partition=%s topic=%s' % (
                        broker, partition, topic)
                    average = round(entry['total'] / entry['num'], 1)
                    for kind, value in (('avg', average),
                                        ('max', entry['max']),
                                        ('min', entry['min']),
                                        ('count', entry['count']),
                                        ('error', entry['error'])):
                        print('%s %s %s type=%s %s'
                              % (metric, epoch, value, kind, tags))


def main(argv=None):
    """Process the next batch of the log file named by the first argument.

    *argv* defaults to ``sys.argv[1:]``.  Exits with status 1 when the batch
    is empty or a group's timestamp cannot be converted to epoch seconds.
    """
    args = sys.argv[1:] if argv is None else argv
    log_path = args[0]

    # The log is opened before any state is created, so a missing log file
    # does not leave an empty state directory behind.
    with open(log_path, 'r') as log:
        offset_path = _ensure_offset_file(log_path)
        with open(offset_path, 'r') as handle:
            log.seek(int(handle.readline()))
        lines = _read_batch(log, offset_path)

    if not lines:
        # Nothing new since the last run.
        sys.exit(1)

    for stamp, records in _group_by_timestamp(lines):
        try:
            epoch = int(time.mktime(
                datetime.datetime.strptime(stamp, TIME_FORMAT).timetuple()))
        except (ValueError, OverflowError):
            sys.exit(1)
        _report(epoch, records)


if __name__ == '__main__':
    main()
每行以逗号分隔,含义分别为:时间,指标,请求次数,请求时间(毫秒),请求成功还是失败(1表示成功,0表示失败)
q-send-succ 1376873699 8 type=max broker=0 partition=2 topic=PAY_DEAD
处理文件的结构如下:
20130823085449,q-send-succ:0:2:PAY_DEAD,1,50,1
20130823085448,q-send-fail:0:0:PAY_A1,1,20,1
q-send-succ 1376873699 2 type=count broker=0 partition=2 topic=PAY_DEAD
指标字段内部以冒号分隔,依次为:metris,broker,partition,topic
q-send-succ 1376873699 8 type=min broker=0 partition=2 topic=PAY_DEAD
python代码如下(主要是熟悉文件的打开与关闭,以及tell、seek、split和strip的使用):
20130823085448,q-send-succ:0:2:PAY_DEAD,1,8,1
要求输出为:
q-send-succ 1376873699 8.0 type=avg broker=0 partition=2 topic=PAY_DEAD
q-send-succ 1376873699 0 type=error broker=0 partition=2 topic=PAY_DEAD
20130823085449,q-send-succ:0:2:PAY_DEAD,1,200,1