用 Python 处理大数据的一个小例子,有需要的朋友可以参考一下。

最近大数据竞赛很火,本人 Python 没学多久,想试着写一下,目前只实现了数据处理的部分,主要用到了 dict、list 和文件读写的知识。
还有一点要说:我也用 MATLAB 实现过,但运行一次要将近两分钟,而 Python 几秒钟就处理完了,足见 Python 处理文本的功能之强大。
文件里的数据格式(每行一条记录,各字段以空白字符分隔):
clientid shopingid num date
1111000 3873 2 4月5日
# Defaults used when the file is run as a script.
INPUT_PATH = 'f:/s.txt'
OUTPUT_PATH = 'f:/a.txt'
MIN_MONTH = 6        # records dated in an earlier month are ignored
VOTE_THRESHOLD = 3   # minimum vote a shopping id needs to be reported


def _vote(nums):
    """Return the vote for one shopping id from its list of ``num`` strings.

    A single '1' vetoes the shopping id (vote 0).  Otherwise every '0'
    contributes 1, every '2' contributes 2 and any other value contributes 3.
    NOTE(review): the business meaning of the individual ``num`` codes is not
    visible in this file -- confirm against the data set description.
    """
    if '1' in nums:
        return 0
    weights = {'0': 1, '2': 2}
    return sum(weights.get(num, 3) for num in nums)


def _write_client(output, client_id, nums_by_shop, threshold):
    """Write ``client_id<TAB>id1,id2,...`` when any shopping id qualifies."""
    selected = [shop_id for shop_id, nums in nums_by_shop.items()
                if _vote(nums) >= threshold]
    if selected:
        # Every record gets its own terminator: the file is opened in append
        # mode, so an unterminated last record would be glued to the first
        # record of the next run.
        output.write(client_id + '\t' + ','.join(selected) + '\r\n')


def process_records(input_path=INPUT_PATH, output_path=OUTPUT_PATH,
                    min_month=MIN_MONTH, threshold=VOTE_THRESHOLD,
                    encoding=None):
    """Select, per client, the shopping ids whose vote reaches *threshold*.

    Each non-blank input line holds four whitespace-separated fields,
    ``clientid shopingid num date``, with the date written like ``4月5日``.
    Records are expected to be grouped by client: a change of client id
    closes the previous client's group, so a client id that reappears later
    is treated as a new group.

    Only records whose month is at least *min_month* are counted; this is
    applied to every record, including the first one of each client.  For
    each client with at least one qualifying shopping id, one line
    ``clientid<TAB>id1,id2,...`` terminated by ``\\r\\n`` is appended to
    *output_path*.

    Args:
        input_path: path of the text file to read.
        output_path: path of the text file to append results to.
        min_month: smallest month number that is taken into account.
        threshold: minimum vote (see ``_vote``) for a shopping id to be kept.
        encoding: text encoding for both files; ``None`` uses the platform
            default.

    Raises:
        ValueError: if a non-blank line has fewer than four fields or its
            month is not an integer.
    """
    current_client = None
    nums_by_shop = {}
    # newline='' stops text mode from turning the explicit '\r\n' into
    # '\r\r\n' on Windows.
    with open(input_path, 'r', encoding=encoding) as data_file, \
            open(output_path, 'a', encoding=encoding, newline='') as output:
        for line_no, line in enumerate(data_file, 1):
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. at the end of the file
            if len(fields) < 4:
                raise ValueError(
                    'line %d: expected 4 fields, got %d' % (line_no, len(fields)))
            client_id, shop_id, num, date_text = fields[:4]

            if client_id != current_client:
                # A new client starts: emit the finished one and reset state.
                _write_client(output, current_client, nums_by_shop, threshold)
                current_client = client_id
                nums_by_shop = {}

            month = int(date_text.partition('月')[0])
            if month >= min_month:
                nums_by_shop.setdefault(shop_id, []).append(num)

        # Emit the last client, which no following record would trigger.
        _write_client(output, current_client, nums_by_shop, threshold)


if __name__ == '__main__':
    process_records()
猜您喜欢:
