提出这个问题的原因:我正在使用来自 Android 设备的传感器数据进行活动识别,因此让陀螺仪和加速度计数据的时间戳一一对应非常重要。
1 回答
2
创建演示数据:
# Demo input with a defect: the rows whose first column is "weird1" or
# "weird2" have no matching timestamp in the reference data below.
# NOTE: the literal starts with a newline, so the file begins with a blank line.
data = """
ts1,d001,d002,d003
ts2,d001,d002,d003
ts3,d001,d002,d003
weird1,d001,d002,d003
weird2,d001,d002,d003
ts4,d001,d002,d003
"""
# Reference input: every row starts with one of the timestamps ts1..ts4.
other = """
ts1,f001,f002,f003
ts2,f001,f002,f003
ts3,f001,f002,f003
ts4,f001,f002,f003
"""
# Write both demo strings to CSV files in the current working directory.
fn1 = "d1.csv"
fn2 = "d2.csv"
with open(fn1, "w") as f:
    f.write(data)
with open(fn2, "w") as f:
    f.write(other)
现在解析:
import csv
def readFile(name):
    """Read a headerless 4-column CSV file into a list of row dicts.

    Args:
        name: Path of the CSV file to read.

    Returns:
        A list with one dict per CSV row, keyed by the fixed field names
        "ts", "dp1", "dp2" and "dp3" (the file has no header row of its own).
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation recommends for file objects passed to a reader.
    with open(name, "r", newline="") as f:
        reader = csv.DictReader(f, fieldnames=["ts", "dp1", "dp2", "dp3"])
        return list(reader)
# Load both demo files as lists of row dicts and show the raw contents.
badData, goodData = readFile(fn1), readFile(fn2)
print(badData)
print(goodData)

# Collect every timestamp that occurs in the reference data.
goodTs = {row["ts"] for row in goodData}

# Keep only the rows of the defective data whose timestamp is also present
# in the reference data; everything else is dropped.
cleanedData = list(filter(lambda row: row["ts"] in goodTs, badData))
print(cleanedData)
1 回答
创建演示数据:
现在解析:
输出:
现在要消除不良数据点:
输出:
完成。