0
我先用下面的代码来说明我的问题(Spark RDD 中的字符串替换):
numPartitions = 2
rawData1 = sc.textFile('train_new.csv', numPartitions,use_unicode=False)
rawData1.take(1)
['1,0,0,0,0,0,0,0,0,0,0,1,0,0,5,0,0,0,0,0,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,9,0,0,0,0,0,Class_2']
现在我想把 Class_2 替换为 2。
替换后的结果应该是:
['1,0,0,0,0,0,0,0,0,0,0,1,0,0,5,0,0,0,0,0,0,0,0,0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,9,0,0,0,0,0,2']
一旦我弄清楚如何处理这一行,我就会对整个数据集执行同样的操作。
提前致谢,Aashish