2016-11-28 47 views
0

我想用 org.apache.spark.ml.classification.MultilayerPerceptronClassifier 做多类分类。下面是我使用的代码。我有 262 个特征,需要把这些特征列全部提供给 MultilayerPerceptronClassifier。有人可以解释一下如何为 MultilayerPerceptronClassifier 指定特征吗?
Apache Spark MultilayerPerceptronClassifier 设置特征

我可以使用 setFeaturesCol() 方法指定特征,但这并不可行,因为用它一次只能指定一列,而我有 262 个特征。

import org.apache.commons.lang3.ArrayUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel;
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier;
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator;
import org.apache.spark.ml.feature.StringIndexer;
import org.apache.spark.ml.feature.StringIndexerModel;
import org.apache.spark.ml.feature.VectorAssembler;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

/**
 * Trains and evaluates a multilayer perceptron classifier on CSV data.
 *
 * <p>Every CSV column except {@link #RESPONSE_VARIABLE} is treated as a
 * predictor. Because {@code MultilayerPerceptronClassifier} reads its input
 * from a single vector column, the individual predictor columns are first
 * merged into one vector column with {@code VectorAssembler}; that column is
 * then handed to the classifier via {@code setFeaturesCol}.
 */
public class NN {

    /** Name of the CSV column that holds the class to predict. */
    final static String RESPONSE_VARIABLE = "Activity";

    /** Column produced by the label indexer (double class indices starting at 0). */
    private static final String LABEL_COLUMN = "indexedLabel";

    /** Column produced by the vector assembler (all predictors as one vector). */
    private static final String FEATURES_COLUMN = "features";

    /** Number of output classes, i.e. the size of the network's output layer. */
    private static final int NUM_CLASSES = 12;

    /**
     * Loads the data, assembles the feature vector, trains the network and
     * prints the accuracy measured on the test data frame.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        SparkConf sparkConf = new SparkConf();
        sparkConf.setAppName("test-client").setMaster("local[2]");
        sparkConf.set("spark.driver.allowMultipleContexts", "true");
        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);
        try {
            SQLContext sqlContext = new SQLContext(javaSparkContext);

            // Convert data in csv format to Spark data frames.
            DataFrame trainDataFrame =
                    loadCsv(sqlContext, "/home/thamali/Desktop/Project/csv/libsvm/train.csv");
            // NOTE(review): the test set is read from the same file as the training
            // set, so the reported accuracy is a training accuracy -- confirm whether
            // a separate test file was intended.
            DataFrame testDataFrame =
                    loadCsv(sqlContext, "/home/thamali/Desktop/Project/csv/libsvm/train.csv");

            // Every column except the response is a predictor.
            // Assumes all predictor columns were inferred as numeric; VectorAssembler
            // rejects string columns -- TODO confirm against the CSV.
            String[] predictors = trainDataFrame.columns();
            predictors = ArrayUtils.removeElement(predictors, RESPONSE_VARIABLE);

            // Map the response values to double indices 0..k-1, which is the label
            // encoding the classifier expects. The indexer is fitted on the training
            // data only and reused for the test data so both share one mapping.
            StringIndexerModel labelIndexer = new StringIndexer()
                    .setInputCol(RESPONSE_VARIABLE)
                    .setOutputCol(LABEL_COLUMN)
                    .fit(trainDataFrame);

            // Merge all predictor columns into a single vector column.
            VectorAssembler assembler = new VectorAssembler()
                    .setInputCols(predictors)
                    .setOutputCol(FEATURES_COLUMN);

            DataFrame train = assembler.transform(labelIndexer.transform(trainDataFrame));
            DataFrame test = assembler.transform(labelIndexer.transform(testDataFrame));

            // Layers of the neural network: the input layer has one node per
            // predictor column, followed by two hidden layers of size 50 and 40 and
            // an output layer with one node per class.
            int[] layers = new int[] {predictors.length, 50, 40, NUM_CLASSES};

            // Create the trainer and point it at the assembled columns.
            MultilayerPerceptronClassifier trainer = new MultilayerPerceptronClassifier()
                    .setFeaturesCol(FEATURES_COLUMN)
                    .setLabelCol(LABEL_COLUMN)
                    .setLayers(layers)
                    .setBlockSize(128)
                    .setSeed(1234L)
                    .setMaxIter(100);

            // Train the model.
            MultilayerPerceptronClassificationModel model = trainer.fit(train);

            // Compute accuracy on the test set.
            DataFrame result = model.transform(test);
            DataFrame predictionAndLabels = result.select("prediction", LABEL_COLUMN);
            // The Spark 1.x evaluator (the API generation that has a Java DataFrame
            // class) names the overall-accuracy metric "precision"; "accuracy" is
            // only accepted from Spark 2.0 on.
            MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
                    .setLabelCol(LABEL_COLUMN)
                    .setPredictionCol("prediction")
                    .setMetricName("precision");
            System.out.println("Accuracy = " + evaluator.evaluate(predictionAndLabels));
        } finally {
            // Release the Spark context even when loading or training fails.
            javaSparkContext.stop();
        }
    }

    /**
     * Reads a CSV file that has a header row into a data frame, letting the
     * spark-csv reader infer the column types.
     *
     * @param sqlContext context used to create the reader
     * @param path       location of the CSV file
     * @return the loaded data frame
     */
    private static DataFrame loadCsv(SQLContext sqlContext, String path) {
        return sqlContext.read().format("com.databricks.spark.csv")
                .option("inferSchema", "true")
                .option("header", "true")
                .load(path);
    }

}

回答

0

我们可以使用 Apache Spark 的 VectorAssembler 把所有需要的特征列合并成一个特征向量列,然后通过 setFeaturesCol() 把该列传给分类器。