val data: RDD[String]
// Break every record into its fields on the literal "||" delimiter.
// The pipes are regex-escaped; the -1 limit keeps trailing empty fields
// instead of silently dropping them.
val data2 = data.map(_.split("\\|\\|", -1))
// Convert the RDD of field arrays into a DataFrame.
// NOTE(review): toDF() presumably relies on the session's implicits being imported — confirm.
val data3 = data2.toDF()
// Print the first 10 rows to the console.
data3.show(10)
+--------------------+
| value|
+--------------------+
|[66,................]|
|[81,...
准备工作
查看数据、创建SparkContext
// Despite its name, `spark` holds the SparkConf (the configuration), not a session.
// Master "local[6]" and application name "wordCount" are set on it.
val spark =
  new SparkConf()
    .setMaster("local[6]")
    .setAppName("wordCount")

// Build the SparkContext from that configuration.
val sc = new SparkContext(spark)

// Load the text file at the relative path "data/wordcount.txt" as an RDD.
val rdd = sc.textFile("data/wordcount.txt")
使用groupBy
/**
* 使用groupBy
*/
@Test
def test