"""Word-count practice job: count occurrences of each word in hello.txt using PySpark."""
import os

from pyspark import SparkConf, SparkContext

# Tell PySpark which Python interpreter to launch for its workers.
# Raw string: the Windows backslashes must not be read as escape sequences
# ("\p" and "\c" are invalid escapes and trigger warnings on recent Pythons).
os.environ["PYSPARK_PYTHON"] = r"D:\programtool\conda\python"

conf = SparkConf().setMaster("local[*]").setAppName("practise_spark_app")
sc = SparkContext(conf=conf)

try:
    # Read the input file; each RDD element is one line of text.
    rdd = sc.textFile("hello.txt")

    # Split every line on single spaces and flatten into one RDD of words.
    word_rdd = rdd.flatMap(lambda x: x.split(" "))

    # Turn each word into a (word, 1) pair so counts can be summed per key.
    word_with_one_rdd = word_rdd.map(lambda x: (x, 1))

    # Group by word and add up the 1s to get the per-word count.
    result_rdd = word_with_one_rdd.reduceByKey(lambda a, b: a + b)

    # RDD transformations are lazy: collect() runs the job and brings the
    # (word, count) pairs back to the driver. Printing the RDD itself would
    # only show its object representation, not the results.
    print(result_rdd.collect())
finally:
    # Always release the SparkContext, even if the job fails
    # (e.g. hello.txt is missing).
    sc.stop()