Saturday, March 14, 2020

Word count using a text file from the local filesystem

from pyspark import SparkContext
from pyspark.sql import SparkSession

# Driver-level entry points shared by the rest of the script.
sc = SparkContext()
# SparkSession was previously used without being imported (NameError).
# NOTE(review): creating the session is presumably also what makes RDD.toDF
# available to the code below - confirm against the PySpark version in use.
spark = SparkSession.builder.getOrCreate()
class wc2:
    """Word-count job that runs entirely from the constructor.

    Instantiating the class reads a text file through the module-level
    SparkContext ``sc``, counts the occurrences of every space-separated
    token, and saves the resulting ``(word, count)`` pairs in CSV format.
    """

    def __init__(self,
                 input_path="D:/Software/coding/data/file2.txt",
                 output_path="D:/Software/coding/data/result1"):
        """Run the word count.

        Args:
            input_path: Text file to read. Defaults to the path this script
                originally hard-coded.
            output_path: Location the CSV result is saved to. Defaults to
                the path this script originally hard-coded.
        """
        print('HI')
        # The try block lives inside the method so the handler guards the
        # job itself; wrapped around the ``def`` statement it only covered
        # method definition and could never fire while the job was running.
        try:
            lines = sc.textFile(input_path)
            counts = (
                lines.flatMap(lambda line: line.split(' '))
                .map(lambda word: (word, 1))
                .reduceByKey(lambda left, right: left + right)
            )
            # The counts are Python ints, so the column is declared numeric;
            # declaring it as a string does not match the data in the RDD.
            df = counts.toDF(schema='word string,count bigint')
            df.write.format('csv').save(output_path)
        except IOError:
            # NOTE(review): Spark may report missing or unreadable paths via
            # its own exception types rather than IOError - confirm which
            # exceptions should be handled here.
            print("Issue with file")
     
# Instantiating wc2 runs the whole job: all of the work happens in __init__.
wc2()

No comments:

Post a Comment