Thursday, October 1, 2020

PySpark: Calculate the Number of Days Between Two Dates

import datetime
from datetime import timedelta

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, size, split, udf


# Reuse the active Spark session if one exists; otherwise create one.
spark = SparkSession.builder.getOrCreate()

# Load the pipe-delimited sample file, using its first row as column names.
df2 = (
    spark.read.format('csv')
    .option('header', 'true')
    .option('delimiter', '|')
    .load("/tmp/dataframe_sample.csv")
)


@udf(returnType='int')
def date_dif(st_date, end_date):
    """Return the number of whole days from ``st_date`` to ``end_date``.

    Args:
        st_date: Start timestamp as a ``'%Y-%m-%d %H:%M:%S'`` string, or None.
        end_date: End timestamp in the same format, or None.

    Returns:
        The ``days`` component of ``end_date - st_date``. Partial days are
        floored, so the value is negative when the end precedes the start.
        None when either input is None, so a row with a missing date yields
        a null result instead of raising inside the UDF.

    Raises:
        ValueError: If a non-null input does not match the expected format.
    """
    # Null column values reach a Python UDF as None; strptime(None, ...)
    # would raise TypeError, so propagate the null instead.
    if st_date is None or end_date is None:
        return None

    datetime_format = '%Y-%m-%d %H:%M:%S'
    start = datetime.datetime.strptime(st_date, datetime_format)
    end = datetime.datetime.strptime(end_date, datetime_format)
    return (end - start).days

  

# Add a 'Noofdays' column computed from each row's start_date and end_date.
res = df2.withColumn('Noofdays', date_dif(df2.start_date, df2.end_date))

# show() prints the rows itself and returns None, so it is called directly
# rather than passed to print(), which would emit an extra "None" line.
res.show()

No comments:

Post a Comment