from pyspark.sql.functions import col, size, split,udf
from pyspark.sql import SparkSession
import datetime
from datetime import timedelta
# Reuse the active Spark session if one exists, otherwise start a new one.
spark = SparkSession.builder.getOrCreate()

# Load the pipe-delimited sample file, treating its first row as the header.
df2 = (
    spark.read.format('csv')
    .option('header', 'true')
    .option('delimiter', '|')
    .load("/tmp/dataframe_sample.csv")
)
@udf(returnType='int')
def date_dif(st_date, end_date):
    """Return the number of whole days between two timestamp strings.

    Registered as a Spark UDF with an ``int`` return type.

    Args:
        st_date: Start timestamp formatted as ``%Y-%m-%d %H:%M:%S``.
        end_date: End timestamp in the same format.

    Returns:
        The ``days`` component of ``end_date - st_date`` (negative when
        ``end_date`` precedes ``st_date``), or ``None`` when either input
        is ``None``.

    Raises:
        ValueError: If a non-null input does not match the expected format.
    """
    # A null cell arrives here as None; strptime would raise TypeError on it
    # and fail the whole job, so propagate null instead.
    if st_date is None or end_date is None:
        return None

    datetime_format = '%Y-%m-%d %H:%M:%S'
    start = datetime.datetime.strptime(st_date, datetime_format)
    end = datetime.datetime.strptime(end_date, datetime_format)
    return (end - start).days
# Add a 'Noofdays' column holding the day difference computed by date_dif.
res = df2.withColumn('Noofdays', date_dif(df2.start_date, df2.end_date))

# show() prints the rows itself and returns None, so wrapping it in print()
# would only emit a stray "None" line after the table.
res.show()
# No comments:
# Post a Comment