Example of date functions of pyspark

  AI, Data, Data Analytics, PySpark

from pyspark.sql import SparkSession
from pyspark.sql.types import *  #data types
from pyspark.sql import functions as F   #functions

spark=SparkSession.builder.appName('XIU-Daily').getOrCreate()

input_fn = 's-p-tsx-60-futures_01.csv'
df = spark.read.csv(input_fn,header=True,inferSchema=True)
df.show(3)
+-------------------+-----+
|               date|value|
+-------------------+-----+
|1999-09-07 00:00:00|416.5|
|1999-09-08 00:00:00|417.2|
|1999-09-09 00:00:00|421.5|
+-------------------+-----+

df=df.withColumn('Date',F.date_format('date','yyyy-MM-dd')) #change date format
df=df.withColumn('current_date',F.current_date())  #current date

df=df.withColumn('year',F.year('date'))
df=df.withColumn('month',F.month('date'))
df=df.withColumn('dayofmonth',F.dayofmonth('date'))
df=df.withColumn('minute',F.minute('date'))
df=df.withColumn('second',F.second('date'))
df=df.withColumn('dayofyear',F.dayofyear('date'))
df=df.withColumn('dayofweek',F.dayofweek('date'))
df=df.withColumn('weekofyear',F.weekofyear('date'))
df=df.withColumn('quarter',F.quarter('date'))

df=df.withColumn('next_day_Mon',F.next_day('date','Mon'))
df=df.withColumn('next_day_Tue',F.next_day('date','Tue'))
df=df.withColumn('next_day_Wed',F.next_day('date','Wed'))
df=df.withColumn('next_day_Thu',F.next_day('date','Thu'))
df=df.withColumn('next_day_Fri',F.next_day('date','Fri'))
df=df.withColumn('next_day_Sat',F.next_day('date','Sat'))
df=df.withColumn('next_day_Sun',F.next_day('date','Sun'))

df=df.withColumn('trunc_year',F.trunc('date','year'))
df=df.withColumn('trunc_month',F.trunc('date','month'))

df=df.withColumn('last_day',F.last_day('date'))
df=df.withColumn('date_add_plus',F.date_add('Date',7)) # after 7 days
df=df.withColumn('date_add_minus',F.date_add('Date',-10)) # 10 days before
df=df.withColumn('date_sub',F.date_sub('date',10))
df=df.withColumn('months_between',F.months_between('date','current_date'))
df=df.withColumn('add_months_plus',F.add_months('Date',5)) # after 5 months
df=df.withColumn('add_months_minus',F.add_months('Date',-5)) # 5 months before
df=df.withColumn('Add5Years',F.add_months('Date',5*12)) # after 5 years
df=df.withColumn('sub5Years',F.add_months('Date',-5*12)) # 5 years ago
df.select('Date','year','month','dayofmonth','minute','second','last_day','dayofweek','next_day_Mon').show(5)
+----------+----+-----+----------+------+------+----------+---------+------------+
|      Date|year|month|dayofmonth|minute|second|  last_day|dayofweek|next_day_Mon|
+----------+----+-----+----------+------+------+----------+---------+------------+
|1999-09-07|1999|    9|         7|     0|     0|1999-09-30|        3|  1999-09-13|
|1999-09-08|1999|    9|         8|     0|     0|1999-09-30|        4|  1999-09-13|
|1999-09-09|1999|    9|         9|     0|     0|1999-09-30|        5|  1999-09-13|
|1999-09-10|1999|    9|        10|     0|     0|1999-09-30|        6|  1999-09-13|
|1999-09-13|1999|    9|        13|     0|     0|1999-09-30|        2|  1999-09-20|
+----------+----+-----+----------+------+------+----------+---------+------------+