Examples of pandas pivot_table and PySpark crosstab, taken from my work directory: pyWorkDir/Bigdata/Pyspark/DataForYuanPei.ipynb
pivot_table
# Convert `indcases` to a pandas DataFrame so that pandas' pivot_table can be
# used on it (presumably `indcases` is a PySpark DataFrame, in which case
# toPandas() pulls all of its rows into local memory -- confirm it is small
# enough for that).
casepandas = indcases.toPandas()

# Reshape long -> wide: one row per "Case identifier number", one column per
# distinct "Case information" value, each cell holding the sum of VALUE for
# that (row, column) pair.
# The aggregation is spelled as the string "sum" instead of np.sum: pandas
# already maps np.sum to its own sum implementation, and recent pandas 2.x
# releases emit a FutureWarning for the NumPy callable, recommending the
# string to keep the current behaviour.
casetable1 = pd.pivot_table(
    casepandas,
    values="VALUE",
    index=["Case identifier number"],
    columns=["Case information"],
    aggfunc="sum",
)
crosstab
# Count cases per (case_Date, province) pair, bring the contingency table
# into pandas, and order its rows by the label column. crosstab names that
# first column "<col1>_<col2>", hence "case_Date_province".
# NOTE(review): if case_Date holds strings, this ordering is lexicographic --
# confirm the date format sorts chronologically.
casetable = (
    casedf.crosstab("case_Date", "province")
    .toPandas()
    .sort_values("case_Date_province")
)

# Running totals down the rows for every province column, plus a "CA" column
# holding the row-wise sum across all province columns.
cumsum_casetable = (
    casetable.set_index("case_Date_province")
    .cumsum()
    .assign(CA=lambda running: running.sum(axis=1))
)

# Same kind of contingency table keyed by health region, kept as a Spark
# DataFrame and narrowed to the label column plus four selected regions.
casedftable = casedf.crosstab("case_Date", "health_region")
health_region_table = casedftable.select(
    "case_Date_health_region",
    "Toronto",
    "Montréal",
    "Vancouver Coastal",
    "Ottawa",
)