from pyspark.sql import SparkSession
# Start a Spark session for this script, or reuse one that already exists.
spark = SparkSession.builder.appName('creditcard_data_processing').getOrCreate()

# Path of the CSV file to load.
fn = "SYB62_123_201907_Total.csv"

# header=True takes the column names from the first row of the file;
# inferSchema=True asks Spark to infer each column's type from the data.
df = spark.read.csv(fn, header=True, inferSchema=True)

# Preview a few rows: sample without replacement at fraction 0.1, keep at most
# five rows, and print three columns without truncating long cell values.
# The seed is an integer, matching the documented type of DataFrame.sample's
# `seed` parameter, and makes the sample reproducible across runs.
(
    df.sample(withReplacement=False, fraction=0.1, seed=250)
    .limit(5)
    .select('_c1', 'Series', 'Value')
    .show(truncate=False)
)
+-----------------------------+------------------------------------------------+--------------+
|_c1                          |Series                                          |Value         |
+-----------------------------+------------------------------------------------+--------------+
|Total, all countries or areas|Exports FOB (millions of US dollars)            |5050237.9273  |
|Total, all countries or areas|Exports FOB (millions of US dollars)            |1.5843502717E7|
|Africa                       |Exports FOB (millions of US dollars)            |397465.322    |
|Africa                       |Balance imports/exports (millions of US dollars)|-10892.3263   |
|Eastern Africa               |Exports FOB (millions of US dollars)            |44536.6955    |
+-----------------------------+------------------------------------------------+--------------+
Like this:
Like Loading...