Wednesday, July 31, 2019

Parse json file using spark SQL

======================json file with spark sql========================
from pyspark.sql.functions import *
df=spark.read.format("json").option("multiline","true").load("/dhiru/input/data2.json")
df.createOrReplaceTempView('jtable')

>>> df.printSchema()

root
 |-- data1: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- caption: string (nullable = true)
 |    |    |-- created_time: string (nullable = true)
 |    |    |-- description: string (nullable = true)
 |    |    |-- from: struct (nullable = true)
 |    |    |    |-- category: string (nullable = true)
 |    |    |    |-- category_list: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- id: string (nullable = true)
 |    |    |    |    |    |-- name: string (nullable = true)
 |    |    |    |-- id: string (nullable = true)
 |    |    |    |-- name: string (nullable = true)
 |    |    |-- icon: string (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- likes: struct (nullable = true)
 |    |    |    |-- data: array (nullable = true)
 |    |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |    |-- id: string (nullable = true)
 |    |    |    |    |    |-- name: string (nullable = true)
 |    |    |-- link: string (nullable = true)
 |    |    |-- message: string (nullable = true)
 |    |    |-- name: string (nullable = true)
 |    |    |-- picture: string (nullable = true)
 |    |    |-- privacy: struct (nullable = true)
 |    |    |    |-- value: string (nullable = true)
 |    |    |-- status_type: string (nullable = true)
 |    |    |-- type: string (nullable = true)
 |    |    |-- updated_time: string (nullable = true)


spark.sql("select cap.caption,cap.created_time,cat.category,lst.* from jtable lateral view explode(data1) as cap lateral view explode(data1.from) as cat lateral view explode(cat.category_list) as lst ").show()

Output :---
+------------------+--------------------+--------------------+---------------+--------------------+
|           caption|        created_time|            category|             id|                name|
+------------------+--------------------+--------------------+---------------+--------------------+
|newsroom.cisco.com|2013-11-11T04:04:...|Computers/technology|139823692748565|Computers & Elect...|
|newsroom.cisco.com|2013-11-11T04:04:...|Computers/technology|177721448951559|  Workplace & Office|
+------------------+--------------------+--------------------+---------------+--------------------+

No comments:

Post a Comment