======================json file with spark sql========================
from pyspark.sql.functions import *
df=spark.read.format("json").option("multiline","true").load("/dhiru/input/data2.json")
df.createOrReplaceTempView('jtable')
>>> df.printSchema()
root
|-- data1: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- caption: string (nullable = true)
| | |-- created_time: string (nullable = true)
| | |-- description: string (nullable = true)
| | |-- from: struct (nullable = true)
| | | |-- category: string (nullable = true)
| | | |-- category_list: array (nullable = true)
| | | | |-- element: struct (containsNull = true)
| | | | | |-- id: string (nullable = true)
| | | | | |-- name: string (nullable = true)
| | | |-- id: string (nullable = true)
| | | |-- name: string (nullable = true)
| | |-- icon: string (nullable = true)
| | |-- id: string (nullable = true)
| | |-- likes: struct (nullable = true)
| | | |-- data: array (nullable = true)
| | | | |-- element: struct (containsNull = true)
| | | | | |-- id: string (nullable = true)
| | | | | |-- name: string (nullable = true)
| | |-- link: string (nullable = true)
| | |-- message: string (nullable = true)
| | |-- name: string (nullable = true)
| | |-- picture: string (nullable = true)
| | |-- privacy: struct (nullable = true)
| | | |-- value: string (nullable = true)
| | |-- status_type: string (nullable = true)
| | |-- type: string (nullable = true)
| | |-- updated_time: string (nullable = true)
spark.sql("select cap.caption,cap.created_time,cat.category,lst.* from jtable lateral view explode(data1) as cap lateral view explode(data1.from) as cat lateral view explode(cat.category_list) as lst ").show()
Output :---
+------------------+--------------------+--------------------+---------------+--------------------+
| caption| created_time| category| id| name|
+------------------+--------------------+--------------------+---------------+--------------------+
|newsroom.cisco.com|2013-11-11T04:04:...|Computers/technology|139823692748565|Computers & Elect...|
|newsroom.cisco.com|2013-11-11T04:04:...|Computers/technology|177721448951559| Workplace & Office|
+------------------+--------------------+--------------------+---------------+--------------------+
from pyspark.sql.functions import *
df=spark.read.format("json").option("multiline","true").load("/dhiru/input/data2.json")
df.createOrReplaceTempView('jtable')
>>> df.printSchema()
root
|-- data1: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- caption: string (nullable = true)
| | |-- created_time: string (nullable = true)
| | |-- description: string (nullable = true)
| | |-- from: struct (nullable = true)
| | | |-- category: string (nullable = true)
| | | |-- category_list: array (nullable = true)
| | | | |-- element: struct (containsNull = true)
| | | | | |-- id: string (nullable = true)
| | | | | |-- name: string (nullable = true)
| | | |-- id: string (nullable = true)
| | | |-- name: string (nullable = true)
| | |-- icon: string (nullable = true)
| | |-- id: string (nullable = true)
| | |-- likes: struct (nullable = true)
| | | |-- data: array (nullable = true)
| | | | |-- element: struct (containsNull = true)
| | | | | |-- id: string (nullable = true)
| | | | | |-- name: string (nullable = true)
| | |-- link: string (nullable = true)
| | |-- message: string (nullable = true)
| | |-- name: string (nullable = true)
| | |-- picture: string (nullable = true)
| | |-- privacy: struct (nullable = true)
| | | |-- value: string (nullable = true)
| | |-- status_type: string (nullable = true)
| | |-- type: string (nullable = true)
| | |-- updated_time: string (nullable = true)
spark.sql("select cap.caption,cap.created_time,cat.category,lst.* from jtable lateral view explode(data1) as cap lateral view explode(data1.from) as cat lateral view explode(cat.category_list) as lst ").show()
Output :---
+------------------+--------------------+--------------------+---------------+--------------------+
| caption| created_time| category| id| name|
+------------------+--------------------+--------------------+---------------+--------------------+
|newsroom.cisco.com|2013-11-11T04:04:...|Computers/technology|139823692748565|Computers & Elect...|
|newsroom.cisco.com|2013-11-11T04:04:...|Computers/technology|177721448951559| Workplace & Office|
+------------------+--------------------+--------------------+---------------+--------------------+
No comments:
Post a Comment