on 04-29-2024 01:09 PM
How do I parse a JSON string from column to flattened structure in Incorta?
Assuming a Incorta table has a column with JSON string then here is a sample pyspark code which you can use to create a Materialized view in Incorta -
from pyspark.sql.types import *
import json
def parse_json_udf(json_string, col_name):
if json_string == None or json_string == "":
return None
try:
parsed_json = json.loads(json_string)
if col_name not in parsed_json:
return None
value = parsed_json[col_name]
if type(value) is list:
return ", ".join(value)
if type(value) is str:
return str
return str(value)
except Exception:
return None
spark.udf.register('parse_json', parse_json_udf, StringType())
df = read("BI_Test.Activities").select('ID', 'ExtraData')
df.createOrReplaceTempView("table")
df = spark.sql(
"""
SELECT
ID,
parse_json(ExtraData, 'orig_activity') AS orig_activity,
parse_json(ExtraData, 'orig_activity_status') AS orig_activity_status,
parse_json(ExtraData, 'new_activity_status') AS new_activity_status,
parse_json(ExtraData, 'new_activity') AS new_activity,
parse_json(ExtraData, 'comments') AS comments,
parse_json(ExtraData, 'changes') AS changes
FROM table
"""
)
save(df)