0

its pyspark create dataframe from list of lists

I have list of lists input

alert_highlightsa = [('AML3665823', [('AML-MAS', 'AML-MAS-ALL-ALL-A-M01-MAS', 'Manual Alert Created by Joanne Cegielski', 'Party Group', 'AML3665823', 'N/A', 'N/A', [('deteccrt test', 'my drill', [('my number', 'my detect')], ['my rule'])])], ['Account Number'], ['0  Issue', '0  Non-Issue', '0  No Feedback'])]

I have created schema

from pyspark.sql import *
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import pyspark.sql.functions as F

alert_schema = StructType([ StructField("alert_highlights", ArrayType(
StructType([
  StructField("entity_number_1", StringType()),
  StructField("alert_details_data", ArrayType(
      StructType([
          StructField("model", StringType()),
          StructField("rule_id", StringType()),
          StructField("rule_desc", StringType()),
          StructField("entity_type", StringType()),
          StructField("entity_key", StringType()),
          StructField("threshold_detection", StringType()),
          StructField("threshold_recurrence", StringType()),
          StructField("recurrence_trans_data", ArrayType(
              StructType([
                  StructField("detection_value", StringType()),
                  StructField("drill_trx_key", StringType()),
                  StructField("occurrence_entities", ArrayType(
                      StructType([
                          StructField("entity_number_2", StringType()),
                          StructField("detection_value_1", StringType())]))),
                  StructField("score_detail", ArrayType(
                      StructType([
                          StructField("rule", StringType())
                      ])))
                  ])))
              ]))),
  StructField("alert_texts", ArrayType(
        StructType([
            StructField("entity_number_3", StringType())
        ])
    )),
  StructField("prior_details_alert_detail", ArrayType(
        StructType([
            StructField("factor_value", StringType())
        ])
    ))])))

    ])

rdd = sc.parallelize(alert_highlightsa)
df=spark.createDataFrame(rdd,alert_schema)

df.select(F.explode('alert_highlights').alias('H')).select('H.*')\
    .select('entity_number_1',F.explode('alert_details_data').alias('A'),F.explode('alert_texts').alias('T'),F.explode('prior_details_alert_detail').alias('P'))\
    .select('entity_number_1','A.*','T.*','P.*')\
    .select('entity_number_1','model', 'rule_id','rule_desc','entity_type', 'entity_key', 'threshold_detection',' threshold_recurrence', F.explode('recurrence_trans_data').alias('R'),'entity_number_3','factor_value')\
    .select('entity_number_1','model', 'rule_id','rule_desc','entity_type', 'entity_key', 'threshold_detection',' threshold_recurrence','R.*','entity_number_3','factor_value')\
    .select('entity_number_1','model', 'rule_id','rule_desc','entity_type', 'entity_key', 'threshold_detection',' threshold_recurrence','detection_value', 'drill_trx_key', F.explode('occurrence_entities').alias('O'), F.explode('score_detail').alias('S'),'entity_number_3','factor_value')\
    .select('entity_number_1','model', 'rule_id','rule_desc','entity_type', 'entity_key', 'threshold_detection',' threshold_recurrence','detection_value', 'drill_trx_key', 'O.*', 'S.*','entity_number_3','factor_value')\
    .select('entity_number_1','model', 'rule_id','rule_desc','entity_type', 'entity_key', 'threshold_detection',' threshold_recurrence','detection_value', 'drill_trx_key', 'entity_number_2', 'detection_value_1','rule','entity_number_3','factor_value')\
    .show

I am not able to figure out where the issue is.

data from dataframe is not displayed. 

appreciate your recommendations to resolve the issue

1reply Oldest first
  • Oldest first
  • Newest first
  • Active threads
  • Popular
  • Anurag Malik , Please get this issue resolved ASAP. We need to deliver this solution to our customer immediately. Appreciate your help and support.
    - Subba Jevisetty
    Lead Data Scientist
    Wipro Limited
    (860)371-6012 (M)

    Reply Like
Like Follow
  • 1 mth agoLast active
  • 1Replies
  • 37Views
  • 2 Following