-
This sample materialized view (MV) script can be used to validate data in incorta versus the data from the report of customer's legacy BI solution for help during testing .
1)First download data from incorta dashboard and existing BI report in CSV files and import them into table in a Test schema and load these two tables.
2) Create a mv in Incorta using the below sample script. Make sure you modify the table and column names depending on step 1.
3) Load the MV to find the diffs and then build a dashboard on this MV to analyze the diffs.
------------
import pyspark.sql.functions as F
from pyspark.sql.functions import lit, when, col, coalesce
def build_diff():
str1 = """SELECT Col1,Col2,Col3 AS Amount FROM LEGACY_BI_RPT """
str2 = """SELECT Col1,Col2,MyCol AS Amount FROM INCORTA_BI_RPT """df1 = spark.sql(str1)
df2 = spark.sql(str2)
m1 = df1.subtract(df2)
m1 = m1.withColumn("Type", F.lit('BINotMatchingIncorta'))
m2 = df2.subtract(df1)
m2 = m2.withColumn("Type", F.lit('IncortaNotMatchingBI'))
m = m1.union(m2)
return mdf_BI_DETAILS=read("Test.LEGACY_BI_RPT")
df_BI_DETAILS.createOrReplaceTempView("LEGACY_BI_RPT")
df_INCORTA_DETAILS=read("Test.INCORTA_BI_RPT")
df_INCORTA_DETAILS.createOrReplaceTempView("INCORTA_BI_RPT")result =()
result = build_diff()
save(result)-------------
Reply -
HI Amit,
I m trying to compare two reports.but when i pull the columns from my MV it is not showing up any data,even though the two reports are not same. can you please check and tell me what could be the issue. i m attaching the MV script along with two sample data's that i took for validation.
import pyspark.sql.functions as F
from pyspark.sql.functions import lit, when, col, coalesce
def build_diff():
str1 = """SELECT org_id,operating_unit as org FROM report1 """
str2 = """SELECT org_id,operating_unit as org FROM report2 """df1 = spark.sql(str1)
df2 = spark.sql(str2)
m1 = df1.subtract(df2)
m1 = m1.withColumn("Type", F.lit('BINotMatchingIncorta'))m2 = df2.subtract(df1)
m2 = m2.withColumn("Type", F.lit('IncortaNotMatchingBI'))
m = m1.union(m2)
return mdf_BI_DETAILS=read("A_DEMO.report1")
df_BI_DETAILS.createOrReplaceTempView("report1")
df_INCORTA_DETAILS=read("A_DEMO.report2")
df_INCORTA_DETAILS.createOrReplaceTempView("report2")result =()
result = build_diff()
save(result)Reply -
Note : This script is also not returning records.
import pyspark.sql.functions as F
from pyspark.sql.functions import lit, when, col, coalesce
def build_diff():
str1 = """SELECT * FROM report1 """
str2 = """SELECT * FROM report2 """df1 = spark.sql(str1)
df2 = spark.sql(str2)
m1 = df1.subtract(df2)
m1 = m1.withColumn("Type", F.lit('BINotMatchingIncorta'))m2 = df2.subtract(df1)
m2 = m2.withColumn("Type", F.lit('IncortaNotMatchingBI'))
m = m1.union(m2)
return mdf_BI_DETAILS=read("A_DEMO.report1")
df_BI_DETAILS.createOrReplaceTempView("report1")
df_INCORTA_DETAILS=read("A_DEMO.report2")
df_INCORTA_DETAILS.createOrReplaceTempView("report2")result =()
result = build_diff()
save(result)Reply