- 现在我们将使用union运算符将插入、活动更新和非活动更新拉入单个DataFrame 。将此DataFrame作为最终 Hudi 写入逻辑的增量源 。
scala> val upsertDf = insDf.union(updActiveDf).union(updInactiveDf)scala> upsertDf.show+---------+-------------+-----------------+---------------+-------------------+-------------------+-------------------+--------+|seller_id|prod_category| product_name|product_package|discount_percentage| eff_start_ts| eff_end_ts|actv_ind|+---------+-------------+-----------------+---------------+-------------------+-------------------+-------------------+--------+| 3345| Stationary| Sticky Notes| 4| 12|2022-07-09 21:30:45|9999-12-31 23:59:59| 1|| 4565| Gourmet|Dairy Milk Almond| 12| 45|2022-06-12 20:30:40|9999-12-31 23:59:59| 1|| 1234| Detergent| Tide 5L| 6| 25|2022-01-31 10:00:30|9999-12-31 23:59:59| 1|| 4565| Gourmet| Dairy Milk Silk| 6| 30|2021-06-12 20:30:40|2022-06-12 20:30:39| 0|| 1234| Detergent| Tide 2L| 6| 15|2021-12-15 15:20:30|2022-01-31 10:00:29| 0|+---------+-------------+-----------------+---------------+-------------------+-------------------+-------------------+--------+val path = "gs://target_bucket/hudi_product_catalog"upsertDf.write.format("org.apache.hudi").option(TABLE_TYPE_OPT_KEY, "COPY_ON_WRITE").option("hoodie.datasource.write.keygenerator.class","org.apache.hudi.keygen.ComplexKeyGenerator").option(RECORDKEY_FIELD_OPT_KEY, "seller_id,prod_category,eff_end_ts").option(PRECOMBINE_FIELD_OPT_KEY, "eff_start_ts").option("hoodie.table.name","hudi_product_catalog").option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY, "target_schema").option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, "hudi_product_catalog").option(OPERATION_OPT_KEY, UPSERT_OPERATION_OPT_VAL).option(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY, "true").option(PARTITIONPATH_FIELD_OPT_KEY, "actv_ind").mode(Append).save(s"$path")scala> spark.sql("refresh table stg_wmt_ww_fin_rtn_mb_dl_secure.hudi_product_catalog")scala> spark.sql("select * from stg_wmt_ww_fin_rtn_mb_dl_secure.hudi_product_catalog").show(false)+-------------------+---------------------+-------------------------------------------------------------------------+----------------------+--------------------------------------------------------------------------+---------+--------------+-----------------+---------------+-------------------+-------------------+-------------------+--------+|_hoodie_commit_time|_hoodie_commit_seqno |_hoodie_record_key |_hoodie_partition_path|_hoodie_file_name |seller_id|prod_category |product_name |product_package|discount_percentage|eff_start_ts |eff_end_ts |actv_ind|+-------------------+---------------------+-------------------------------------------------------------------------+----------------------+--------------------------------------------------------------------------+---------+--------------+-----------------+---------------+-------------------+-------------------+-------------------+--------+|20220722113258101 |20220722113258101_0_0|seller_id:3412,prod_category:Healthcare,eff_end_ts:253402300799000000 |actv_ind=1 |a94c9c58-ac6b-4841-a734-8ef1580e2547-0_0-72-2451_20220722114049500.parquet|3412 |Healthcare |Dolo 650 |10 |10 |2022-04-01 16:30:45|9999-12-31 23:59:59|1 ||20220722113258101 |20220722113258101_0_1|seller_id:1234,prod_category:Home Essential,eff_end_ts:253402300799000000|actv_ind=1 |a94c9c58-ac6b-4841-a734-8ef1580e2547-0_0-72-2451_20220722114049500.parquet|1234 |Home Essential|Hand Towel |12 |20 |2021-10-20 06:55:22|9999-12-31 23:59:59|1 ||20220722114049500 |20220722114049500_0_2|seller_id:4565,prod_category:Gourmet,eff_end_ts:253402300799000000 |actv_ind=1 |a94c9c58-ac6b-4841-a734-8ef1580e2547-0_0-72-2451_20220722114049500.parquet|4565 |Gourmet |Dairy Milk Almond|12 |45 |2022-06-12 20:30:40|9999-12-31 23:59:59|1 ||20220722114049500 |20220722114049500_0_3|seller_id:1234,prod_category:Detergent,eff_end_ts:253402300799000000 |actv_ind=1 |a94c9c58-ac6b-4841-a734-8ef1580e2547-0_0-72-2451_20220722114049500.parquet|1234 |Detergent |Tide 5L |6 |25 |2022-01-31 10:00:30|9999-12-31 23:59:59|1 ||20220722114049500 |20220722114049500_0_4|seller_id:3345,prod_category:Stationary,eff_end_ts:253402300799000000 |actv_ind=1 |a94c9c58-ac6b-4841-a734-8ef1580e2547-0_0-72-2451_20220722114049500.parquet|3345 |Stationary |Sticky Notes |4 |12 |2022-07-09 21:30:45|9999-12-31 23:59:59|1 ||20220722114049500 |20220722114049500_1_0|seller_id:4565,prod_category:Gourmet,eff_end_ts:1655065839000000 |actv_ind=0 |789e0317-d499-4d74-a5d9-ad6e6517d6b8-0_1-72-2452_20220722114049500.parquet|4565 |Gourmet |Dairy Milk Silk |6 |30 |2021-06-12 20:30:40|2022-06-12 20:30:39|0 ||20220722114049500 |20220722114049500_1_1|seller_id:1234,prod_category:Detergent,eff_end_ts:1643623229000000 |actv_ind=0 |789e0317-d499-4d74-a5d9-ad6e6517d6b8-0_1-72-2452_20220722114049500.parquet|1234 |Detergent |Tide 2L |6 |15 |2021-12-15 15:20:30|2022-01-31 10:00:29|0 |+-------------------+---------------------+-------------------------------------------------------------------------+----------------------+--------------------------------------------------------------------------+---------+--------------+-----------------+---------------+-------------------+-------------------+-------------------+--------+
经验总结扩展阅读
-
2022年10月24日是出火的黄道吉日吗 2022年农历九月廿九宜出火吗
-
亚麻衣服怎么洗不缩水,亚麻面料的衣服这样清洗不会缩水?
-
-
-
-
-
-
-
-
导语:心仪的他就在眼前 心仪的男人不唐突的4种搭讪方式,教你把握机会,迈出追爱第一步
-
-
再过4天,鸿运当头,高人一等,喜事接踵,横财不断,尽享富贵
-
六款2023年最佳手机推荐 手机好用排行榜2023
-
-
疾病 40岁是肠道疾病“高发期”,若想肠道更健康,3个坏习惯要改掉
-
-
-
-
-