@@ -568,3 +568,72 @@ def test_inspect_metadata_log_entries(
568568 if column == "timestamp" :
569569 continue
570570 assert left == right , f"Difference in column { column } : { left } != { right } "
571+
572+
@pytest.mark.integration
@pytest.mark.parametrize("format_version", [1, 2])
def test_inspect_history(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None:
    """Verify that ``table.inspect.history()`` matches Spark's ``<table>.history`` metadata table.

    Builds a small history with a branch in it: two inserts, a rollback to the
    first snapshot, then a third insert — so the second snapshot is *not* a
    current ancestor and the ``is_current_ancestor`` column is exercised.
    """
    identifier = "default.table_history"

    # Start from a clean slate; the table may linger from a previous run.
    try:
        session_catalog.drop_table(identifier=identifier)
    except NoSuchTableError:
        pass

    spark.sql(
        f"""
        CREATE TABLE {identifier} (
            id int,
            data string
        )
        PARTITIONED BY (data)
        """
    )

    spark.sql(
        f"""
        INSERT INTO {identifier} VALUES (1, "a")
        """
    )

    table = session_catalog.load_table(identifier)
    first_snapshot = table.current_snapshot()
    # Fail fast with a clear message here rather than letting None be
    # interpolated into the rollback CALL below, which would surface as an
    # opaque Spark SQL error far from the actual cause.
    assert first_snapshot is not None, f"Expected a snapshot after the first INSERT into {identifier}"
    snapshot_id = first_snapshot.snapshot_id

    spark.sql(
        f"""
        INSERT INTO {identifier} VALUES (2, "b")
        """
    )

    # Roll back to the first snapshot, orphaning the second one from the
    # current ancestry chain.
    spark.sql(
        f"""
        CALL integration.system.rollback_to_snapshot('{identifier}', {snapshot_id})
        """
    )

    spark.sql(
        f"""
        INSERT INTO {identifier} VALUES (3, "c")
        """
    )

    # Pick up the snapshots created via Spark since the table was loaded.
    table.refresh()

    df = table.inspect.history()

    assert df.column_names == [
        "made_current_at",
        "snapshot_id",
        "parent_id",
        "is_current_ancestor",
    ]

    # Cross-check every cell against Spark's own history metadata table.
    lhs = spark.table(f"{identifier}.history").toPandas()
    rhs = df.to_pandas()
    for column in df.column_names:
        for left, right in zip(lhs[column].to_list(), rhs[column].to_list()):
            if isinstance(left, float) and math.isnan(left) and isinstance(right, float) and math.isnan(right):
                # NaN != NaN in Python; treat matching NaNs (null parent_id) as equal.
                continue
            assert left == right, f"Difference in column {column}: {left} != {right}"
0 commit comments