from pyspark.sql import SparkSession
# Create a Spark session
spark = SparkSession.builder.appName("example").getOrCreate()
# Sample data for DataFrame 1
data1 = [("Alice", 25, "New York"),
("Bob", 30, "Los Angeles"),
("Charlie", 35, "Chicago")]
# Sample data for DataFrame 2
data2 = [("David", 40, "San Francisco"),
("Eve", 45, "Miami"),
("Frank", 50, "Seattle")]
# Create DataFrames
columns = ["Name", "Age", "City"]
df1 = spark.createDataFrame(data1, columns)
df2 = spark.createDataFrame(data2, columns)
# Show the DataFrames
df1.show()
df2.show()
# Perform a union operation
union_df = df1.union(df2)
# Show the result
union_df.show()
Subscribe to:
Post Comments (Atom)
Data synchronization in Lakehouse
Data synchronization in Lakebase ensures that transactional data and analytical data remain up-to-date across the lakehouse and Postgres d...
-
Steps to Implement Medallion Architecture : Ingest Data into the Bronze Layer : Load raw data from external sources (e.g., databases, AP...
-
from pyspark.sql import SparkSession from pyspark.sql.types import ArrayType, StructType from pyspark.sql.functions import col, explode_o...
-
Databricks Platform Architecture The Databricks platform architecture consists of two main components: the Control Plane and the Data Pla...
No comments:
Post a Comment