weldpua2008 · August 4, 2020 10:13
diff --git a/example_spark.py b/example_spark.py
 from pyspark.sql.types import ArrayType, StructField, StructType, StringType, IntegerType
 from pyspark.sql import SparkSession

 # Obtain the Spark session for this example (getOrCreate reuses a live one).
 spark = (
     SparkSession.builder
     .appName('appName')
     .getOrCreate()
 )

 # Sample rows: one (category, count, description) tuple per record.
 data = [
     ('Category A', 100, "This is category A"),
     ('Category B', 120, "This is category B"),
     ('Category C', 150, "This is category C"),
 ]

 # Explicit schema mirroring the tuple layout above; every column is nullable.
 schema = StructType([
     StructField(name='Category', dataType=StringType(), nullable=True),
     StructField(name='Count', dataType=IntegerType(), nullable=True),
     StructField(name='Description', dataType=StringType(), nullable=True),
 ])

 # Hand the local list to the SparkContext to get an RDD.
 rdd = spark.sparkContext.parallelize(data)

 # Build the data frame from the RDD with the explicit schema, then print
 # the schema and show the rows.
 df = spark.createDataFrame(rdd, schema=schema)
 print(df.schema)
 df.show()
	from pyspark.sql.types import ArrayType, StructField, StructType, StringType, IntegerType
	from pyspark.sql import SparkSession

	# Start (or reuse) the Spark session used by this example.
	spark = SparkSession.builder.appName('appName').getOrCreate()

	# Input records, each a (category, count, description) tuple.
	data = [
		('Category A', 100, "This is category A"),
		('Category B', 120, "This is category B"),
		('Category C', 150, "This is category C"),
	]

	# Schema for the dataframe: three nullable columns, in tuple order.
	schema = StructType([
		StructField(column, dtype, True)
		for column, dtype in (
			('Category', StringType()),
			('Count', IntegerType()),
			('Description', StringType()),
		)
	])

	# Convert the Python list into an RDD.
	rdd = spark.sparkContext.parallelize(data)

	# Create the data frame from the RDD and schema, then print the schema
	# and show the rows.
	df = spark.createDataFrame(rdd, schema)
	print(df.schema)
	df.show()
No results found