from pyspark.sql.functions import *
from pyspark.sql.types import *
# Demonstrates two ways to replace NULLs in an array column with an empty array.
# NOTE(review): `spark` is not defined in this file; presumably it is the
# SparkSession provided by the pyspark shell / notebook -- confirm.

# Explicit schema: column 'a' is array<bigint> and 'b' is bigint, so the
# column types do not depend on schema inference over a None value.
sample_schema = StructType([
    StructField('a', ArrayType(LongType()), True),
    StructField('b', LongType(), True),
])
df = spark.createDataFrame([(None, 2), ([2, 3], 4)], schema=sample_schema)


def empty_array():
    """Return a Column expression for an empty ``array<bigint>``.

    Uses the built-in ``array()`` function cast to the target array type
    rather than a Python UDF, so no per-row Python call is needed to produce
    a constant value.

    Returns:
        Column: an empty array typed as ``ArrayType(LongType())``.
    """
    return array().cast(ArrayType(LongType()))


# solution 1: coalesce picks the first non-null value, i.e. the original
# array when present, otherwise the empty array.
df.withColumn('a', coalesce(col('a'), empty_array())).show()
# solution 2: the same result spelled out as an explicit null check.
df.withColumn('a', when(isnull('a'), empty_array()).otherwise(df.a)).show()
 
# 没有评论: (No comments:)
# 发表评论 (Post a comment)