From 21263fc6b87db527362b8eece6b94aa922f64dc1 Mon Sep 17 00:00:00 2001
From: Ed Elliott
Date: Thu, 7 Nov 2024 21:11:41 +0000
Subject: [PATCH] null lit

---
 .../Spark.Connect.Dotnet/Sql/Column.cs        |  19 +++
 .../Sql/ManualFunctions.cs                    | 134 +++++++++++++++++++
 .../Spark.Connect.Dotnet.Tests/ColumnTests.cs |  14 +-
 .../ManuallyWrittenFunctionsTests.cs          |  32 +++++
 4 files changed, 198 insertions(+), 1 deletion(-)

diff --git a/src/Spark.Connect.Dotnet/Spark.Connect.Dotnet/Sql/Column.cs b/src/Spark.Connect.Dotnet/Spark.Connect.Dotnet/Sql/Column.cs
index 3cbd9d7..560d38c 100644
--- a/src/Spark.Connect.Dotnet/Spark.Connect.Dotnet/Sql/Column.cs
+++ b/src/Spark.Connect.Dotnet/Spark.Connect.Dotnet/Sql/Column.cs
@@ -1291,6 +1291,25 @@ public Column IsNull()
 
         return new Column(expression);
     }
+
+    /// <summary>
+    ///     Builds a column that calls the <c>isnotnull</c> function with this column's <c>Expression</c> as its only argument.
+    /// </summary>
+    /// <returns>A new <see cref="Column" /> wrapping the unresolved <c>isnotnull</c> function call.</returns>
+    public Column IsNotNull()
+    {
+        var expression = new Expression
+        {
+            UnresolvedFunction = new Expression.Types.UnresolvedFunction
+            {
+                FunctionName = "isnotnull", IsUserDefinedFunction = false, IsDistinct = false
+            }
+        };
+
+        expression.UnresolvedFunction.Arguments.Add(Expression);
+
+        return new Column(expression);
+    }
 
     public Column EndsWith(string other)
     {
diff --git a/src/Spark.Connect.Dotnet/Spark.Connect.Dotnet/Sql/ManualFunctions.cs b/src/Spark.Connect.Dotnet/Spark.Connect.Dotnet/Sql/ManualFunctions.cs
index 0eadaaf..22015d8 100644
--- a/src/Spark.Connect.Dotnet/Spark.Connect.Dotnet/Sql/ManualFunctions.cs
+++ b/src/Spark.Connect.Dotnet/Spark.Connect.Dotnet/Sql/ManualFunctions.cs
@@ -212,6 +212,9 @@ public static Column Lit(object o)
                 Literal = new Expression.Types.Literal
                 {
                     Null = new DataType()
+                    {
+                        Null = new DataType.Types.NULL()
+                    }
                 }
             });
         }
@@ -285,6 +288,110 @@ public static Column Lit(bool value)
         });
     }
 
+    /// <summary>
+    ///     Creates a literal column from a nullable <see cref="DateTime" />.
+    /// </summary>
+    /// <param name="value">Value to wrap. When <c>null</c>, the result is a NULL literal whose data type is Timestamp.</param>
+    /// <returns>A <see cref="Column" /> holding the literal expression.</returns>
+    public static Column Lit(DateTime? value)
+    {
+        if (value.HasValue)
+        {
+            // A supplied value must not be dropped; boxing it selects the Lit(object) overload.
+            // NOTE(review): assumes Lit(object) handles this type - confirm, or call the typed overload.
+            return Lit((object)value.Value);
+        }
+
+        return new Column(new Expression
+        {
+            Literal = new Expression.Types.Literal()
+            {
+                Null = new DataType()
+                {
+                    Timestamp = new DataType.Types.Timestamp()
+                }
+            }
+        });
+    }
+
+    /// <summary>
+    ///     Creates a literal column from a nullable <c>float</c>.
+    /// </summary>
+    /// <param name="value">Value to wrap. When <c>null</c>, the result is a NULL literal whose data type is Float.</param>
+    /// <returns>A <see cref="Column" /> holding the literal expression.</returns>
+    public static Column Lit(float? value)
+    {
+        if (value.HasValue)
+        {
+            // A supplied value must not be dropped; boxing it selects the Lit(object) overload.
+            // NOTE(review): assumes Lit(object) handles this type - confirm, or call the typed overload.
+            return Lit((object)value.Value);
+        }
+
+        return new Column(new Expression
+        {
+            Literal = new Expression.Types.Literal()
+            {
+                Null = new DataType()
+                {
+                    Float = new DataType.Types.Float()
+                }
+            }
+        });
+    }
+
+    /// <summary>
+    ///     Creates a literal column from a nullable <c>long</c>.
+    /// </summary>
+    /// <param name="value">Value to wrap. When <c>null</c>, the result is a NULL literal whose data type is Long.</param>
+    /// <returns>A <see cref="Column" /> holding the literal expression.</returns>
+    public static Column Lit(long? value)
+    {
+        if (value.HasValue)
+        {
+            // A supplied value must not be dropped; boxing it selects the Lit(object) overload.
+            // NOTE(review): assumes Lit(object) handles this type - confirm, or call the typed overload.
+            return Lit((object)value.Value);
+        }
+
+        return new Column(new Expression
+        {
+            Literal = new Expression.Types.Literal()
+            {
+                Null = new DataType()
+                {
+                    Long = new DataType.Types.Long()
+                }
+            }
+        });
+    }
+
+    /// <summary>
+    ///     Creates a literal column from a nullable <c>double</c>.
+    /// </summary>
+    /// <param name="value">Value to wrap. When <c>null</c>, the result is a NULL literal whose data type is Double.</param>
+    /// <returns>A <see cref="Column" /> holding the literal expression.</returns>
+    public static Column Lit(double? value)
+    {
+        if (value.HasValue)
+        {
+            // A supplied value must not be dropped; boxing it selects the Lit(object) overload.
+            // NOTE(review): assumes Lit(object) handles this type - confirm, or call the typed overload.
+            return Lit((object)value.Value);
+        }
+
+        return new Column(new Expression
+        {
+            Literal = new Expression.Types.Literal()
+            {
+                Null = new DataType()
+                {
+                    Double = new DataType.Types.Double()
+                }
+            }
+        });
+    }
+
     public static Column Lit(double value)
     {
         return new Column(new Expression
@@ -432,6 +539,33 @@ public static Column Column(string name)
         return new Column(name);
     }
 
+    /// <summary>
+    ///     Creates a NULL literal column; shorthand that forwards to <see cref="Lit()" />.
+    ///     Use this when you want to pass `Lit(null)`, or write `Lit(null as type)` instead.
+    /// </summary>
+    /// <returns>A <see cref="Column" /> holding a NULL literal.</returns>
+    public static Column LitNull() => Lit();
+
+    /// <summary>
+    ///     Creates a NULL literal column whose data type is NULL.
+    ///     Use this when you want to pass `Lit(null)`, or write `Lit(null as type)` instead.
+    /// </summary>
+    /// <returns>A <see cref="Column" /> holding a NULL literal.</returns>
+    public static Column Lit()
+    {
+        var expr = new Expression()
+        {
+            Literal = new Expression.Types.Literal()
+            {
+                Null = new DataType()
+                {
+                    Null = new DataType.Types.NULL()
+                }
+            }
+        };
+
+        return new Column(expr);
+    }
 
     /// List<String>
     /// Returns a new :class:`Column` for distinct count of ``col`` or ``cols``.
diff --git a/src/test/Spark.Connect.Dotnet.Tests/ColumnTests.cs b/src/test/Spark.Connect.Dotnet.Tests/ColumnTests.cs
index 0404f14..32e5e27 100644
--- a/src/test/Spark.Connect.Dotnet.Tests/ColumnTests.cs
+++ b/src/test/Spark.Connect.Dotnet.Tests/ColumnTests.cs
@@ -32,7 +32,15 @@ public void IsNullTest()
     {
         var df = Spark.Sql(
             "SELECT cast(null as string) as a, id from range(4) union SELECT 'aa' as a, id from range(100, 10)");
-        df.Filter(Col("a").IsNull()).Show();
+        df.Select(Col("a"), Col("a").IsNull()).Show();
+    }
+
+    [Fact]
+    public void IsNotNullTest()
+    {
+        var df = Spark.Sql(
+            "SELECT cast(null as string) as a, id from range(4) union SELECT 'aa' as a, id from range(100, 10)");
+        df.Select(Col("a"), Col("a").IsNotNull()).Show();
     }
 
 
@@ -132,5 +140,9 @@ public void IsInTests()
     {
         var df = Spark.CreateDataFrame(new List<(object, object)> { (2, "Alice"), (5, "Bob") }, "age", "name");
         df.Select(Col("name"), df["name"].IsIn("Bob", "Mike")).Show();
+        foreach (var row in df.Collect())
+        {
+            Console.WriteLine(row[0]);
+        }
     }
 }
\ No newline at end of file
diff --git a/src/test/Spark.Connect.Dotnet.Tests/FunctionsTests/ManuallyWrittenFunctionsTests.cs b/src/test/Spark.Connect.Dotnet.Tests/FunctionsTests/ManuallyWrittenFunctionsTests.cs
index de8f6e6..958c467 100644
--- a/src/test/Spark.Connect.Dotnet.Tests/FunctionsTests/ManuallyWrittenFunctionsTests.cs
+++ b/src/test/Spark.Connect.Dotnet.Tests/FunctionsTests/ManuallyWrittenFunctionsTests.cs
@@ -1712,4 +1712,36 @@ public void Collation_Test()
 
         Spark.Conf.Set("spark.sql.collation.enabled", "false"); //is hidden behind feature flag
     }
+    [Fact]
+    public void NullLit_Test()
+    {
+        var df = Spark.Range(100).WithColumn("a", Lit());
+        df.Show();
+        df.PrintSchema();
+
+        df = Spark.Range(100).WithColumn("a", Lit(null as object));
+        df.Show();
+        df.PrintSchema();
+
+        df = Spark.Range(100).WithColumn("a", Lit(null as int?));
+        df.Show();
+        df.PrintSchema();
+
+        df = Spark.Range(100).WithColumn("a", Lit(null as long?));
+        df.Show();
+        df.PrintSchema();
+
+        df = Spark.Range(100).WithColumn("a", Lit(null as float?));
+        df.Show();
+        df.PrintSchema();
+
+        df = Spark.Range(100).WithColumn("a", Lit(null as double?));
+        df.Show();
+        df.PrintSchema();
+
+        df = Spark.Range(100).WithColumn("a", Lit(null as DateTime?));
+        df.Show();
+        df.PrintSchema();
+    }
+
 }