Skip to content

Commit 9f79549

Browse files
committed
Resolve comments
1 parent d094909 commit 9f79549

File tree

13 files changed

+639
-82
lines changed

13 files changed

+639
-82
lines changed

language-extensions/dotnet-core-CSharp/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ For more information about SQL Server Language Extensions, refer to this [docume
77

88
The dotnet-core-CSharp-extension version in this repository is compatible with SQL Server 2019 CU3 onwards. It integrates .NET Core in SQL Server and works with .NET 6.0 on **Windows only**.
99

10-
Currently, the extension supports the following data types: SQL_C_SLONG, SQL_C_ULONG, SQL_C_SSHORT, SQL_C_USHORT, SQL_C_SBIGINT, SQL_C_UBIGINT, SQL_C_STINYINT, SQL_C_UTINYINT, SQL_C_BIT, SQL_C_FLOAT, SQL_C_DOUBLE, SQL_C_CHAR, SQL_C_WCHAR. It supports the following SQL data types: int, bigint, smallint, tinyint, real, float, bit, varchar(n), nvarchar(n).
10+
Currently, the extension supports the following data types: SQL_C_SLONG, SQL_C_ULONG, SQL_C_SSHORT, SQL_C_USHORT, SQL_C_SBIGINT, SQL_C_UBIGINT, SQL_C_STINYINT, SQL_C_UTINYINT, SQL_C_BIT, SQL_C_FLOAT, SQL_C_DOUBLE, SQL_C_CHAR, and SQL_C_WCHAR. It supports the following SQL data types: int, bigint, smallint, tinyint, real, float, bit, char(n), varchar(n), nchar(n), and nvarchar(n).
1111

1212
To use this dotnet-core-CSharp-lang-extension.zip package, follow [this tutorial](./sample/regex/README.md). For any fixes or enhancements, you are welcome to modify, rebuild, and use the binaries by following the instructions below.
1313

language-extensions/dotnet-core-CSharp/src/managed/CSharpInputDataSet.cs

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -130,24 +130,21 @@ private unsafe void AddColumn(
130130
int[] strLens = new int[rowsNumber];
131131
Interop.Copy((int*)colMap, strLens, 0, (int)rowsNumber);
132132

133-
// Use byte-based splitting to properly handle multi-byte UTF-8 characters
133+
// SQL Server sends UTF-8 encoded strings with byte lengths in strLenOrNullMap.
134+
// We decode each segment directly from the byte buffer to properly handle
135+
// multi-byte UTF-8 characters where byte count != character count.
134136
//
135137
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.UTF8ByteSplitToArray((byte*)colData, strLens)));
136138
break;
137139
case SqlDataType.DotNetWChar:
138-
int[] wcharStrLens = new int[rowsNumber];
139-
Interop.Copy((int*)colMap, wcharStrLens, 0, (int)rowsNumber);
140-
141-
// For NCHAR/WCHAR, the strLenOrNullMap contains byte lengths, but we need character counts for UTF16PtrToStr
142-
// Each Unicode character is 2 bytes (sizeof(wchar_t)), so divide by 2 to get character count
143-
//
144-
int[] wcharCharLens = new int[rowsNumber];
145-
for (int i = 0; i < (int)rowsNumber; i++)
146-
{
147-
wcharCharLens[i] = wcharStrLens[i] > 0 ? wcharStrLens[i] / sizeof(char) : wcharStrLens[i];
148-
}
140+
int[] wcharByteLens = new int[rowsNumber];
141+
Interop.Copy((int*)colMap, wcharByteLens, 0, (int)rowsNumber);
149142

150-
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.StringSplitToArray(Interop.UTF16PtrToStr((char*)colData), wcharCharLens)));
143+
// SQL Server sends UTF-16 (nvarchar/nchar) encoded strings with byte lengths in strLenOrNullMap.
144+
// We decode each segment directly from the byte buffer to properly handle
145+
// multi-string buffers where strings are concatenated without null terminators.
146+
//
147+
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.UTF16ByteSplitToArray((byte*)colData, wcharByteLens)));
151148
break;
152149
default:
153150
throw new NotImplementedException("Column type for " + _columns[columnNumber].DataType.ToString() + " has not been implemented yet");

language-extensions/dotnet-core-CSharp/src/managed/CSharpOutputDataSet.cs

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -42,41 +42,25 @@ public class CSharpOutputDataSet: CSharpDataSet
4242
/// This method extracts metadata and actual data for each column supplied
4343
/// by extracting data and information from every DataFrameColumn.
4444
/// </summary>
45-
/// <param name="CSharpDataFrame">The DataFrame containing the output data.</param>
46-
/// <param name="inputColumns">
47-
/// Optional dictionary of input column metadata (not used - C# extension always outputs ANSI strings
48-
/// like Python/R extensions).
49-
/// </param>
50-
public unsafe void ExtractColumns(DataFrame CSharpDataFrame, Dictionary<ushort, CSharpColumn> inputColumns = null)
45+
public unsafe void ExtractColumns(DataFrame dataFrame)
5146
{
5247
Logging.Trace("CSharpOutputDataSet::ExtractColumns");
5348
_strLenOrNullMapPtrs = new int*[ColumnsNumber];
5449
_dataPtrs = new void*[ColumnsNumber];
5550
for(ushort columnNumber = 0; columnNumber < ColumnsNumber; ++columnNumber)
5651
{
57-
DataFrameColumn column = CSharpDataFrame.Columns[columnNumber];
58-
59-
// Determine the SQL data type for this column
60-
// Prefer the incoming input column metadata when available so nvarchar metadata is preserved.
61-
// Default to ANSI mapping to keep existing behavior when no metadata is supplied.
62-
//
63-
SqlDataType dataType = DataTypeMap[column.DataType];
64-
ulong columnSize = (ulong)DataTypeSize[dataType];
65-
if(inputColumns != null && inputColumns.ContainsKey(columnNumber))
66-
{
67-
dataType = inputColumns[columnNumber].DataType;
68-
columnSize = inputColumns[columnNumber].Size;
69-
}
52+
DataFrameColumn column = dataFrame.Columns[columnNumber];
7053

7154
// Add column metadata to a CSharpColumn dictionary
7255
//
56+
SqlDataType dataType = DataTypeMap[column.DataType];
7357
_columns[columnNumber] = new CSharpColumn
7458
{
7559
Name = column.Name,
7660
DataType = dataType,
7761
Nullable = (short)(column.NullCount > 0 ? 1 : 0),
7862
DecimalDigits = 0,
79-
Size = columnSize,
63+
Size = (ulong)DataTypeSize[dataType],
8064
Id = columnNumber
8165
};
8266

@@ -185,8 +169,9 @@ DataFrameColumn column
185169
break;
186170
case SqlDataType.DotNetChar:
187171
// Modify the size of the string column to be the max size of bytes.
172+
// Handle all-null columns by checking if any positive values exist.
188173
//
189-
int maxStrLen = colMap.Max();
174+
int maxStrLen = colMap.Length > 0 ? colMap.Where(x => x > 0).DefaultIfEmpty(0).Max() : 0;
190175
if(maxStrLen > 0)
191176
{
192177
_columns[columnNumber].Size = (ulong)maxStrLen;
@@ -196,11 +181,13 @@ DataFrameColumn column
196181
break;
197182
case SqlDataType.DotNetWChar:
198183
// Preserve nvarchar metadata by emitting UTF-16 data and byte counts.
184+
// Handle all-null columns by checking if any positive values exist.
185+
// Column size is reported in characters (byte length / 2 for UTF-16).
199186
//
200-
int maxUnicodeStrLen = colMap.Max();
201-
if(maxUnicodeStrLen > 0)
187+
int maxUnicodeByteLen = colMap.Length > 0 ? colMap.Where(x => x > 0).DefaultIfEmpty(0).Max() : 0;
188+
if(maxUnicodeByteLen > 0)
202189
{
203-
_columns[columnNumber].Size = (ulong)maxUnicodeStrLen;
190+
_columns[columnNumber].Size = (ulong)(maxUnicodeByteLen / sizeof(char));
204191
}
205192

206193
SetDataPtrs<char>(columnNumber, GetUnicodeStringArray(column));
@@ -252,8 +239,15 @@ private T[] GetArray<T>(DataFrameColumn column) where T : unmanaged
252239
/// This method gets the array from a DataFrameColumn Column for string types by
253240
/// building a long string from the column and returning the underlying bytes as an array.
254241
/// </summary>
242+
/// <param name="column">The DataFrameColumn containing string data.</param>
243+
/// <returns>A byte array containing all non-null string values as UTF-8 encoded bytes.</returns>
255244
private byte[] GetStringArray(DataFrameColumn column)
256245
{
246+
if (column == null)
247+
{
248+
return Array.Empty<byte>();
249+
}
250+
257251
StringBuilder builder = new StringBuilder();
258252
int totalBytes = 0;
259253
for(int rowNumber = 0; rowNumber < column.Length; ++rowNumber)
@@ -282,8 +276,15 @@ private byte[] GetStringArray(DataFrameColumn column)
282276
/// <summary>
283277
/// This method builds a contiguous UTF-16 buffer for string types (nvarchar/nchar).
284278
/// </summary>
279+
/// <param name="column">The DataFrameColumn containing string data.</param>
280+
/// <returns>A char array containing all non-null string values concatenated.</returns>
285281
private char[] GetUnicodeStringArray(DataFrameColumn column)
286282
{
283+
if (column == null)
284+
{
285+
return Array.Empty<char>();
286+
}
287+
287288
StringBuilder builder = new StringBuilder();
288289
int totalBytes = 0;
289290
for(int rowNumber = 0; rowNumber < column.Length; ++rowNumber)
@@ -339,11 +340,11 @@ private int[] GetStrLenNullMap(ushort columnNumber, DataFrameColumn column)
339340
Logging.Trace($"GetStrLenNullMap: Row {rowNumber}, Value='{column[rowNumber]}', ByteLen={colMap[rowNumber]}");
340341
break;
341342
default:
342-
if(!DataTypeSize.ContainsKey(dataType))
343+
if(!DataTypeSize.TryGetValue(dataType, out short size))
343344
{
344-
throw new NotImplementedException("Parameter type for " + dataType.ToString() + " has not been implemented yet");
345+
throw new NotImplementedException("Parameter type for " + dataType + " has not been implemented yet");
345346
}
346-
colMap[rowNumber] = DataTypeSize[dataType];
347+
colMap[rowNumber] = size;
347348
break;
348349
}
349350
}

language-extensions/dotnet-core-CSharp/src/managed/CSharpParamContainer.cs

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,10 @@ public unsafe void AddParam(
136136
_params[paramNumber].Value = Interop.UTF8PtrToStr((char*)paramValue, (ulong)strLenOrNullMap);
137137
break;
138138
case SqlDataType.DotNetWChar:
139-
// For NCHAR/WCHAR, strLenOrNullMap contains byte length, divide by 2 to get character count
139+
// For NCHAR/NVARCHAR, strLenOrNullMap contains byte length.
140+
// In C#, sizeof(char) is always 2 bytes (UTF-16), regardless of platform.
141+
// Note: C++ wchar_t is 2 bytes on Windows but 4 bytes on Linux - this extension only supports Windows.
142+
// The cast to (char*) is correct here because C# char is UTF-16 (same as Windows wchar_t).
140143
//
141144
_params[paramNumber].Value = Interop.UTF16PtrToStr((char*)paramValue, strLenOrNullMap / sizeof(char));
142145
break;
@@ -216,8 +219,13 @@ public unsafe void ReplaceParam(
216219
ReplaceStringParam((string)param.Value, paramValue);
217220
break;
218221
case SqlDataType.DotNetWChar:
219-
// For NCHAR/WCHAR, strLenOrNullMap is in bytes
220-
// param.Size is in characters, so convert to bytes for comparison
222+
// For NCHAR/NVARCHAR, strLenOrNullMap must be in bytes (UTF-16: 2 bytes per character).
223+
// In C#, sizeof(char) is always 2 bytes (UTF-16), regardless of platform.
224+
// Note: C++ wchar_t is 2 bytes on Windows but 4 bytes on Linux - this extension only supports Windows.
225+
// param.Size is the declared parameter size in characters (from SQL Server's NCHAR(n)/NVARCHAR(n)),
226+
// so we multiply by sizeof(char) to convert to bytes.
227+
// Note: *strLenOrNullMap was pre-set to param.Size on line 176, but that value is in
228+
// characters, not bytes, so we recalculate the max byte length here.
221229
//
222230
int wcharByteLen = param.Value.Length * sizeof(char);
223231
int maxByteLen = (int)param.Size * sizeof(char);
@@ -248,13 +256,19 @@ public void HandleCleanup()
248256

249257
/// <summary>
250258
/// This method replaces parameter value for numeric data types.
259+
/// Uses proper memory pinning to ensure the value remains valid after the method returns.
251260
/// </summary>
252261
private unsafe void ReplaceNumericParam<T>(
253262
T value,
254263
void **paramValue) where T : unmanaged
255264
{
256-
_handleList.Add(GCHandle.Alloc(value));
257-
*paramValue = &value;
265+
// Copy the value into a single-element array to create a heap-allocated copy, then pin it.
266+
// This ensures the pointer remains valid after the method returns.
267+
//
268+
T[] valueArray = new T[1] { value };
269+
GCHandle handle = GCHandle.Alloc(valueArray, GCHandleType.Pinned);
270+
_handleList.Add(handle);
271+
*paramValue = (void*)handle.AddrOfPinnedObject();
258272
}
259273

260274
/// <summary>
@@ -268,20 +282,19 @@ private unsafe void ReplaceStringParam(
268282
{
269283
if(string.IsNullOrEmpty(value))
270284
{
271-
_handleList.Add(GCHandle.Alloc(value));
272-
fixed(void* strPtr = value)
273-
{
274-
*paramValue = strPtr;
275-
}
285+
// For empty/null strings, allocate a single null byte
286+
//
287+
byte[] emptyBytes = new byte[1];
288+
GCHandle handle = GCHandle.Alloc(emptyBytes, GCHandleType.Pinned);
289+
_handleList.Add(handle);
290+
*paramValue = (void*)handle.AddrOfPinnedObject();
276291
}
277292
else
278293
{
279294
byte[] strBytes = Encoding.UTF8.GetBytes(value);
280-
_handleList.Add(GCHandle.Alloc(strBytes));
281-
fixed(void* strPtr = strBytes)
282-
{
283-
*paramValue = strPtr;
284-
}
295+
GCHandle handle = GCHandle.Alloc(strBytes, GCHandleType.Pinned);
296+
_handleList.Add(handle);
297+
*paramValue = (void*)handle.AddrOfPinnedObject();
285298
}
286299
}
287300

@@ -296,20 +309,19 @@ private unsafe void ReplaceUnicodeStringParam(
296309
{
297310
if(string.IsNullOrEmpty(value))
298311
{
299-
_handleList.Add(GCHandle.Alloc(value));
300-
fixed(void* strPtr = value)
301-
{
302-
*paramValue = strPtr;
303-
}
312+
// For empty/null strings, allocate a single null wchar
313+
//
314+
byte[] emptyBytes = new byte[2];
315+
GCHandle handle = GCHandle.Alloc(emptyBytes, GCHandleType.Pinned);
316+
_handleList.Add(handle);
317+
*paramValue = (void*)handle.AddrOfPinnedObject();
304318
}
305319
else
306320
{
307321
byte[] strBytes = Encoding.Unicode.GetBytes(value);
308-
_handleList.Add(GCHandle.Alloc(strBytes));
309-
fixed(void* strPtr = strBytes)
310-
{
311-
*paramValue = strPtr;
312-
}
322+
GCHandle handle = GCHandle.Alloc(strBytes, GCHandleType.Pinned);
323+
_handleList.Add(handle);
324+
*paramValue = (void*)handle.AddrOfPinnedObject();
313325
}
314326
}
315327
}

language-extensions/dotnet-core-CSharp/src/managed/CSharpSession.cs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,7 @@ public void Execute(
192192
if(_outputDataSet.CSharpDataFrame != null)
193193
{
194194
_outputDataSet.ColumnsNumber = (ushort)_outputDataSet.CSharpDataFrame.Columns.Count;
195-
// Pass input column metadata to preserve data types (e.g., nvarchar vs varchar).
196-
//
197-
_outputDataSet.ExtractColumns(_outputDataSet.CSharpDataFrame, _inputDataSet.Columns);
195+
_outputDataSet.ExtractColumns(_outputDataSet.CSharpDataFrame);
198196
*outputSchemaColumnsNumber = _outputDataSet.ColumnsNumber;
199197
}
200198
else

0 commit comments

Comments
 (0)