Skip to content

Commit 0ac9041

Browse files
committed
Resolve comments
1 parent d094909 commit 0ac9041

14 files changed

+800
-88
lines changed

language-extensions/dotnet-core-CSharp/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ For more information about SQL Server Language Extensions, refer to this [docume
77

88
The dotnet-core-CSharp-extension version in this repository is compatible with SQL Server 2019 CU3 onwards. It integrates .NET Core into SQL Server and works with .NET 6.0 on **Windows only**.
99

10-
Currently, the extension supports the following data types: SQL_C_SLONG, SQL_C_ULONG, SQL_C_SSHORT, SQL_C_USHORT, SQL_C_SBIGINT, SQL_C_UBIGINT, SQL_C_STINYINT, SQL_C_UTINYINT, SQL_C_BIT, SQL_C_FLOAT, SQL_C_DOUBLE, SQL_C_CHAR, SQL_C_WCHAR. It supports the following SQL data types: int, bigint, smallint, tinyint, real, float, bit, varchar(n), nvarchar(n).
10+
Currently, the extension supports the following data types: SQL_C_SLONG, SQL_C_ULONG, SQL_C_SSHORT, SQL_C_USHORT, SQL_C_SBIGINT, SQL_C_UBIGINT, SQL_C_STINYINT, SQL_C_UTINYINT, SQL_C_BIT, SQL_C_FLOAT, SQL_C_DOUBLE, SQL_C_CHAR, and SQL_C_WCHAR. It supports the following SQL data types: int, bigint, smallint, tinyint, real, float, bit, char(n), varchar(n), nchar(n), and nvarchar(n).
1111

1212
To use this dotnet-core-CSharp-lang-extension.zip package, follow [this tutorial](./sample/regex/README.md). For any fixes or enhancements, you are welcome to modify, rebuild and use the binaries using the following instructions.
1313

language-extensions/dotnet-core-CSharp/src/managed/CSharpInputDataSet.cs

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -130,24 +130,31 @@ private unsafe void AddColumn(
130130
int[] strLens = new int[rowsNumber];
131131
Interop.Copy((int*)colMap, strLens, 0, (int)rowsNumber);
132132

133-
// Use byte-based splitting to properly handle multi-byte UTF-8 characters
133+
// Calculate total buffer size from the sum of all positive (non-null) byte lengths.
134+
// This enables bounds checking in UTF8ByteSplitToArray to guard against corrupted length data.
134135
//
135-
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.UTF8ByteSplitToArray((byte*)colData, strLens)));
136+
int charTotalBufferSize = DataSetUtils.CalculateTotalBufferSize(strLens);
137+
138+
// SQL Server sends UTF-8 encoded strings with byte lengths in strLenOrNullMap.
139+
// We decode each segment directly from the byte buffer to properly handle
140+
// multi-byte UTF-8 characters where byte count != character count.
141+
//
142+
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.UTF8ByteSplitToArray((byte*)colData, strLens, charTotalBufferSize)));
136143
break;
137144
case SqlDataType.DotNetWChar:
138-
int[] wcharStrLens = new int[rowsNumber];
139-
Interop.Copy((int*)colMap, wcharStrLens, 0, (int)rowsNumber);
140-
141-
// For NCHAR/WCHAR, the strLenOrNullMap contains byte lengths, but we need character counts for UTF16PtrToStr
142-
// Each Unicode character is 2 bytes (sizeof(wchar_t)), so divide by 2 to get character count
145+
int[] wcharByteLens = new int[rowsNumber];
146+
Interop.Copy((int*)colMap, wcharByteLens, 0, (int)rowsNumber);
147+
148+
// Calculate total buffer size from the sum of all positive (non-null) byte lengths.
149+
// This enables bounds checking in UTF16ByteSplitToArray to guard against corrupted length data.
143150
//
144-
int[] wcharCharLens = new int[rowsNumber];
145-
for (int i = 0; i < (int)rowsNumber; i++)
146-
{
147-
wcharCharLens[i] = wcharStrLens[i] > 0 ? wcharStrLens[i] / sizeof(char) : wcharStrLens[i];
148-
}
151+
int wcharTotalBufferSize = DataSetUtils.CalculateTotalBufferSize(wcharByteLens);
149152

150-
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.StringSplitToArray(Interop.UTF16PtrToStr((char*)colData), wcharCharLens)));
153+
// SQL Server sends UTF-16 (nvarchar/nchar) encoded strings with byte lengths in strLenOrNullMap.
154+
// We decode each segment directly from the byte buffer to properly handle
155+
// multi-string buffers where strings are concatenated without null terminators.
156+
//
157+
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.UTF16ByteSplitToArray((byte*)colData, wcharByteLens, wcharTotalBufferSize)));
151158
break;
152159
default:
153160
throw new NotImplementedException("Column type for " + _columns[columnNumber].DataType.ToString() + " has not been implemented yet");

language-extensions/dotnet-core-CSharp/src/managed/CSharpOutputDataSet.cs

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -42,41 +42,25 @@ public class CSharpOutputDataSet: CSharpDataSet
4242
/// This method extracts metadata and actual data for each column supplied
4343
/// by extracting data and information from every DataFrameColumn.
4444
/// </summary>
45-
/// <param name="CSharpDataFrame">The DataFrame containing the output data.</param>
46-
/// <param name="inputColumns">
47-
/// Optional dictionary of input column metadata (not used - C# extension always outputs ANSI strings
48-
/// like Python/R extensions).
49-
/// </param>
50-
public unsafe void ExtractColumns(DataFrame CSharpDataFrame, Dictionary<ushort, CSharpColumn> inputColumns = null)
45+
public unsafe void ExtractColumns(DataFrame dataFrame)
5146
{
5247
Logging.Trace("CSharpOutputDataSet::ExtractColumns");
5348
_strLenOrNullMapPtrs = new int*[ColumnsNumber];
5449
_dataPtrs = new void*[ColumnsNumber];
5550
for(ushort columnNumber = 0; columnNumber < ColumnsNumber; ++columnNumber)
5651
{
57-
DataFrameColumn column = CSharpDataFrame.Columns[columnNumber];
58-
59-
// Determine the SQL data type for this column
60-
// Prefer the incoming input column metadata when available so nvarchar metadata is preserved.
61-
// Default to ANSI mapping to keep existing behavior when no metadata is supplied.
62-
//
63-
SqlDataType dataType = DataTypeMap[column.DataType];
64-
ulong columnSize = (ulong)DataTypeSize[dataType];
65-
if(inputColumns != null && inputColumns.ContainsKey(columnNumber))
66-
{
67-
dataType = inputColumns[columnNumber].DataType;
68-
columnSize = inputColumns[columnNumber].Size;
69-
}
52+
DataFrameColumn column = dataFrame.Columns[columnNumber];
7053

7154
// Add column metadata to a CSharpColumn dictionary
7255
//
56+
SqlDataType dataType = DataTypeMap[column.DataType];
7357
_columns[columnNumber] = new CSharpColumn
7458
{
7559
Name = column.Name,
7660
DataType = dataType,
7761
Nullable = (short)(column.NullCount > 0 ? 1 : 0),
7862
DecimalDigits = 0,
79-
Size = columnSize,
63+
Size = (ulong)DataTypeSize[dataType],
8064
Id = columnNumber
8165
};
8266

@@ -185,8 +169,9 @@ DataFrameColumn column
185169
break;
186170
case SqlDataType.DotNetChar:
187171
// Set the string column's size to the largest per-row byte length.
172+
// Handle all-null columns by checking if any positive values exist.
188173
//
189-
int maxStrLen = colMap.Max();
174+
int maxStrLen = colMap.Length > 0 ? colMap.Where(x => x > 0).DefaultIfEmpty(0).Max() : 0;
190175
if(maxStrLen > 0)
191176
{
192177
_columns[columnNumber].Size = (ulong)maxStrLen;
@@ -196,17 +181,19 @@ DataFrameColumn column
196181
break;
197182
case SqlDataType.DotNetWChar:
198183
// Preserve nvarchar metadata by emitting UTF-16 data and byte counts.
184+
// Handle all-null columns by checking if any positive values exist.
185+
// Column size is reported in characters (byte length / 2 for UTF-16).
199186
//
200-
int maxUnicodeStrLen = colMap.Max();
201-
if(maxUnicodeStrLen > 0)
187+
int maxUnicodeByteLen = colMap.Length > 0 ? colMap.Where(x => x > 0).DefaultIfEmpty(0).Max() : 0;
188+
if(maxUnicodeByteLen > 0)
202189
{
203-
_columns[columnNumber].Size = (ulong)maxUnicodeStrLen;
190+
_columns[columnNumber].Size = (ulong)(maxUnicodeByteLen / sizeof(char));
204191
}
205192

206193
SetDataPtrs<char>(columnNumber, GetUnicodeStringArray(column));
207194
break;
208195
default:
209-
throw new NotImplementedException("Parameter type for " + DataTypeMap[column.DataType].ToString() + " has not been implemented yet");
196+
throw new NotImplementedException("Parameter type for " + DataTypeMap[column.DataType] + " has not been implemented yet");
210197
}
211198
}
212199

@@ -228,6 +215,11 @@ T[] array
228215
/// </summary>
229216
private T[] GetArray<T>(DataFrameColumn column) where T : unmanaged
230217
{
218+
if (column == null)
219+
{
220+
return Array.Empty<T>();
221+
}
222+
231223
T[] columnArray = new T[column.Length];
232224
for(int rowNumber = 0; rowNumber < column.Length; ++rowNumber)
233225
{
@@ -252,8 +244,15 @@ private T[] GetArray<T>(DataFrameColumn column) where T : unmanaged
252244
/// This method gets the array from a DataFrameColumn Column for string types by
253245
/// building a long string from the column and returning the underlying bytes as an array.
254246
/// </summary>
247+
/// <param name="column">The DataFrameColumn containing string data.</param>
248+
/// <returns>A byte array containing all non-null string values as UTF-8 encoded bytes.</returns>
255249
private byte[] GetStringArray(DataFrameColumn column)
256250
{
251+
if (column == null)
252+
{
253+
return Array.Empty<byte>();
254+
}
255+
257256
StringBuilder builder = new StringBuilder();
258257
int totalBytes = 0;
259258
for(int rowNumber = 0; rowNumber < column.Length; ++rowNumber)
@@ -282,8 +281,15 @@ private byte[] GetStringArray(DataFrameColumn column)
282281
/// <summary>
283282
/// This method builds a contiguous UTF-16 buffer for string types (nvarchar/nchar).
284283
/// </summary>
284+
/// <param name="column">The DataFrameColumn containing string data.</param>
285+
/// <returns>A char array containing all non-null string values concatenated.</returns>
285286
private char[] GetUnicodeStringArray(DataFrameColumn column)
286287
{
288+
if (column == null)
289+
{
290+
return Array.Empty<char>();
291+
}
292+
287293
StringBuilder builder = new StringBuilder();
288294
int totalBytes = 0;
289295
for(int rowNumber = 0; rowNumber < column.Length; ++rowNumber)
@@ -315,6 +321,11 @@ private char[] GetUnicodeStringArray(DataFrameColumn column)
315321
/// <returns>Array of string lengths or null indicators for each row</returns>
316322
private int[] GetStrLenNullMap(ushort columnNumber, DataFrameColumn column)
317323
{
324+
if (column == null)
325+
{
326+
return Array.Empty<int>();
327+
}
328+
318329
int[] colMap = new int[column.Length];
319330

320331
SqlDataType dataType = _columns[columnNumber].DataType;
@@ -339,11 +350,11 @@ private int[] GetStrLenNullMap(ushort columnNumber, DataFrameColumn column)
339350
Logging.Trace($"GetStrLenNullMap: Row {rowNumber}, Value='{column[rowNumber]}', ByteLen={colMap[rowNumber]}");
340351
break;
341352
default:
342-
if(!DataTypeSize.ContainsKey(dataType))
353+
if(!DataTypeSize.TryGetValue(dataType, out short size))
343354
{
344-
throw new NotImplementedException("Parameter type for " + dataType.ToString() + " has not been implemented yet");
355+
throw new NotImplementedException("Parameter type for " + dataType + " has not been implemented yet");
345356
}
346-
colMap[rowNumber] = DataTypeSize[dataType];
357+
colMap[rowNumber] = size;
347358
break;
348359
}
349360
}

language-extensions/dotnet-core-CSharp/src/managed/CSharpParamContainer.cs

Lines changed: 46 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,10 @@ public unsafe void AddParam(
136136
_params[paramNumber].Value = Interop.UTF8PtrToStr((char*)paramValue, (ulong)strLenOrNullMap);
137137
break;
138138
case SqlDataType.DotNetWChar:
139-
// For NCHAR/WCHAR, strLenOrNullMap contains byte length, divide by 2 to get character count
139+
// For NCHAR/NVARCHAR, strLenOrNullMap contains byte length.
140+
// In C#, sizeof(char) is always 2 bytes (UTF-16), regardless of platform.
141+
// Note: C++ wchar_t is 2 bytes on Windows but 4 bytes on Linux - this extension only supports Windows.
142+
// The cast to (char*) is correct here because C# char is UTF-16 (same as Windows wchar_t).
140143
//
141144
_params[paramNumber].Value = Interop.UTF16PtrToStr((char*)paramValue, strLenOrNullMap / sizeof(char));
142145
break;
@@ -157,13 +160,13 @@ public unsafe void ReplaceParam(
157160
int *strLenOrNullMap)
158161
{
159162
Logging.Trace("CSharpParamContainer::ReplaceParam");
160-
if(!UserParams.ContainsKey(_params[paramNumber].Name))
163+
if(!UserParams.TryGetValue(_params[paramNumber].Name, out object paramValue_))
161164
{
162165
*strLenOrNullMap = SQL_NULL_DATA;
163166
return;
164167
}
165168

166-
_params[paramNumber].Value = UserParams[_params[paramNumber].Name];
169+
_params[paramNumber].Value = paramValue_;
167170
CSharpParam param = _params[paramNumber];
168171
if(param.Value == null)
169172
{
@@ -212,16 +215,25 @@ public unsafe void ReplaceParam(
212215
ReplaceNumericParam<bool>(boolValue, paramValue);
213216
break;
214217
case SqlDataType.DotNetChar:
215-
*strLenOrNullMap = (param.Value.Length < *strLenOrNullMap) ? param.Value.Length : *strLenOrNullMap;
218+
// For CHAR/VARCHAR, strLenOrNullMap is in bytes (1 byte per character for ANSI).
219+
// param.Size is the declared parameter size in characters (from SQL Server's CHAR(n)/VARCHAR(n)).
220+
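// For single-byte (ANSI/ASCII) text, character count equals byte count. NOTE(review): ReplaceStringParam writes the value as UTF-8, so multi-byte characters would make the byte count exceed param.Value.Length - confirm.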
221+
//
222+
int charByteLen = param.Value.Length;
223+
int charMaxByteLen = (int)param.Size;
224+
*strLenOrNullMap = (charByteLen < charMaxByteLen) ? charByteLen : charMaxByteLen;
216225
ReplaceStringParam((string)param.Value, paramValue);
217226
break;
218227
case SqlDataType.DotNetWChar:
219-
// For NCHAR/WCHAR, strLenOrNullMap is in bytes
220-
// param.Size is in characters, so convert to bytes for comparison
228+
// For NCHAR/NVARCHAR, strLenOrNullMap must be in bytes (UTF-16: 2 bytes per character).
229+
// In C#, sizeof(char) is always 2 bytes (UTF-16), regardless of platform.
230+
// Note: C++ wchar_t is 2 bytes on Windows but 4 bytes on Linux - this extension only supports Windows.
231+
// param.Size is the declared parameter size in characters (from SQL Server's NCHAR(n)/NVARCHAR(n)),
232+
// so we multiply by sizeof(char) to convert to bytes.
221233
//
222234
int wcharByteLen = param.Value.Length * sizeof(char);
223-
int maxByteLen = (int)param.Size * sizeof(char);
224-
*strLenOrNullMap = (wcharByteLen < maxByteLen) ? wcharByteLen : maxByteLen;
235+
int wcharMaxByteLen = (int)param.Size * sizeof(char);
236+
*strLenOrNullMap = (wcharByteLen < wcharMaxByteLen) ? wcharByteLen : wcharMaxByteLen;
225237
ReplaceUnicodeStringParam((string)param.Value, paramValue);
226238
break;
227239
default:
@@ -248,13 +260,19 @@ public void HandleCleanup()
248260

249261
/// <summary>
250262
/// This method replaces parameter value for numeric data types.
263+
/// Pins a heap-allocated copy of the value so the returned pointer remains valid after the method returns.
251264
/// </summary>
252265
private unsafe void ReplaceNumericParam<T>(
253266
T value,
254267
void **paramValue) where T : unmanaged
255268
{
256-
_handleList.Add(GCHandle.Alloc(value));
257-
*paramValue = &value;
269+
// Store the value in a single-element array to create a heap-allocated copy, then pin it.
270+
// This ensures the pointer remains valid after the method returns.
271+
//
272+
T[] valueArray = new T[1] { value };
273+
GCHandle handle = GCHandle.Alloc(valueArray, GCHandleType.Pinned);
274+
_handleList.Add(handle);
275+
*paramValue = (void*)handle.AddrOfPinnedObject();
258276
}
259277

260278
/// <summary>
@@ -268,20 +286,19 @@ private unsafe void ReplaceStringParam(
268286
{
269287
if(string.IsNullOrEmpty(value))
270288
{
271-
_handleList.Add(GCHandle.Alloc(value));
272-
fixed(void* strPtr = value)
273-
{
274-
*paramValue = strPtr;
275-
}
289+
// For empty/null strings, allocate a single null byte
290+
//
291+
byte[] emptyBytes = new byte[1];
292+
GCHandle handle = GCHandle.Alloc(emptyBytes, GCHandleType.Pinned);
293+
_handleList.Add(handle);
294+
*paramValue = (void*)handle.AddrOfPinnedObject();
276295
}
277296
else
278297
{
279298
byte[] strBytes = Encoding.UTF8.GetBytes(value);
280-
_handleList.Add(GCHandle.Alloc(strBytes));
281-
fixed(void* strPtr = strBytes)
282-
{
283-
*paramValue = strPtr;
284-
}
299+
GCHandle handle = GCHandle.Alloc(strBytes, GCHandleType.Pinned);
300+
_handleList.Add(handle);
301+
*paramValue = (void*)handle.AddrOfPinnedObject();
285302
}
286303
}
287304

@@ -296,20 +313,19 @@ private unsafe void ReplaceUnicodeStringParam(
296313
{
297314
if(string.IsNullOrEmpty(value))
298315
{
299-
_handleList.Add(GCHandle.Alloc(value));
300-
fixed(void* strPtr = value)
301-
{
302-
*paramValue = strPtr;
303-
}
316+
// For empty/null strings, allocate a single null wchar
317+
//
318+
byte[] emptyBytes = new byte[2];
319+
GCHandle handle = GCHandle.Alloc(emptyBytes, GCHandleType.Pinned);
320+
_handleList.Add(handle);
321+
*paramValue = (void*)handle.AddrOfPinnedObject();
304322
}
305323
else
306324
{
307325
byte[] strBytes = Encoding.Unicode.GetBytes(value);
308-
_handleList.Add(GCHandle.Alloc(strBytes));
309-
fixed(void* strPtr = strBytes)
310-
{
311-
*paramValue = strPtr;
312-
}
326+
GCHandle handle = GCHandle.Alloc(strBytes, GCHandleType.Pinned);
327+
_handleList.Add(handle);
328+
*paramValue = (void*)handle.AddrOfPinnedObject();
313329
}
314330
}
315331
}

0 commit comments

Comments
 (0)