Skip to content

Commit d094909

Browse files
committed
Nvarchar support
1 parent d829ed2 commit d094909

File tree

13 files changed

+508
-40
lines changed

13 files changed

+508
-40
lines changed

language-extensions/dotnet-core-CSharp/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ For more information about SQL Server Language Extensions, refer to this [docume
77

88
The dotnet-core-CSharp-extension version in this repository is compatible with SQL Server 2019 CU3 onwards. It integrates .NET Core in SQL Server and works with .NET 6.0 on **Windows only**.
99

10-
Currently, the extension supports the following data types: SQL_C_SLONG, SQL_C_ULONG, SQL_C_SSHORT, SQL_C_USHORT, SQL_C_SBIGINT, SQL_C_UBIGINT, SQL_C_STINYINT, SQL_C_UTINYINT, SQL_C_BIT, SQL_C_FLOAT, SQL_C_DOUBLE, SQL_C_CHAR. It supports the following SQL data types: int, bigint, smallint, tinyint, real, float, bit, varchar(n).
10+
Currently, the extension supports the following data types: SQL_C_SLONG, SQL_C_ULONG, SQL_C_SSHORT, SQL_C_USHORT, SQL_C_SBIGINT, SQL_C_UBIGINT, SQL_C_STINYINT, SQL_C_UTINYINT, SQL_C_BIT, SQL_C_FLOAT, SQL_C_DOUBLE, SQL_C_CHAR, SQL_C_WCHAR. It supports the following SQL data types: int, bigint, smallint, tinyint, real, float, bit, varchar(n), nvarchar(n).
1111

1212
To use this dotnet-core-CSharp-lang-extension.zip package, follow [this tutorial](./sample/regex/README.md). For any fixes or enhancements, you are welcome to modify, rebuild and use the binaries using the following instructions.
1313

language-extensions/dotnet-core-CSharp/src/managed/CSharpDataSet.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,10 @@ private set
4646
/// This dictionary contains all the columns metadata as CSharpColumn objects.
4747
/// </summary>
4848
protected Dictionary<ushort, CSharpColumn> _columns = new Dictionary<ushort, CSharpColumn>();
49+
50+
/// <summary>
/// Read-only accessor exposing the dictionary that holds the metadata
/// of all columns as CSharpColumn objects.
/// </summary>
public Dictionary<ushort, CSharpColumn> Columns
{
    get => _columns;
}
4954
}
5055
}

language-extensions/dotnet-core-CSharp/src/managed/CSharpInputDataSet.cs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,25 @@ private unsafe void AddColumn(
129129
case SqlDataType.DotNetChar:
130130
int[] strLens = new int[rowsNumber];
131131
Interop.Copy((int*)colMap, strLens, 0, (int)rowsNumber);
132-
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.StringSplitToArray(Interop.UTF8PtrToStr((char*)colData), strLens)));
132+
133+
// Use byte-based splitting to properly handle multi-byte UTF-8 characters
134+
//
135+
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.UTF8ByteSplitToArray((byte*)colData, strLens)));
136+
break;
137+
case SqlDataType.DotNetWChar:
138+
int[] wcharStrLens = new int[rowsNumber];
139+
Interop.Copy((int*)colMap, wcharStrLens, 0, (int)rowsNumber);
140+
141+
// For NCHAR/WCHAR, the strLenOrNullMap contains byte lengths, but splitting the string returned by UTF16PtrToStr needs UTF-16 code unit counts.
142+
// Each UTF-16 code unit is 2 bytes (sizeof(char)), so positive lengths are divided by sizeof(char); other values are kept as-is.
143+
//
144+
int[] wcharCharLens = new int[rowsNumber];
145+
for (int i = 0; i < (int)rowsNumber; i++)
146+
{
147+
wcharCharLens[i] = wcharStrLens[i] > 0 ? wcharStrLens[i] / sizeof(char) : wcharStrLens[i];
148+
}
149+
150+
CSharpDataFrame.Columns.Add(new StringDataFrameColumn(_columns[columnNumber].Name, DataSetUtils.StringSplitToArray(Interop.UTF16PtrToStr((char*)colData), wcharCharLens)));
133151
break;
134152
default:
135153
throw new NotImplementedException("Column type for " + _columns[columnNumber].DataType.ToString() + " has not been implemented yet");

language-extensions/dotnet-core-CSharp/src/managed/CSharpOutputDataSet.cs

Lines changed: 112 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,12 @@ public class CSharpOutputDataSet: CSharpDataSet
4242
/// This method extracts metadata and actual data for each column supplied
4343
/// by extracting data and information from every DataFrameColumn.
4444
/// </summary>
45-
public unsafe void ExtractColumns(DataFrame CSharpDataFrame)
45+
/// <param name="CSharpDataFrame">The DataFrame containing the output data.</param>
46+
/// <param name="inputColumns">
47+
/// Optional dictionary of input column metadata, keyed by column number. When it contains an entry for
48+
/// an output column number, that entry's data type and size replace the defaults derived from the DataFrame column.
49+
/// </param>
50+
public unsafe void ExtractColumns(DataFrame CSharpDataFrame, Dictionary<ushort, CSharpColumn> inputColumns = null)
4651
{
4752
Logging.Trace("CSharpOutputDataSet::ExtractColumns");
4853
_strLenOrNullMapPtrs = new int*[ColumnsNumber];
@@ -51,15 +56,27 @@ public unsafe void ExtractColumns(DataFrame CSharpDataFrame)
5156
{
5257
DataFrameColumn column = CSharpDataFrame.Columns[columnNumber];
5358

59+
// Determine the SQL data type for this column
60+
// Prefer the incoming input column metadata when available so nvarchar metadata is preserved.
61+
// Default to ANSI mapping to keep existing behavior when no metadata is supplied.
62+
//
63+
SqlDataType dataType = DataTypeMap[column.DataType];
64+
ulong columnSize = (ulong)DataTypeSize[dataType];
65+
if(inputColumns != null && inputColumns.ContainsKey(columnNumber))
66+
{
67+
dataType = inputColumns[columnNumber].DataType;
68+
columnSize = inputColumns[columnNumber].Size;
69+
}
70+
5471
// Add column metadata to a CSharpColumn dictionary
5572
//
5673
_columns[columnNumber] = new CSharpColumn
5774
{
5875
Name = column.Name,
59-
DataType = DataTypeMap[column.DataType],
76+
DataType = dataType,
6077
Nullable = (short)(column.NullCount > 0 ? 1 : 0),
6178
DecimalDigits = 0,
62-
Size = (ulong)DataTypeSize[DataTypeMap[column.DataType]],
79+
Size = columnSize,
6380
Id = columnNumber
6481
};
6582

@@ -79,17 +96,16 @@ public unsafe void RetrieveColumns(
7996
)
8097
{
8198
Logging.Trace("CSharpOutputDataSet::RetrieveColumns");
82-
fixed (void** ptrptr = _dataPtrs)
83-
{
84-
_handleList.Add(GCHandle.Alloc(_dataPtrs));
85-
*data = ptrptr;
86-
}
99+
100+
// Pin the pointer arrays and get their addresses
101+
//
102+
GCHandle dataHandle = GCHandle.Alloc(_dataPtrs, GCHandleType.Pinned);
103+
*data = (void**)dataHandle.AddrOfPinnedObject();
104+
_handleList.Add(dataHandle);
87105

88-
fixed (int** ptrptr = _strLenOrNullMapPtrs)
89-
{
90-
_handleList.Add(GCHandle.Alloc(_strLenOrNullMapPtrs));
91-
*strLenOrNullMap = ptrptr;
92-
}
106+
GCHandle strLenHandle = GCHandle.Alloc(_strLenOrNullMapPtrs, GCHandleType.Pinned);
107+
*strLenOrNullMap = (int**)strLenHandle.AddrOfPinnedObject();
108+
_handleList.Add(strLenHandle);
93109
}
94110

95111
/// <summary>
@@ -121,13 +137,15 @@ DataFrameColumn column
121137
)
122138
{
123139
Logging.Trace("CSharpOutputDataSet::ExtractColumn");
124-
int[] colMap = GetStrLenNullMap(column);
125-
fixed(int* len = colMap)
126-
{
127-
_strLenOrNullMapPtrs[columnNumber] = len;
128-
}
140+
int[] colMap = GetStrLenNullMap(columnNumber, column);
141+
GCHandle colMapHandle = GCHandle.Alloc(colMap, GCHandleType.Pinned);
142+
_strLenOrNullMapPtrs[columnNumber] = (int*)colMapHandle.AddrOfPinnedObject();
143+
_handleList.Add(colMapHandle);
129144

130-
switch(DataTypeMap[column.DataType])
145+
// Use the data type already determined in ExtractColumns (which preserves nvarchar/varchar distinction)
146+
// instead of DataTypeMap[column.DataType] which always maps string to DotNetChar
147+
//
148+
switch(_columns[columnNumber].DataType)
131149
{
132150
case SqlDataType.DotNetInteger:
133151
SetDataPtrs<int>(columnNumber, GetArray<int>(column));
@@ -176,6 +194,17 @@ DataFrameColumn column
176194

177195
SetDataPtrs<byte>(columnNumber, GetStringArray(column));
178196
break;
197+
case SqlDataType.DotNetWChar:
198+
// Preserve nvarchar metadata by emitting UTF-16 data and byte counts.
199+
//
200+
int maxUnicodeStrLen = colMap.Max();
201+
if(maxUnicodeStrLen > 0)
202+
{
203+
_columns[columnNumber].Size = (ulong)maxUnicodeStrLen;
204+
}
205+
206+
SetDataPtrs<char>(columnNumber, GetUnicodeStringArray(column));
207+
break;
179208
default:
180209
throw new NotImplementedException("Parameter type for " + DataTypeMap[column.DataType].ToString() + " has not been implemented yet");
181210
}
@@ -226,49 +255,102 @@ private T[] GetArray<T>(DataFrameColumn column) where T : unmanaged
226255
private byte[] GetStringArray(DataFrameColumn column)
227256
{
228257
StringBuilder builder = new StringBuilder();
258+
int totalBytes = 0;
229259
for(int rowNumber = 0; rowNumber < column.Length; ++rowNumber)
230260
{
231261
// In case of null strings, nothing will be added to the returned data.
232262
//
233263
if(column[rowNumber] != null)
234264
{
235-
builder.Append(column[rowNumber]);
265+
string value = (string)column[rowNumber];
266+
int byteLen = Encoding.UTF8.GetByteCount(value);
267+
Logging.Trace($"GetStringArray: Row {rowNumber}, Value='{value}', ByteLen={byteLen}, CurrentOffset={totalBytes}");
268+
builder.Append(value);
269+
totalBytes += byteLen;
270+
}
271+
else
272+
{
273+
Logging.Trace($"GetStringArray: Row {rowNumber} is NULL");
274+
}
275+
}
276+
277+
byte[] result = Encoding.UTF8.GetBytes(builder.ToString());
278+
Logging.Trace($"GetStringArray: Total buffer size={result.Length}, TotalBytesCalculated={totalBytes}");
279+
return result;
280+
}
281+
282+
/// <summary>
283+
/// This method builds a contiguous UTF-16 buffer for string types (nvarchar/nchar).
284+
/// </summary>
285+
private char[] GetUnicodeStringArray(DataFrameColumn column)
286+
{
287+
StringBuilder builder = new StringBuilder();
288+
int totalBytes = 0;
289+
for(int rowNumber = 0; rowNumber < column.Length; ++rowNumber)
290+
{
291+
if(column[rowNumber] != null)
292+
{
293+
string value = (string)column[rowNumber];
294+
int byteLen = Encoding.Unicode.GetByteCount(value);
295+
Logging.Trace($"GetUnicodeStringArray: Row {rowNumber}, Value='{value}', ByteLen={byteLen}, CurrentOffset={totalBytes}");
296+
builder.Append(value);
297+
totalBytes += byteLen;
298+
}
299+
else
300+
{
301+
Logging.Trace($"GetUnicodeStringArray: Row {rowNumber} is NULL");
236302
}
237303
}
238304

239-
return Encoding.UTF8.GetBytes(builder.ToString());
305+
char[] result = builder.ToString().ToCharArray();
306+
Logging.Trace($"GetUnicodeStringArray: Total buffer size={result.Length * sizeof(char)}, TotalBytesCalculated={totalBytes}");
307+
return result;
240308
}
241309

242310
/// <summary>
243311
/// This method gets the StrLenNullMap from a DataFrameColumn Column.
244312
/// </summary>
245-
private int[] GetStrLenNullMap(DataFrameColumn column)
313+
/// <param name="columnNumber">The column index, used to look up the correct data type from _columns</param>
314+
/// <param name="column">The DataFrameColumn containing the data</param>
315+
/// <returns>Array of string lengths or null indicators for each row</returns>
316+
private int[] GetStrLenNullMap(ushort columnNumber, DataFrameColumn column)
246317
{
247318
int[] colMap = new int[column.Length];
248-
_handleList.Add(GCHandle.Alloc(colMap, GCHandleType.Pinned));
319+
320+
SqlDataType dataType = _columns[columnNumber].DataType;
321+
Logging.Trace($"GetStrLenNullMap: Column {columnNumber}, DataType={dataType}, RowCount={column.Length}");
322+
249323
for(int rowNumber = 0; rowNumber < column.Length; ++rowNumber)
250324
{
251325
if(column[rowNumber] != null)
252326
{
253-
if(!DataTypeMap.ContainsKey(column.DataType))
254-
{
255-
throw new NotImplementedException("Parameter type for " + column.DataType.ToString() + " has not been implemented yet");
256-
}
257-
258-
SqlDataType dataType = DataTypeMap[column.DataType];
259327
switch(dataType)
260328
{
261329
case SqlDataType.DotNetChar:
262-
colMap[rowNumber] = ((string)column[rowNumber]).Length;
330+
// Must match the actual byte count from Encoding.UTF8.GetBytes()
331+
//
332+
colMap[rowNumber] = Encoding.UTF8.GetByteCount((string)column[rowNumber]);
333+
Logging.Trace($"GetStrLenNullMap: Row {rowNumber}, Value='{column[rowNumber]}', ByteLen={colMap[rowNumber]}");
334+
break;
335+
case SqlDataType.DotNetWChar:
336+
// For nvarchar output, report UTF-16 byte length to match the emitted buffer.
337+
//
338+
colMap[rowNumber] = Encoding.Unicode.GetByteCount((string)column[rowNumber]);
339+
Logging.Trace($"GetStrLenNullMap: Row {rowNumber}, Value='{column[rowNumber]}', ByteLen={colMap[rowNumber]}");
263340
break;
264341
default:
342+
if(!DataTypeSize.ContainsKey(dataType))
343+
{
344+
throw new NotImplementedException("Parameter type for " + dataType.ToString() + " has not been implemented yet");
345+
}
265346
colMap[rowNumber] = DataTypeSize[dataType];
266347
break;
267348
}
268349
}
269350
else
270351
{
271352
colMap[rowNumber] = SQL_NULL_DATA;
353+
Logging.Trace($"GetStrLenNullMap: Row {rowNumber} is NULL");
272354
}
273355
}
274356

language-extensions/dotnet-core-CSharp/src/managed/CSharpParamContainer.cs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ public unsafe void AddParam(
135135
case SqlDataType.DotNetChar:
136136
_params[paramNumber].Value = Interop.UTF8PtrToStr((char*)paramValue, (ulong)strLenOrNullMap);
137137
break;
138+
case SqlDataType.DotNetWChar:
139+
// For NCHAR/WCHAR, strLenOrNullMap contains byte length, divide by 2 to get character count
140+
//
141+
_params[paramNumber].Value = Interop.UTF16PtrToStr((char*)paramValue, strLenOrNullMap / sizeof(char));
142+
break;
138143
default:
139144
throw new NotImplementedException("Parameter type for " + dataType.ToString() + " has not been implemented yet");
140145
}
@@ -210,6 +215,15 @@ public unsafe void ReplaceParam(
210215
*strLenOrNullMap = (param.Value.Length < *strLenOrNullMap) ? param.Value.Length : *strLenOrNullMap;
211216
ReplaceStringParam((string)param.Value, paramValue);
212217
break;
218+
case SqlDataType.DotNetWChar:
219+
// For NCHAR/WCHAR, strLenOrNullMap is in bytes
220+
// param.Size is in characters, so convert to bytes for comparison
221+
//
222+
int wcharByteLen = param.Value.Length * sizeof(char);
223+
int maxByteLen = (int)param.Size * sizeof(char);
224+
*strLenOrNullMap = (wcharByteLen < maxByteLen) ? wcharByteLen : maxByteLen;
225+
ReplaceUnicodeStringParam((string)param.Value, paramValue);
226+
break;
213227
default:
214228
throw new NotImplementedException("Parameter type for " + param.DataType.ToString() + " has not been implemented yet");
215229
}
@@ -270,5 +284,33 @@ private unsafe void ReplaceStringParam(
270284
}
271285
}
272286
}
287+
288+
/// <summary>
/// This method replaces parameter value for Unicode string data types.
/// A null or empty string is pinned directly; any other string is encoded with
/// Encoding.Unicode and the resulting byte array is pinned. The pinned address is
/// assigned to paramValue (a null string yields a null address).
/// </summary>
/// <param name="value">The string whose data is exposed through paramValue.</param>
/// <param name="paramValue">Receives the address of the pinned data.</param>
private unsafe void ReplaceUnicodeStringParam(
    string value,
    void **paramValue
)
{
    // The address handed out here is used after this method returns, so the target must be
    // pinned through the GCHandle itself. A pointer taken inside a fixed block is only valid
    // within that block, and a non-pinned GCHandle keeps the object alive but does not
    // prevent the GC from relocating it.
    //
    GCHandle handle;
    if(string.IsNullOrEmpty(value))
    {
        handle = GCHandle.Alloc(value, GCHandleType.Pinned);
    }
    else
    {
        byte[] strBytes = Encoding.Unicode.GetBytes(value);
        handle = GCHandle.Alloc(strBytes, GCHandleType.Pinned);
    }

    // The handle is stored in _handleList so the pin outlives this call.
    //
    _handleList.Add(handle);

    // AddrOfPinnedObject returns IntPtr.Zero when the pinned target is null.
    //
    *paramValue = (void*)handle.AddrOfPinnedObject();
}
273315
}
274316
}

language-extensions/dotnet-core-CSharp/src/managed/CSharpSession.cs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,9 @@ public void Execute(
192192
if(_outputDataSet.CSharpDataFrame != null)
193193
{
194194
_outputDataSet.ColumnsNumber = (ushort)_outputDataSet.CSharpDataFrame.Columns.Count;
195-
_outputDataSet.ExtractColumns(_outputDataSet.CSharpDataFrame);
195+
// Pass input column metadata to preserve data types (e.g., nvarchar vs varchar).
196+
//
197+
_outputDataSet.ExtractColumns(_outputDataSet.CSharpDataFrame, _inputDataSet.Columns);
196198
*outputSchemaColumnsNumber = _outputDataSet.ColumnsNumber;
197199
}
198200
else

0 commit comments

Comments
 (0)