FIX: Encoding Decoding by jahnvi480 ยท Pull Request #342 ยท microsoft/mssql-python

๐Ÿ“Š Code Coverage Report

๐Ÿ”ฅ Diff Coverage

64%


๐ŸŽฏ Overall Coverage

74%


๐Ÿ“ˆ Total Lines Covered: 5191 out of 6929
๐Ÿ“ Project: mssql-python


Diff Coverage

Diff: main...HEAD, staged and unstaged changes

  • mssql_python/connection.py (100%)
  • mssql_python/cursor.py (100%)
  • mssql_python/pybind/ddbc_bindings.cpp (52.1%): Missing lines 327,329,331-346,348-350,352-358,361,1745-1759,1761-1762,2047-2064,2074,2546,2849-2850,2852-2853,2930-2932,2934-2935,2941-2942,3275-3276
  • mssql_python/pybind/ddbc_bindings.h (0.0%): Missing lines 828-830

Summary

  • Total: 230 lines
  • Missing: 82 lines
  • Coverage: 64%

mssql_python/pybind/ddbc_bindings.cpp

Lines 323-365

  323                     *strLenOrIndPtr = SQL_LEN_DATA_AT_EXEC(0);
  324                     bufferLength = 0;
  325                 } else {
  326                     // Use Python's codec system to encode the string with specified encoding
! 327                     std::string encodedStr;
  328 
! 329                     if (py::isinstance<py::str>(param)) {
  330                         // Encode Unicode string using the specified encoding
! 331                         try {
! 332                             py::object encoded = param.attr("encode")(charEncoding, "strict");
! 333                             encodedStr = encoded.cast<std::string>();
! 334                             LOG("BindParameters: param[%d] SQL_C_CHAR - Encoded with '%s', "
! 335                                 "size=%zu bytes",
! 336                                 paramIndex, charEncoding.c_str(), encodedStr.size());
! 337                         } catch (const py::error_already_set& e) {
! 338                             LOG_ERROR("BindParameters: param[%d] SQL_C_CHAR - Failed to encode "
! 339                                       "with '%s': %s",
! 340                                       paramIndex, charEncoding.c_str(), e.what());
! 341                             throw std::runtime_error(std::string("Failed to encode parameter ") +
! 342                                                      std::to_string(paramIndex) +
! 343                                                      " with encoding '" + charEncoding +
! 344                                                      "': " + e.what());
! 345                         }
! 346                     } else {
  347                         // bytes/bytearray - use as-is (already encoded)
! 348                         if (py::isinstance<py::bytes>(param)) {
! 349                             encodedStr = param.cast<std::string>();
! 350                         } else {
  351                             // bytearray
! 352                             encodedStr = std::string(
! 353                                 reinterpret_cast<const char*>(PyByteArray_AsString(param.ptr())),
! 354                                 PyByteArray_Size(param.ptr()));
! 355                         }
! 356                         LOG("BindParameters: param[%d] SQL_C_CHAR - Using raw bytes, size=%zu",
! 357                             paramIndex, encodedStr.size());
! 358                     }
  359 
  360                     std::string* strParam =
! 361                         AllocateParamBuffer<std::string>(paramBuffers, encodedStr);
  362                     dataPtr = const_cast<void*>(static_cast<const void*>(strParam->c_str()));
  363                     bufferLength = strParam->size() + 1;
  364                     strLenOrIndPtr = AllocateParamBuffer<SQLLEN>(paramBuffers);
  365                     *strLenOrIndPtr = SQL_NTS;

Lines 1741-1766

  1741                             offset += len;
  1742                         }
  1743                     } else if (matchedInfo->paramCType == SQL_C_CHAR) {
  1744                         // Encode the string using the specified encoding
! 1745                         std::string encodedStr;
! 1746                         try {
! 1747                             if (py::isinstance<py::str>(pyObj)) {
! 1748                                 py::object encoded = pyObj.attr("encode")(charEncoding, "strict");
! 1749                                 encodedStr = encoded.cast<std::string>();
! 1750                                 LOG("SQLExecute: DAE SQL_C_CHAR - Encoded with '%s', %zu bytes",
! 1751                                     charEncoding.c_str(), encodedStr.size());
! 1752                             } else {
! 1753                                 encodedStr = pyObj.cast<std::string>();
! 1754                             }
! 1755                         } catch (const py::error_already_set& e) {
! 1756                             LOG_ERROR("SQLExecute: DAE SQL_C_CHAR - Failed to encode with '%s': %s",
! 1757                                       charEncoding.c_str(), e.what());
! 1758                             throw;
! 1759                         }
  1760 
! 1761                         size_t totalBytes = encodedStr.size();
! 1762                         const char* dataPtr = encodedStr.data();
  1763                         size_t offset = 0;
  1764                         size_t chunkBytes = DAE_CHUNK_SIZE;
  1765                         while (offset < totalBytes) {
  1766                             size_t len = std::min(chunkBytes, totalBytes - offset);

Lines 2043-2068

  2043 
  2044                             if (py::isinstance<py::str>(columnValues[i])) {
  2045                                 // Use Python's codec system to encode the string with specified
  2046                                 // encoding
! 2047                                 try {
! 2048                                     py::object encoded =
! 2049                                         columnValues[i].attr("encode")(charEncoding, "strict");
! 2050                                     encodedStr = encoded.cast<std::string>();
! 2051                                     LOG("BindParameterArray: param[%d] row[%zu] SQL_C_CHAR - "
! 2052                                         "Encoded with '%s', "
! 2053                                         "size=%zu bytes",
! 2054                                         paramIndex, i, charEncoding.c_str(), encodedStr.size());
! 2055                                 } catch (const py::error_already_set& e) {
! 2056                                     LOG_ERROR("BindParameterArray: param[%d] row[%zu] SQL_C_CHAR - "
! 2057                                               "Failed to encode "
! 2058                                               "with '%s': %s",
! 2059                                               paramIndex, i, charEncoding.c_str(), e.what());
! 2060                                     throw std::runtime_error(
! 2061                                         std::string("Failed to encode parameter ") +
! 2062                                         std::to_string(paramIndex) + " row " + std::to_string(i) +
! 2063                                         " with encoding '" + charEncoding + "': " + e.what());
! 2064                                 }
  2065                             } else {
  2066                                 // bytes/bytearray - use as-is (already encoded)
  2067                                 encodedStr = columnValues[i].cast<std::string>();
  2068                             }

Lines 2070-2078

  2070                             if (encodedStr.size() > info.columnSize) {
  2071                                 LOG("BindParameterArray: String/binary too "
  2072                                     "long - param_index=%d, row=%zu, size=%zu, "
  2073                                     "max=%zu",
! 2074                                     paramIndex, i, encodedStr.size(), info.columnSize);
  2075                                 ThrowStdException("Input exceeds column size at index " +
  2076                                                   std::to_string(i));
  2077                             }
  2078                             std::memcpy(charArray + i * (info.columnSize + 1), encodedStr.c_str(),

Lines 2542-2550

  2542             py::list rowParams = columnwise_params[rowIndex];
  2543 
  2544             std::vector<std::shared_ptr<void>> paramBuffers;
  2545             rc = BindParameters(hStmt, rowParams, const_cast<std::vector<ParamInfo>&>(paramInfos),
! 2546                                 paramBuffers, charEncoding);
  2547             if (!SQL_SUCCEEDED(rc)) {
  2548                 LOG("SQLExecuteMany: BindParameters failed for row %zu - rc=%d", rowIndex, rc);
  2549                 return rc;
  2550             }

Lines 2845-2857

  2845             "column %d",
  2846             charEncoding.c_str(), buffer.size(), py::len(decoded), colIndex);
  2847         return decoded;
  2848     } catch (const py::error_already_set& e) {
! 2849         LOG_ERROR("FetchLobColumnData: Failed to decode with '%s' for column %d: %s",
! 2850                   charEncoding.c_str(), colIndex, e.what());
  2851         // Return raw bytes as fallback
! 2852         return raw_bytes;
! 2853     }
  2854 }
  2855 
  2856 // Helper function to retrieve column data
  2857 SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, py::list& row,

Lines 2926-2939

  2926                                     LOG("SQLGetData: CHAR column %d decoded with '%s', %zu bytes "
  2927                                         "-> %zu chars",
  2928                                         i, charEncoding.c_str(), (size_t)dataLen, py::len(decoded));
  2929                                 } catch (const py::error_already_set& e) {
! 2930                                     LOG_ERROR(
! 2931                                         "SQLGetData: Failed to decode CHAR column %d with '%s': %s",
! 2932                                         i, charEncoding.c_str(), e.what());
  2933                                     // Return raw bytes as fallback
! 2934                                     row.append(raw_bytes);
! 2935                                 }
  2936                             } else {
  2937                                 // Buffer too small, fallback to streaming
  2938                                 LOG("SQLGetData: CHAR column %d data truncated "
  2939                                     "(buffer_size=%zu), using streaming LOB",

Lines 2937-2946

  2937                                 // Buffer too small, fallback to streaming
  2938                                 LOG("SQLGetData: CHAR column %d data truncated "
  2939                                     "(buffer_size=%zu), using streaming LOB",
  2940                                     i, dataBuffer.size());
! 2941                                 row.append(FetchLobColumnData(hStmt, i, SQL_C_CHAR, false, false,
! 2942                                                               charEncoding));
  2943                             }
  2944                         } else if (dataLen == SQL_NULL_DATA) {
  2945                             LOG("SQLGetData: Column %d is NULL (CHAR)", i);
  2946                             row.append(py::none());

Lines 3271-3280

  3271                             if (static_cast<size_t>(dataLen) <= columnSize) {
  3272                                 row.append(py::bytes(
  3273                                     reinterpret_cast<const char*>(dataBuffer.data()), dataLen));
  3274                             } else {
! 3275                                 row.append(
! 3276                                     FetchLobColumnData(hStmt, i, SQL_C_BINARY, false, true, ""));
  3277                             }
  3278                         } else if (dataLen == SQL_NULL_DATA) {
  3279                             row.append(py::none());
  3280                         } else if (dataLen == 0) {

mssql_python/pybind/ddbc_bindings.h

  824             PyList_SET_ITEM(row, col - 1, pyBytes);
  825         }
  826     } else {
  827         // Slow path: LOB data requires separate fetch call
! 828         PyList_SET_ITEM(
! 829             row, col - 1,
! 830             FetchLobColumnData(hStmt, col, SQL_C_BINARY, false, true, "").release().ptr());
  831     }
  832 }
  833 
  834 }  // namespace ColumnProcessors


๐Ÿ“‹ Files Needing Attention

๐Ÿ“‰ Files with overall lowest coverage (click to expand)
mssql_python.pybind.logger_bridge.hpp: 58.8%
mssql_python.pybind.logger_bridge.cpp: 59.2%
mssql_python.pybind.ddbc_bindings.cpp: 66.2%
mssql_python.row.py: 66.2%
mssql_python.helpers.py: 67.5%
mssql_python.pybind.connection.connection.cpp: 73.6%
mssql_python.pybind.ddbc_bindings.h: 76.6%
mssql_python.ddbc_bindings.py: 79.6%
mssql_python.pybind.connection.connection_pool.cpp: 79.6%
mssql_python.connection.py: 83.7%

๐Ÿ”— Quick Links