Generated by Cython 3.1.6
Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.
Raw output: csv_reader.cpp
+001: """Cython wrapper for reading CSV/TSV files."""
__pyx_t_2 = __Pyx_PyDict_NewPresized(4); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_kp_u_get_csv_schema_line_65, __pyx_mstate_global->__pyx_kp_u_Extract_schema_information_from) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_kp_u_read_csv_line_158, __pyx_mstate_global->__pyx_kp_u_Read_CSV_data_into_columnar_for) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_kp_u_detect_csv_dialect_line_325, __pyx_mstate_global->__pyx_kp_u_Auto_detect_CSV_dialect_delimit) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_kp_u_read_tsv_line_382, __pyx_mstate_global->__pyx_kp_u_Read_TSV_tab_separated_values_d) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_test, __pyx_t_2) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
002:
003: # distutils: language = c++
004: # cython: language_level=3
005: # cython: nonecheck=False
006: # cython: cdivision=True
007: # cython: boundscheck=False
008: # cython: wraparound=False
009: # cython: infer_types=True
010:
011: from libc.stdint cimport uint8_t
012: from libcpp.string cimport string
013: from libcpp.vector cimport vector
014: from libcpp cimport bool as cbool
015: from cpython.buffer cimport PyBUF_CONTIG_RO, PyObject_GetBuffer, PyBuffer_Release, Py_buffer
016:
017:
018: # C++ declarations from csv_parser.hpp
# Hand-written declarations for the C++ API in csv_parser.hpp. Cython does not
# read the header itself, so these must be kept in sync with it manually.
# NOTE(review): none of the functions below is declared `except +`, so a C++
# exception thrown by the parser would not be translated into a Python
# exception -- confirm the parser never throws, or add `except +`.
019: cdef extern from "csv_parser.hpp":
# Column value types. Every member has an explicit C name of the form
# CsvType::X, which is what the generated C++ refers to
# (presumably a scoped `enum class` in the header -- confirm).
020: cdef enum CsvType:
021: Null "CsvType::Null"
022: Boolean "CsvType::Boolean"
023: Integer "CsvType::Integer"
024: Double "CsvType::Double"
025: String "CsvType::String"
026:
# One entry per column in the vector returned by GetCsvSchema.
027: cdef cppclass CsvColumnSchema:
028: string name
029: CsvType type
030: cbool nullable
031:
# Data of one column: a separate vector per value kind plus a null mask.
# Note that `type` is a string here, not a CsvType.
# presumably only the vector matching `type` is populated -- verify against
# csv_parser.hpp.
032: cdef cppclass CsvColumn:
033: vector[long long] int_values
034: vector[double] double_values
035: vector[string] string_values
036: vector[uint8_t] boolean_values
037: vector[uint8_t] null_mask
038: string type
039: cbool success
040:
# Result of ReadCsv: the columns, their names, a row count and a success flag.
041: cdef cppclass CsvTable:
042: vector[CsvColumn] columns
043: vector[string] column_names
044: size_t num_rows
045: cbool success
046:
# Parsing options passed by const reference to GetCsvSchema and ReadCsv, and
# returned by DetectCsvDialect. All character fields are single C chars.
047: cdef cppclass CsvDialect:
048: char delimiter
049: char quote_char
050: char escape_char
051: cbool double_quote
052: cbool has_header
053:
# Schema inference over a raw byte range (pointer + size).
054: vector[CsvColumnSchema] GetCsvSchema(const uint8_t* data, size_t size,
055: const CsvDialect& dialect, size_t sample_size)
056:
# ReadCsv has two overloads: without and with an explicit list of column names.
057: CsvTable ReadCsv(const uint8_t* data, size_t size, const CsvDialect& dialect)
058:
059: CsvTable ReadCsv(const uint8_t* data, size_t size, const CsvDialect& dialect,
060: const vector[string]& column_names)
061:
# Dialect detection over a raw byte range; returns the dialect by value.
062: CsvDialect DetectCsvDialect(const uint8_t* data, size_t size, size_t sample_size)
063:
064:
def get_csv_schema(data, delimiter=',', quote_char='"', sample_size=100, has_header=True):
    """
    Extract schema information from CSV data.

    Parameters
    ----------
    data : bytes or memoryview
        The CSV data to analyze. A memoryview is read through its own
        buffer, so a sliced view is analyzed as sliced; it must be
        C-contiguous.
    delimiter : str, default ','
        Field delimiter character (use '\\t' for TSV). Only the first
        character is used; an empty value falls back to ','.
    quote_char : str, default '"'
        Quote character for fields. Only the first character is used;
        an empty value falls back to '"'.
    sample_size : int, default 100
        Number of rows to sample for type inference
    has_header : bool, default True
        Whether the first line contains column names

    Returns
    -------
    list of dict
        List of column schemas with keys: name, type, nullable

    Raises
    ------
    TypeError
        If ``data`` is neither bytes nor a memoryview.
    ValueError
        If the delimiter or quote character does not fit in a single byte.
    BufferError
        If ``data`` cannot be exported as a contiguous read-only buffer.

    Examples
    --------
    >>> data = b'name,age,salary\\nAlice,30,50000\\nBob,25,45000'
    >>> schema = get_csv_schema(data)
    >>> for col in schema:
    ...     print(f"{col['name']}: {col['type']}")
    name: string
    age: int64
    salary: int64
    """
    cdef Py_buffer view
    cdef CsvDialect dialect
    cdef vector[CsvColumnSchema] schema
    cdef size_t rows_to_sample
    cdef long delimiter_code
    cdef long quote_code
    cdef size_t i
    cdef int type_val

    # Reject unsupported inputs first so a bad `data` argument keeps
    # precedence over problems with the other arguments.
    if not isinstance(data, (memoryview, bytes)):
        raise TypeError(f"Expected bytes or memoryview, got {type(data)}")

    # Convert every Python-level argument *before* the buffer is acquired:
    # anything that can raise happens here, where there is nothing to leak.
    delimiter_code = ord(delimiter[0]) if delimiter else ord(',')
    quote_code = ord(quote_char[0]) if quote_char else ord('"')
    if delimiter_code > 255 or quote_code > 255:
        # The dialect stores plain C chars; larger code points would be
        # silently truncated to an unrelated byte.
        raise ValueError("delimiter and quote_char must be single-byte characters")
    rows_to_sample = sample_size

    dialect.delimiter = <char>delimiter_code
    dialect.quote_char = <char>quote_code
    dialect.escape_char = ord('\\')
    dialect.double_quote = True
    dialect.has_header = has_header

    # Export the buffer of `data` itself rather than `data.obj`: the
    # underlying object knows nothing about slicing applied to a memoryview.
    # bytes (including subclasses) go through the same path.
    PyObject_GetBuffer(data, &view, PyBUF_CONTIG_RO)
    try:
        schema = GetCsvSchema(<const uint8_t*>view.buf, <size_t>view.len,
                              dialect, rows_to_sample)
    finally:
        # Always hand the buffer back, whatever happened above.
        PyBuffer_Release(&view)

    # Convert the C++ schema into plain Python objects.
    result = []
    for i in range(schema.size()):
        type_val = <int>schema[i].type

        if type_val == <int>CsvType.Integer:
            type_str = "int64"
        elif type_val == <int>CsvType.Double:
            type_str = "double"
        elif type_val == <int>CsvType.Boolean:
            type_str = "boolean"
        else:
            # CsvType.String, CsvType.Null and any unrecognised value are
            # all reported as "string".
            type_str = "string"

        result.append({
            'name': schema[i].name.decode('utf-8'),
            'type': type_str,
            'nullable': schema[i].nullable
        })

    return result
156:
157:
+158: def read_csv(data, columns=None, delimiter=',', quote_char='"', has_header=True):
/* Python wrapper */ static PyObject *__pyx_pw_4rugo_3csv_3read_csv(PyObject *__pyx_self, #if CYTHON_METH_FASTCALL PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds #else PyObject *__pyx_args, PyObject *__pyx_kwds #endif ); /*proto*/ PyDoc_STRVAR(__pyx_doc_4rugo_3csv_2read_csv, "\n Read CSV data into columnar format.\n \n Parameters\n ----------\n data : bytes or memoryview\n The CSV data to read\n columns : list of str, optional\n List of column names to read. If None, reads all columns.\n delimiter : str, default ','\n Field delimiter character (use '\\t' for TSV)\n quote_char : str, default '\"'\n Quote character for fields\n has_header : bool, default True\n Whether the first line contains column names\n \n Returns\n -------\n dict\n Dictionary with keys:\n - success: bool - Whether reading succeeded\n - num_rows: int - Number of rows read\n - column_names: list of str - Column names\n - columns: list of list - Column data (one list per column)\n \n Examples\n --------\n >>> data = b'name,age,salary\\nAlice,30,50000\\nBob,25,45000'\n >>> result = read_csv(data)\n >>> print(result['column_names'])\n ['name', 'age', 'salary']\n >>> print(result['columns'][0]) # name column\n ['Alice', 'Bob']\n \n >>> # Read with projection\n >>> result = read_csv(data, columns=['name', 'salary'])\n >>> print(result['column_names'])\n ['name', 'salary']\n "); static PyMethodDef __pyx_mdef_4rugo_3csv_3read_csv = {"read_csv", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4rugo_3csv_3read_csv, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_4rugo_3csv_2read_csv}; static PyObject *__pyx_pw_4rugo_3csv_3read_csv(PyObject *__pyx_self, #if CYTHON_METH_FASTCALL PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds #else PyObject *__pyx_args, PyObject *__pyx_kwds #endif ) { PyObject *__pyx_v_data = 0; PyObject *__pyx_v_columns = 0; PyObject *__pyx_v_delimiter = 0; PyObject *__pyx_v_quote_char = 0; PyObject 
*__pyx_v_has_header = 0; #if !CYTHON_METH_FASTCALL CYTHON_UNUSED Py_ssize_t __pyx_nargs; #endif CYTHON_UNUSED PyObject *const *__pyx_kwvalues; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_csv (wrapper)", 0); #if !CYTHON_METH_FASTCALL #if CYTHON_ASSUME_SAFE_SIZE __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); #else __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; #endif #endif __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); { PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_data,&__pyx_mstate_global->__pyx_n_u_columns,&__pyx_mstate_global->__pyx_n_u_delimiter,&__pyx_mstate_global->__pyx_n_u_quote_char,&__pyx_mstate_global->__pyx_n_u_has_header,0}; PyObject* values[5] = {0,0,0,0,0}; const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0; if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 158, __pyx_L3_error) if (__pyx_kwds_len > 0) { switch (__pyx_nargs) { case 5: values[4] = __Pyx_ArgRef_FASTCALL(__pyx_args, 4); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[4])) __PYX_ERR(0, 158, __pyx_L3_error) CYTHON_FALLTHROUGH; case 4: values[3] = __Pyx_ArgRef_FASTCALL(__pyx_args, 3); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[3])) __PYX_ERR(0, 158, __pyx_L3_error) CYTHON_FALLTHROUGH; case 3: values[2] = __Pyx_ArgRef_FASTCALL(__pyx_args, 2); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[2])) __PYX_ERR(0, 158, __pyx_L3_error) CYTHON_FALLTHROUGH; case 2: values[1] = __Pyx_ArgRef_FASTCALL(__pyx_args, 1); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 158, __pyx_L3_error) CYTHON_FALLTHROUGH; case 1: values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 158, __pyx_L3_error) CYTHON_FALLTHROUGH; case 0: break; default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; if (__Pyx_ParseKeywords(__pyx_kwds, 
__pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "read_csv", 0) < (0)) __PYX_ERR(0, 158, __pyx_L3_error) if (!values[1]) values[1] = __Pyx_NewRef(((PyObject *)Py_None)); if (!values[2]) values[2] = __Pyx_NewRef(((PyObject *)((PyObject*)__pyx_mstate_global->__pyx_kp_u_))); if (!values[3]) values[3] = __Pyx_NewRef(((PyObject *)((PyObject*)__pyx_mstate_global->__pyx_kp_u__2))); if (!values[4]) values[4] = __Pyx_NewRef(((PyObject *)((PyObject*)Py_True))); for (Py_ssize_t i = __pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("read_csv", 0, 1, 5, i); __PYX_ERR(0, 158, __pyx_L3_error) } } } else { switch (__pyx_nargs) { case 5: values[4] = __Pyx_ArgRef_FASTCALL(__pyx_args, 4); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[4])) __PYX_ERR(0, 158, __pyx_L3_error) CYTHON_FALLTHROUGH; case 4: values[3] = __Pyx_ArgRef_FASTCALL(__pyx_args, 3); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[3])) __PYX_ERR(0, 158, __pyx_L3_error) CYTHON_FALLTHROUGH; case 3: values[2] = __Pyx_ArgRef_FASTCALL(__pyx_args, 2); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[2])) __PYX_ERR(0, 158, __pyx_L3_error) CYTHON_FALLTHROUGH; case 2: values[1] = __Pyx_ArgRef_FASTCALL(__pyx_args, 1); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 158, __pyx_L3_error) CYTHON_FALLTHROUGH; case 1: values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 158, __pyx_L3_error) break; default: goto __pyx_L5_argtuple_error; } if (!values[1]) values[1] = __Pyx_NewRef(((PyObject *)Py_None)); if (!values[2]) values[2] = __Pyx_NewRef(((PyObject *)((PyObject*)__pyx_mstate_global->__pyx_kp_u_))); if (!values[3]) values[3] = __Pyx_NewRef(((PyObject *)((PyObject*)__pyx_mstate_global->__pyx_kp_u__2))); if (!values[4]) values[4] = __Pyx_NewRef(((PyObject *)((PyObject*)Py_True))); } __pyx_v_data = values[0]; __pyx_v_columns = values[1]; __pyx_v_delimiter = values[2]; 
__pyx_v_quote_char = values[3]; __pyx_v_has_header = values[4]; } goto __pyx_L6_skip; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("read_csv", 0, 1, 5, __pyx_nargs); __PYX_ERR(0, 158, __pyx_L3_error) __pyx_L6_skip:; goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { Py_XDECREF(values[__pyx_temp]); } __Pyx_AddTraceback("rugo.csv.read_csv", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_4rugo_3csv_2read_csv(__pyx_self, __pyx_v_data, __pyx_v_columns, __pyx_v_delimiter, __pyx_v_quote_char, __pyx_v_has_header); /* function exit code */ for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { Py_XDECREF(values[__pyx_temp]); } __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_4rugo_3csv_2read_csv(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_data, PyObject *__pyx_v_columns, PyObject *__pyx_v_delimiter, PyObject *__pyx_v_quote_char, PyObject *__pyx_v_has_header) { uint8_t const *__pyx_v_data_ptr; size_t __pyx_v_data_size; PyObject *__pyx_v_data_bytes = 0; Py_buffer __pyx_v_view; bool __pyx_v_have_view; CsvDialect __pyx_v_dialect; std::vector<std::string> __pyx_v_column_names_cpp; PyObject *__pyx_v_col = NULL; CsvTable __pyx_v_table; PyObject *__pyx_v_result = NULL; size_t __pyx_v_i; size_t __pyx_v_j; PyObject *__pyx_v_col_type = NULL; std::vector<PY_LONG_LONG> ::size_type __pyx_v_n; std::vector<uint8_t> ::size_type __pyx_v_nm; PyObject *__pyx_v_col_data = NULL; PyObject *__pyx_r = NULL; /* … */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_7); __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_9); __Pyx_AddTraceback("rugo.csv.read_csv", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; 
__Pyx_XDECREF(__pyx_v_data_bytes); __Pyx_XDECREF(__pyx_v_col); __Pyx_XDECREF(__pyx_v_result); __Pyx_XDECREF(__pyx_v_col_type); __Pyx_XDECREF(__pyx_v_col_data); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* … */ __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4rugo_3csv_3read_csv, 0, __pyx_mstate_global->__pyx_n_u_read_csv, NULL, __pyx_mstate_global->__pyx_n_u_rugo_csv, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[1])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 158, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_mstate_global->__pyx_tuple[1]); if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_read_csv, __pyx_t_2) < (0)) __PYX_ERR(0, 158, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* … */ __pyx_mstate_global->__pyx_tuple[1] = PyTuple_Pack(4, Py_None, ((PyObject*)__pyx_mstate_global->__pyx_kp_u_), ((PyObject*)__pyx_mstate_global->__pyx_kp_u__2), ((PyObject*)Py_True)); if (unlikely(!__pyx_mstate_global->__pyx_tuple[1])) __PYX_ERR(0, 158, __pyx_L1_error) __Pyx_GOTREF(__pyx_mstate_global->__pyx_tuple[1]); __Pyx_GIVEREF(__pyx_mstate_global->__pyx_tuple[1]);
159: """
160: Read CSV data into columnar format.
161:
162: Parameters
163: ----------
164: data : bytes or memoryview
165: The CSV data to read
166: columns : list of str, optional
167: List of column names to read. If None, reads all columns.
168: delimiter : str, default ','
169: Field delimiter character (use '\\t' for TSV)
170: quote_char : str, default '"'
171: Quote character for fields
172: has_header : bool, default True
173: Whether the first line contains column names
174:
175: Returns
176: -------
177: dict
178: Dictionary with keys:
179: - success: bool - Whether reading succeeded
180: - num_rows: int - Number of rows read
181: - column_names: list of str - Column names
182: - columns: list of list - Column data (one list per column)
183:
184: Examples
185: --------
186: >>> data = b'name,age,salary\\nAlice,30,50000\\nBob,25,45000'
187: >>> result = read_csv(data)
188: >>> print(result['column_names'])
189: ['name', 'age', 'salary']
190: >>> print(result['columns'][0]) # name column
191: ['Alice', 'Bob']
192:
193: >>> # Read with projection
194: >>> result = read_csv(data, columns=['name', 'salary'])
195: >>> print(result['column_names'])
196: ['name', 'salary']
197: """
198: cdef const uint8_t* data_ptr
199: cdef size_t data_size
200: cdef bytes data_bytes
201: cdef Py_buffer view
+202: cdef cbool have_view = False
__pyx_v_have_view = 0;
203:
204: # Handle different input types
+205: if isinstance(data, memoryview):
__pyx_t_1 = PyMemoryView_Check(__pyx_v_data);
if (__pyx_t_1) {
/* … */
goto __pyx_L3;
}
+206: PyObject_GetBuffer(data.obj, &view, PyBUF_CONTIG_RO)
__pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_data, __pyx_mstate_global->__pyx_n_u_obj); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 206, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = PyObject_GetBuffer(__pyx_t_2, (&__pyx_v_view), PyBUF_CONTIG_RO); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(0, 206, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+207: data_ptr = <const uint8_t*>view.buf
__pyx_v_data_ptr = ((uint8_t const *)__pyx_v_view.buf);
+208: data_size = view.len
__pyx_t_4 = __pyx_v_view.len;
__pyx_v_data_size = __pyx_t_4;
+209: have_view = True
__pyx_v_have_view = 1;
+210: elif isinstance(data, bytes):
__pyx_t_1 = PyBytes_Check(__pyx_v_data);
if (likely(__pyx_t_1)) {
/* … */
goto __pyx_L3;
}
+211: data_bytes = data
__pyx_t_2 = __pyx_v_data;
__Pyx_INCREF(__pyx_t_2);
if (!(likely(PyBytes_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_t_2))) __PYX_ERR(0, 211, __pyx_L1_error)
__pyx_v_data_bytes = ((PyObject*)__pyx_t_2);
__pyx_t_2 = 0;
+212: data_ptr = <const uint8_t*><char*>data_bytes
if (unlikely(__pyx_v_data_bytes == Py_None)) {
PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found");
__PYX_ERR(0, 212, __pyx_L1_error)
}
__pyx_t_5 = __Pyx_PyBytes_AsWritableString(__pyx_v_data_bytes); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) __PYX_ERR(0, 212, __pyx_L1_error)
__pyx_v_data_ptr = ((uint8_t const *)((char *)__pyx_t_5));
+213: data_size = len(data_bytes)
if (unlikely(__pyx_v_data_bytes == Py_None)) {
PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
__PYX_ERR(0, 213, __pyx_L1_error)
}
__pyx_t_4 = __Pyx_PyBytes_GET_SIZE(__pyx_v_data_bytes); if (unlikely(__pyx_t_4 == ((Py_ssize_t)-1))) __PYX_ERR(0, 213, __pyx_L1_error)
__pyx_v_data_size = __pyx_t_4;
214: else:
+215: raise TypeError(f"Expected bytes or memoryview, got {type(data)}")
/*else*/ {
__pyx_t_6 = NULL;
__Pyx_INCREF(__pyx_builtin_TypeError);
__pyx_t_7 = __pyx_builtin_TypeError;
__pyx_t_8 = __Pyx_PyObject_FormatSimple(((PyObject *)Py_TYPE(__pyx_v_data)), __pyx_mstate_global->__pyx_empty_unicode); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 215, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_8);
__pyx_t_9 = __Pyx_PyUnicode_Concat(__pyx_mstate_global->__pyx_kp_u_Expected_bytes_or_memoryview_got, __pyx_t_8); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 215, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_9);
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
__pyx_t_10 = 1;
{
PyObject *__pyx_callargs[2] = {__pyx_t_6, __pyx_t_9};
__pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_7, __pyx_callargs+__pyx_t_10, (2-__pyx_t_10) | (__pyx_t_10*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
__Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 215, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
}
__Pyx_Raise(__pyx_t_2, 0, 0, 0);
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__PYX_ERR(0, 215, __pyx_L1_error)
}
__pyx_L3:;
216:
217: # Setup dialect
218: cdef CsvDialect dialect
+219: dialect.delimiter = ord(delimiter[0]) if delimiter else ord(',')
__pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_delimiter); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 219, __pyx_L1_error) if (__pyx_t_1) { __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_delimiter, 0, long, 1, __Pyx_PyLong_From_long, 0, 0, 0, 1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 219, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_12 = __Pyx_PyObject_Ord(__pyx_t_2); if (unlikely(__pyx_t_12 == ((long)(long)(Py_UCS4)-1))) __PYX_ERR(0, 219, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_11 = __pyx_t_12; } else { __pyx_t_11 = 44; } __pyx_v_dialect.delimiter = __pyx_t_11;
+220: dialect.quote_char = ord(quote_char[0]) if quote_char else ord('"')
__pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_quote_char); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 220, __pyx_L1_error) if (__pyx_t_1) { __pyx_t_2 = __Pyx_GetItemInt(__pyx_v_quote_char, 0, long, 1, __Pyx_PyLong_From_long, 0, 0, 0, 1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 220, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_12 = __Pyx_PyObject_Ord(__pyx_t_2); if (unlikely(__pyx_t_12 == ((long)(long)(Py_UCS4)-1))) __PYX_ERR(0, 220, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_11 = __pyx_t_12; } else { __pyx_t_11 = 34; } __pyx_v_dialect.quote_char = __pyx_t_11;
+221: dialect.escape_char = ord('\\')
__pyx_v_dialect.escape_char = 92;
+222: dialect.double_quote = True
__pyx_v_dialect.double_quote = 1;
+223: dialect.has_header = has_header
__pyx_t_13 = __Pyx_PyObject_IsTrue(__pyx_v_has_header); if (unlikely((__pyx_t_13 == ((bool)-1)) && PyErr_Occurred())) __PYX_ERR(0, 223, __pyx_L1_error) __pyx_v_dialect.has_header = __pyx_t_13;
224:
225: # Convert column names to C++ vector
226: cdef vector[string] column_names_cpp
+227: if columns is not None:
__pyx_t_1 = (__pyx_v_columns != Py_None);
if (__pyx_t_1) {
/* … */
}
+228: for col in columns:
if (likely(PyList_CheckExact(__pyx_v_columns)) || PyTuple_CheckExact(__pyx_v_columns)) { __pyx_t_2 = __pyx_v_columns; __Pyx_INCREF(__pyx_t_2); __pyx_t_4 = 0; __pyx_t_14 = NULL; } else { __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_v_columns); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 228, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_14 = (CYTHON_COMPILING_IN_LIMITED_API) ? PyIter_Next : __Pyx_PyObject_GetIterNextFunc(__pyx_t_2); if (unlikely(!__pyx_t_14)) __PYX_ERR(0, 228, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_14)) { if (likely(PyList_CheckExact(__pyx_t_2))) { { Py_ssize_t __pyx_temp = __Pyx_PyList_GET_SIZE(__pyx_t_2); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 228, __pyx_L1_error) #endif if (__pyx_t_4 >= __pyx_temp) break; } __pyx_t_7 = __Pyx_PyList_GetItemRef(__pyx_t_2, __pyx_t_4); ++__pyx_t_4; } else { { Py_ssize_t __pyx_temp = __Pyx_PyTuple_GET_SIZE(__pyx_t_2); #if !CYTHON_ASSUME_SAFE_SIZE if (unlikely((__pyx_temp < 0))) __PYX_ERR(0, 228, __pyx_L1_error) #endif if (__pyx_t_4 >= __pyx_temp) break; } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS __pyx_t_7 = __Pyx_NewRef(PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4)); #else __pyx_t_7 = __Pyx_PySequence_ITEM(__pyx_t_2, __pyx_t_4); #endif ++__pyx_t_4; } if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 228, __pyx_L1_error) } else { __pyx_t_7 = __pyx_t_14(__pyx_t_2); if (unlikely(!__pyx_t_7)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (unlikely(!__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) __PYX_ERR(0, 228, __pyx_L1_error) PyErr_Clear(); } break; } } __Pyx_GOTREF(__pyx_t_7); __Pyx_XDECREF_SET(__pyx_v_col, __pyx_t_7); __pyx_t_7 = 0; /* … */ } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+229: column_names_cpp.push_back(col.encode('utf-8'))
__pyx_t_9 = __pyx_v_col;
__Pyx_INCREF(__pyx_t_9);
__pyx_t_10 = 0;
{
PyObject *__pyx_callargs[2] = {__pyx_t_9, __pyx_mstate_global->__pyx_kp_u_utf_8};
__pyx_t_7 = __Pyx_PyObject_FastCallMethod(__pyx_mstate_global->__pyx_n_u_encode, __pyx_callargs+__pyx_t_10, (2-__pyx_t_10) | (1*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0;
if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 229, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
}
__pyx_t_15 = __pyx_convert_string_from_py_6libcpp_6string_std__in_string(__pyx_t_7); if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 229, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
try {
__pyx_v_column_names_cpp.push_back(__pyx_t_15);
} catch(...) {
__Pyx_CppExn2PyErr();
__PYX_ERR(0, 229, __pyx_L1_error)
}
230:
231: # Read CSV
232: cdef CsvTable table
+233: if columns is None:
__pyx_t_1 = (__pyx_v_columns == Py_None);
if (__pyx_t_1) {
/* … */
goto __pyx_L8;
}
+234: table = ReadCsv(data_ptr, data_size, dialect)
__pyx_v_table = ReadCsv(__pyx_v_data_ptr, __pyx_v_data_size, __pyx_v_dialect);
235: else:
+236: table = ReadCsv(data_ptr, data_size, dialect, column_names_cpp)
/*else*/ {
__pyx_v_table = ReadCsv(__pyx_v_data_ptr, __pyx_v_data_size, __pyx_v_dialect, __pyx_v_column_names_cpp);
}
__pyx_L8:;
237:
+238: if have_view:
__pyx_t_1 = (__pyx_v_have_view != 0);
if (__pyx_t_1) {
/* … */
}
+239: PyBuffer_Release(&view)
PyBuffer_Release((&__pyx_v_view));
240:
241: # Convert to Python
242: result = {
+243: 'success': table.success,
__pyx_t_2 = __Pyx_PyDict_NewPresized(4); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 243, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_7 = __Pyx_PyBool_FromLong(__pyx_v_table.success); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 243, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_n_u_success, __pyx_t_7) < (0)) __PYX_ERR(0, 243, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+244: 'num_rows': table.num_rows,
__pyx_t_7 = __Pyx_PyLong_FromSize_t(__pyx_v_table.num_rows); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 244, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_n_u_num_rows, __pyx_t_7) < (0)) __PYX_ERR(0, 243, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+245: 'column_names': [],
__pyx_t_7 = PyList_New(0); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 245, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_n_u_column_names, __pyx_t_7) < (0)) __PYX_ERR(0, 243, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+246: 'columns': []
__pyx_t_7 = PyList_New(0); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 246, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_n_u_columns, __pyx_t_7) < (0)) __PYX_ERR(0, 243, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_v_result = ((PyObject*)__pyx_t_2); __pyx_t_2 = 0;
247: }
248:
249: cdef size_t i, j
250:
251: # Column names
+252: for i in range(table.column_names.size()):
__pyx_t_16 = __pyx_v_table.column_names.size();
__pyx_t_17 = __pyx_t_16;
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_17; __pyx_t_10+=1) {
__pyx_v_i = __pyx_t_10;
+253: result['column_names'].append(table.column_names[i].decode('utf-8'))
__pyx_t_2 = __Pyx_PyDict_GetItem(__pyx_v_result, __pyx_mstate_global->__pyx_n_u_column_names); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 253, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_7 = __Pyx_decode_cpp_string((__pyx_v_table.column_names[__pyx_v_i]), 0, PY_SSIZE_T_MAX, NULL, NULL, PyUnicode_DecodeUTF8); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 253, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __pyx_t_18 = __Pyx_PyObject_Append(__pyx_t_2, __pyx_t_7); if (unlikely(__pyx_t_18 == ((int)-1))) __PYX_ERR(0, 253, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; }
254:
255: # Column data
+256: for i in range(table.columns.size()):
__pyx_t_19 = __pyx_v_table.columns.size();
__pyx_t_20 = __pyx_t_19;
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_20; __pyx_t_10+=1) {
__pyx_v_i = __pyx_t_10;
+257: col_type = table.columns[i].type.decode('utf-8')
__pyx_t_7 = __Pyx_decode_cpp_string((__pyx_v_table.columns[__pyx_v_i]).type, 0, PY_SSIZE_T_MAX, NULL, NULL, PyUnicode_DecodeUTF8); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 257, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __Pyx_XDECREF_SET(__pyx_v_col_type, __pyx_t_7); __pyx_t_7 = 0;
258: # Pre-allocate list for faster assignment and fewer resizes
+259: if col_type == "int64":
__pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_col_type, __pyx_mstate_global->__pyx_n_u_int64, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 259, __pyx_L1_error) if (__pyx_t_1) { /* … */ goto __pyx_L14; }
+260: n = table.columns[i].int_values.size()
__pyx_v_n = (__pyx_v_table.columns[__pyx_v_i]).int_values.size();
+261: nm = table.columns[i].null_mask.size()
__pyx_v_nm = (__pyx_v_table.columns[__pyx_v_i]).null_mask.size();
+262: col_data = [None] * n
__pyx_t_7 = PyList_New(1 * (__pyx_v_n)); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 262, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); { Py_ssize_t __pyx_temp; for (__pyx_temp=0; __pyx_temp < __pyx_v_n; __pyx_temp++) { __Pyx_INCREF(Py_None); __Pyx_GIVEREF(Py_None); if (__Pyx_PyList_SET_ITEM(__pyx_t_7, __pyx_temp, Py_None) != (0)) __PYX_ERR(0, 262, __pyx_L1_error); } } __Pyx_XDECREF_SET(__pyx_v_col_data, ((PyObject*)__pyx_t_7)); __pyx_t_7 = 0;
+263: for j in range(n):
__pyx_t_21 = __pyx_v_n;
__pyx_t_22 = __pyx_t_21;
for (__pyx_t_23 = 0; __pyx_t_23 < __pyx_t_22; __pyx_t_23+=1) {
__pyx_v_j = __pyx_t_23;
+264: if nm == n:
__pyx_t_1 = (__pyx_v_nm == __pyx_v_n);
if (__pyx_t_1) {
/* … */
goto __pyx_L17;
}
+265: if table.columns[i].null_mask[j]:
__pyx_t_1 = (((__pyx_v_table.columns[__pyx_v_i]).null_mask[__pyx_v_j]) != 0);
if (__pyx_t_1) {
/* … */
goto __pyx_L18;
}
+266: col_data[j] = None
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, Py_None, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 266, __pyx_L1_error)
267: else:
+268: col_data[j] = table.columns[i].int_values[j]
/*else*/ {
__pyx_t_7 = __Pyx_PyLong_From_PY_LONG_LONG(((__pyx_v_table.columns[__pyx_v_i]).int_values[__pyx_v_j])); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 268, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, __pyx_t_7, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 268, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
}
__pyx_L18:;
269: else:
+270: if j < nm and table.columns[i].null_mask[j]:
/*else*/ {
__pyx_t_24 = (__pyx_v_j < __pyx_v_nm);
if (__pyx_t_24) {
} else {
__pyx_t_1 = __pyx_t_24;
goto __pyx_L20_bool_binop_done;
}
__pyx_t_24 = (((__pyx_v_table.columns[__pyx_v_i]).null_mask[__pyx_v_j]) != 0);
__pyx_t_1 = __pyx_t_24;
__pyx_L20_bool_binop_done:;
if (__pyx_t_1) {
/* … */
goto __pyx_L19;
}
+271: col_data[j] = None
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, Py_None, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 271, __pyx_L1_error)
272: else:
+273: col_data[j] = table.columns[i].int_values[j]
/*else*/ {
__pyx_t_7 = __Pyx_PyLong_From_PY_LONG_LONG(((__pyx_v_table.columns[__pyx_v_i]).int_values[__pyx_v_j])); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 273, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, __pyx_t_7, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 273, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
}
__pyx_L19:;
}
__pyx_L17:;
}
+274: elif col_type == "double":
__pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_col_type, __pyx_mstate_global->__pyx_n_u_double, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 274, __pyx_L1_error) if (__pyx_t_1) { /* … */ goto __pyx_L14; }
+275: n = table.columns[i].double_values.size()
__pyx_v_n = (__pyx_v_table.columns[__pyx_v_i]).double_values.size();
+276: nm = table.columns[i].null_mask.size()
__pyx_v_nm = (__pyx_v_table.columns[__pyx_v_i]).null_mask.size();
+277: col_data = [None] * n
__pyx_t_7 = PyList_New(1 * (__pyx_v_n)); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 277, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); { Py_ssize_t __pyx_temp; for (__pyx_temp=0; __pyx_temp < __pyx_v_n; __pyx_temp++) { __Pyx_INCREF(Py_None); __Pyx_GIVEREF(Py_None); if (__Pyx_PyList_SET_ITEM(__pyx_t_7, __pyx_temp, Py_None) != (0)) __PYX_ERR(0, 277, __pyx_L1_error); } } __Pyx_XDECREF_SET(__pyx_v_col_data, ((PyObject*)__pyx_t_7)); __pyx_t_7 = 0;
+278: for j in range(n):
__pyx_t_21 = __pyx_v_n;
__pyx_t_22 = __pyx_t_21;
for (__pyx_t_23 = 0; __pyx_t_23 < __pyx_t_22; __pyx_t_23+=1) {
__pyx_v_j = __pyx_t_23;
+279: if nm == n:
__pyx_t_1 = (__pyx_v_nm == __pyx_v_n);
if (__pyx_t_1) {
/* … */
goto __pyx_L24;
}
+280: if table.columns[i].null_mask[j]:
__pyx_t_1 = (((__pyx_v_table.columns[__pyx_v_i]).null_mask[__pyx_v_j]) != 0);
if (__pyx_t_1) {
/* … */
goto __pyx_L25;
}
+281: col_data[j] = None
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, Py_None, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 281, __pyx_L1_error)
282: else:
+283: col_data[j] = table.columns[i].double_values[j]
/*else*/ {
__pyx_t_7 = PyFloat_FromDouble(((__pyx_v_table.columns[__pyx_v_i]).double_values[__pyx_v_j])); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 283, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, __pyx_t_7, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 283, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
}
__pyx_L25:;
284: else:
+285: if j < nm and table.columns[i].null_mask[j]:
/*else*/ {
__pyx_t_24 = (__pyx_v_j < __pyx_v_nm);
if (__pyx_t_24) {
} else {
__pyx_t_1 = __pyx_t_24;
goto __pyx_L27_bool_binop_done;
}
__pyx_t_24 = (((__pyx_v_table.columns[__pyx_v_i]).null_mask[__pyx_v_j]) != 0);
__pyx_t_1 = __pyx_t_24;
__pyx_L27_bool_binop_done:;
if (__pyx_t_1) {
/* … */
goto __pyx_L26;
}
+286: col_data[j] = None
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, Py_None, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 286, __pyx_L1_error)
287: else:
+288: col_data[j] = table.columns[i].double_values[j]
/*else*/ {
__pyx_t_7 = PyFloat_FromDouble(((__pyx_v_table.columns[__pyx_v_i]).double_values[__pyx_v_j])); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 288, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, __pyx_t_7, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 288, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
}
__pyx_L26:;
}
__pyx_L24:;
}
+289: elif col_type == "boolean":
__pyx_t_1 = (__Pyx_PyUnicode_Equals(__pyx_v_col_type, __pyx_mstate_global->__pyx_n_u_boolean, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 289, __pyx_L1_error) if (__pyx_t_1) { /* … */ goto __pyx_L14; }
+290: n = table.columns[i].boolean_values.size()
__pyx_v_n = (__pyx_v_table.columns[__pyx_v_i]).boolean_values.size();
+291: nm = table.columns[i].null_mask.size()
__pyx_v_nm = (__pyx_v_table.columns[__pyx_v_i]).null_mask.size();
+292: col_data = [None] * n
__pyx_t_7 = PyList_New(1 * (__pyx_v_n)); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 292, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); { Py_ssize_t __pyx_temp; for (__pyx_temp=0; __pyx_temp < __pyx_v_n; __pyx_temp++) { __Pyx_INCREF(Py_None); __Pyx_GIVEREF(Py_None); if (__Pyx_PyList_SET_ITEM(__pyx_t_7, __pyx_temp, Py_None) != (0)) __PYX_ERR(0, 292, __pyx_L1_error); } } __Pyx_XDECREF_SET(__pyx_v_col_data, ((PyObject*)__pyx_t_7)); __pyx_t_7 = 0;
+293: for j in range(n):
__pyx_t_21 = __pyx_v_n;
__pyx_t_22 = __pyx_t_21;
for (__pyx_t_23 = 0; __pyx_t_23 < __pyx_t_22; __pyx_t_23+=1) {
__pyx_v_j = __pyx_t_23;
+294: if nm == n:
__pyx_t_1 = (__pyx_v_nm == __pyx_v_n);
if (__pyx_t_1) {
/* … */
goto __pyx_L31;
}
+295: if table.columns[i].null_mask[j]:
__pyx_t_1 = (((__pyx_v_table.columns[__pyx_v_i]).null_mask[__pyx_v_j]) != 0);
if (__pyx_t_1) {
/* … */
goto __pyx_L32;
}
+296: col_data[j] = None
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, Py_None, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 296, __pyx_L1_error)
297: else:
+298: col_data[j] = bool(table.columns[i].boolean_values[j])
/*else*/ {
__pyx_t_7 = __Pyx_PyLong_From_uint8_t(((__pyx_v_table.columns[__pyx_v_i]).boolean_values[__pyx_v_j])); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 298, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
__pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 298, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
__pyx_t_7 = __Pyx_PyBool_FromLong((!(!__pyx_t_1))); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 298, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, __pyx_t_7, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 298, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
}
__pyx_L32:;
299: else:
+300: if j < nm and table.columns[i].null_mask[j]:
/*else*/ {
__pyx_t_24 = (__pyx_v_j < __pyx_v_nm);
if (__pyx_t_24) {
} else {
__pyx_t_1 = __pyx_t_24;
goto __pyx_L34_bool_binop_done;
}
__pyx_t_24 = (((__pyx_v_table.columns[__pyx_v_i]).null_mask[__pyx_v_j]) != 0);
__pyx_t_1 = __pyx_t_24;
__pyx_L34_bool_binop_done:;
if (__pyx_t_1) {
/* … */
goto __pyx_L33;
}
+301: col_data[j] = None
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, Py_None, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 301, __pyx_L1_error)
302: else:
+303: col_data[j] = bool(table.columns[i].boolean_values[j])
/*else*/ {
__pyx_t_7 = __Pyx_PyLong_From_uint8_t(((__pyx_v_table.columns[__pyx_v_i]).boolean_values[__pyx_v_j])); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 303, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
__pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 303, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
__pyx_t_7 = __Pyx_PyBool_FromLong((!(!__pyx_t_1))); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 303, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, __pyx_t_7, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 303, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
}
__pyx_L33:;
}
__pyx_L31:;
}
304: else: # string
+305: n = table.columns[i].string_values.size()
/*else*/ {
__pyx_v_n = (__pyx_v_table.columns[__pyx_v_i]).string_values.size();
+306: nm = table.columns[i].null_mask.size()
__pyx_v_nm = (__pyx_v_table.columns[__pyx_v_i]).null_mask.size();
+307: col_data = [None] * n
__pyx_t_7 = PyList_New(1 * (__pyx_v_n)); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 307, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); { Py_ssize_t __pyx_temp; for (__pyx_temp=0; __pyx_temp < __pyx_v_n; __pyx_temp++) { __Pyx_INCREF(Py_None); __Pyx_GIVEREF(Py_None); if (__Pyx_PyList_SET_ITEM(__pyx_t_7, __pyx_temp, Py_None) != (0)) __PYX_ERR(0, 307, __pyx_L1_error); } } __Pyx_XDECREF_SET(__pyx_v_col_data, ((PyObject*)__pyx_t_7)); __pyx_t_7 = 0;
+308: for j in range(n):
__pyx_t_21 = __pyx_v_n;
__pyx_t_22 = __pyx_t_21;
for (__pyx_t_23 = 0; __pyx_t_23 < __pyx_t_22; __pyx_t_23+=1) {
__pyx_v_j = __pyx_t_23;
+309: if nm == n:
__pyx_t_1 = (__pyx_v_nm == __pyx_v_n);
if (__pyx_t_1) {
/* … */
goto __pyx_L38;
}
+310: if table.columns[i].null_mask[j]:
__pyx_t_1 = (((__pyx_v_table.columns[__pyx_v_i]).null_mask[__pyx_v_j]) != 0);
if (__pyx_t_1) {
/* … */
goto __pyx_L39;
}
+311: col_data[j] = None
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, Py_None, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 311, __pyx_L1_error)
312: else:
+313: col_data[j] = table.columns[i].string_values[j].decode('utf-8')
/*else*/ {
__pyx_t_7 = __Pyx_decode_cpp_string(((__pyx_v_table.columns[__pyx_v_i]).string_values[__pyx_v_j]), 0, PY_SSIZE_T_MAX, NULL, NULL, PyUnicode_DecodeUTF8); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 313, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, __pyx_t_7, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 313, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
}
__pyx_L39:;
314: else:
+315: if j < nm and table.columns[i].null_mask[j]:
/*else*/ {
__pyx_t_24 = (__pyx_v_j < __pyx_v_nm);
if (__pyx_t_24) {
} else {
__pyx_t_1 = __pyx_t_24;
goto __pyx_L41_bool_binop_done;
}
__pyx_t_24 = (((__pyx_v_table.columns[__pyx_v_i]).null_mask[__pyx_v_j]) != 0);
__pyx_t_1 = __pyx_t_24;
__pyx_L41_bool_binop_done:;
if (__pyx_t_1) {
/* … */
goto __pyx_L40;
}
+316: col_data[j] = None
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, Py_None, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 316, __pyx_L1_error)
317: else:
+318: col_data[j] = table.columns[i].string_values[j].decode('utf-8')
/*else*/ {
__pyx_t_7 = __Pyx_decode_cpp_string(((__pyx_v_table.columns[__pyx_v_i]).string_values[__pyx_v_j]), 0, PY_SSIZE_T_MAX, NULL, NULL, PyUnicode_DecodeUTF8); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 318, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_7);
if (unlikely((__Pyx_SetItemInt(__pyx_v_col_data, __pyx_v_j, __pyx_t_7, size_t, 0, __Pyx_PyLong_FromSize_t, 1, 0, 0, 1) < 0))) __PYX_ERR(0, 318, __pyx_L1_error)
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
}
__pyx_L40:;
}
__pyx_L38:;
}
}
__pyx_L14:;
319:
+320: result['columns'].append(col_data)
__pyx_t_7 = __Pyx_PyDict_GetItem(__pyx_v_result, __pyx_mstate_global->__pyx_n_u_columns); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __pyx_t_18 = __Pyx_PyObject_Append(__pyx_t_7, __pyx_v_col_data); if (unlikely(__pyx_t_18 == ((int)-1))) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; }
321:
+322: return result
__Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_v_result); __pyx_r = __pyx_v_result; goto __pyx_L0;
323:
324:
+325: def detect_csv_dialect(data, sample_size=100):
/* Python wrapper */ static PyObject *__pyx_pw_4rugo_3csv_5detect_csv_dialect(PyObject *__pyx_self, #if CYTHON_METH_FASTCALL PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds #else PyObject *__pyx_args, PyObject *__pyx_kwds #endif ); /*proto*/ PyDoc_STRVAR(__pyx_doc_4rugo_3csv_4detect_csv_dialect, "\n Auto-detect CSV dialect (delimiter, quote character).\n \n Parameters\n ----------\n data : bytes or memoryview\n The CSV data to analyze\n sample_size : int, default 100\n Number of rows to sample for detection\n \n Returns\n -------\n dict\n Dictionary with detected dialect parameters:\n - delimiter: str - Detected delimiter character\n - quote_char: str - Quote character\n \n Examples\n --------\n >>> data = b'name\\tage\\tsalary\\nAlice\\t30\\t50000'\n >>> dialect = detect_csv_dialect(data)\n >>> print(dialect['delimiter'])\n '\t'\n "); static PyMethodDef __pyx_mdef_4rugo_3csv_5detect_csv_dialect = {"detect_csv_dialect", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4rugo_3csv_5detect_csv_dialect, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_4rugo_3csv_4detect_csv_dialect}; static PyObject *__pyx_pw_4rugo_3csv_5detect_csv_dialect(PyObject *__pyx_self, #if CYTHON_METH_FASTCALL PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds #else PyObject *__pyx_args, PyObject *__pyx_kwds #endif ) { PyObject *__pyx_v_data = 0; PyObject *__pyx_v_sample_size = 0; #if !CYTHON_METH_FASTCALL CYTHON_UNUSED Py_ssize_t __pyx_nargs; #endif CYTHON_UNUSED PyObject *const *__pyx_kwvalues; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("detect_csv_dialect (wrapper)", 0); #if !CYTHON_METH_FASTCALL #if CYTHON_ASSUME_SAFE_SIZE __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); #else __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; #endif #endif __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); { PyObject ** const __pyx_pyargnames[] = 
{&__pyx_mstate_global->__pyx_n_u_data,&__pyx_mstate_global->__pyx_n_u_sample_size,0}; PyObject* values[2] = {0,0}; const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0; if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 325, __pyx_L3_error) if (__pyx_kwds_len > 0) { switch (__pyx_nargs) { case 2: values[1] = __Pyx_ArgRef_FASTCALL(__pyx_args, 1); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 325, __pyx_L3_error) CYTHON_FALLTHROUGH; case 1: values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 325, __pyx_L3_error) CYTHON_FALLTHROUGH; case 0: break; default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "detect_csv_dialect", 0) < (0)) __PYX_ERR(0, 325, __pyx_L3_error) if (!values[1]) values[1] = __Pyx_NewRef(((PyObject *)((PyObject*)__pyx_mstate_global->__pyx_int_100))); for (Py_ssize_t i = __pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("detect_csv_dialect", 0, 1, 2, i); __PYX_ERR(0, 325, __pyx_L3_error) } } } else { switch (__pyx_nargs) { case 2: values[1] = __Pyx_ArgRef_FASTCALL(__pyx_args, 1); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 325, __pyx_L3_error) CYTHON_FALLTHROUGH; case 1: values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 325, __pyx_L3_error) break; default: goto __pyx_L5_argtuple_error; } if (!values[1]) values[1] = __Pyx_NewRef(((PyObject *)((PyObject*)__pyx_mstate_global->__pyx_int_100))); } __pyx_v_data = values[0]; __pyx_v_sample_size = values[1]; } goto __pyx_L6_skip; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("detect_csv_dialect", 0, 1, 2, __pyx_nargs); __PYX_ERR(0, 325, __pyx_L3_error) __pyx_L6_skip:; goto __pyx_L4_argument_unpacking_done; 
__pyx_L3_error:; for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { Py_XDECREF(values[__pyx_temp]); } __Pyx_AddTraceback("rugo.csv.detect_csv_dialect", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_4rugo_3csv_4detect_csv_dialect(__pyx_self, __pyx_v_data, __pyx_v_sample_size); int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; /* function exit code */ for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { Py_XDECREF(values[__pyx_temp]); } __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_4rugo_3csv_4detect_csv_dialect(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_data, PyObject *__pyx_v_sample_size) { uint8_t const *__pyx_v_data_ptr; size_t __pyx_v_data_size; PyObject *__pyx_v_data_bytes = 0; Py_buffer __pyx_v_view; bool __pyx_v_have_view; CsvDialect __pyx_v_dialect; PyObject *__pyx_r = NULL; /* … */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_7); __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_9); __Pyx_AddTraceback("rugo.csv.detect_csv_dialect", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_data_bytes); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* … */ __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4rugo_3csv_5detect_csv_dialect, 0, __pyx_mstate_global->__pyx_n_u_detect_csv_dialect, NULL, __pyx_mstate_global->__pyx_n_u_rugo_csv, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[2])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_mstate_global->__pyx_tuple[2]); if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, 
__pyx_mstate_global->__pyx_n_u_detect_csv_dialect, __pyx_t_2) < (0)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* … */ __pyx_mstate_global->__pyx_tuple[2] = PyTuple_Pack(1, ((PyObject*)__pyx_mstate_global->__pyx_int_100)); if (unlikely(!__pyx_mstate_global->__pyx_tuple[2])) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_GOTREF(__pyx_mstate_global->__pyx_tuple[2]); __Pyx_GIVEREF(__pyx_mstate_global->__pyx_tuple[2]);
326: """
327: Auto-detect CSV dialect (delimiter, quote character).
328:
329: Parameters
330: ----------
331: data : bytes or memoryview
332: The CSV data to analyze
333: sample_size : int, default 100
334: Number of rows to sample for detection
335:
336: Returns
337: -------
338: dict
339: Dictionary with detected dialect parameters:
340: - delimiter: str - Detected delimiter character
341: - quote_char: str - Quote character
342:
343: Examples
344: --------
345: >>> data = b'name\\tage\\tsalary\\nAlice\\t30\\t50000'
346: >>> dialect = detect_csv_dialect(data)
347: >>> dialect['delimiter']
348: '\\t'
349: """
350: cdef const uint8_t* data_ptr
351: cdef size_t data_size
352: cdef bytes data_bytes
353: cdef Py_buffer view
+354: cdef cbool have_view = False
__pyx_v_have_view = 0;
355:
356: # Handle different input types
+357: if isinstance(data, memoryview):
__pyx_t_1 = PyMemoryView_Check(__pyx_v_data);
if (__pyx_t_1) {
/* … */
goto __pyx_L3;
}
+358: PyObject_GetBuffer(data.obj, &view, PyBUF_CONTIG_RO)
__pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_data, __pyx_mstate_global->__pyx_n_u_obj); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 358, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = PyObject_GetBuffer(__pyx_t_2, (&__pyx_v_view), PyBUF_CONTIG_RO); if (unlikely(__pyx_t_3 == ((int)-1))) __PYX_ERR(0, 358, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+359: data_ptr = <const uint8_t*>view.buf
__pyx_v_data_ptr = ((uint8_t const *)__pyx_v_view.buf);
+360: data_size = view.len
__pyx_t_4 = __pyx_v_view.len;
__pyx_v_data_size = __pyx_t_4;
+361: have_view = True
__pyx_v_have_view = 1;
+362: elif isinstance(data, bytes):
__pyx_t_1 = PyBytes_Check(__pyx_v_data);
if (likely(__pyx_t_1)) {
/* … */
goto __pyx_L3;
}
+363: data_bytes = data
__pyx_t_2 = __pyx_v_data;
__Pyx_INCREF(__pyx_t_2);
if (!(likely(PyBytes_CheckExact(__pyx_t_2))||((__pyx_t_2) == Py_None) || __Pyx_RaiseUnexpectedTypeError("bytes", __pyx_t_2))) __PYX_ERR(0, 363, __pyx_L1_error)
__pyx_v_data_bytes = ((PyObject*)__pyx_t_2);
__pyx_t_2 = 0;
+364: data_ptr = <const uint8_t*><char*>data_bytes
if (unlikely(__pyx_v_data_bytes == Py_None)) {
PyErr_SetString(PyExc_TypeError, "expected bytes, NoneType found");
__PYX_ERR(0, 364, __pyx_L1_error)
}
__pyx_t_5 = __Pyx_PyBytes_AsWritableString(__pyx_v_data_bytes); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) __PYX_ERR(0, 364, __pyx_L1_error)
__pyx_v_data_ptr = ((uint8_t const *)((char *)__pyx_t_5));
+365: data_size = len(data_bytes)
if (unlikely(__pyx_v_data_bytes == Py_None)) {
PyErr_SetString(PyExc_TypeError, "object of type 'NoneType' has no len()");
__PYX_ERR(0, 365, __pyx_L1_error)
}
__pyx_t_4 = __Pyx_PyBytes_GET_SIZE(__pyx_v_data_bytes); if (unlikely(__pyx_t_4 == ((Py_ssize_t)-1))) __PYX_ERR(0, 365, __pyx_L1_error)
__pyx_v_data_size = __pyx_t_4;
366: else:
+367: raise TypeError(f"Expected bytes or memoryview, got {type(data)}")
/*else*/ {
__pyx_t_6 = NULL;
__Pyx_INCREF(__pyx_builtin_TypeError);
__pyx_t_7 = __pyx_builtin_TypeError;
__pyx_t_8 = __Pyx_PyObject_FormatSimple(((PyObject *)Py_TYPE(__pyx_v_data)), __pyx_mstate_global->__pyx_empty_unicode); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 367, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_8);
__pyx_t_9 = __Pyx_PyUnicode_Concat(__pyx_mstate_global->__pyx_kp_u_Expected_bytes_or_memoryview_got, __pyx_t_8); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 367, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_9);
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
__pyx_t_10 = 1;
{
PyObject *__pyx_callargs[2] = {__pyx_t_6, __pyx_t_9};
__pyx_t_2 = __Pyx_PyObject_FastCall(__pyx_t_7, __pyx_callargs+__pyx_t_10, (2-__pyx_t_10) | (__pyx_t_10*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET));
__Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
__Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 367, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_2);
}
__Pyx_Raise(__pyx_t_2, 0, 0, 0);
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
__PYX_ERR(0, 367, __pyx_L1_error)
}
__pyx_L3:;
368:
369: # Detect dialect
+370: cdef CsvDialect dialect = DetectCsvDialect(data_ptr, data_size, sample_size)
__pyx_t_10 = __Pyx_PyLong_As_size_t(__pyx_v_sample_size); if (unlikely((__pyx_t_10 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 370, __pyx_L1_error) __pyx_v_dialect = DetectCsvDialect(__pyx_v_data_ptr, __pyx_v_data_size, __pyx_t_10);
371:
+372: if have_view:
__pyx_t_1 = (__pyx_v_have_view != 0);
if (__pyx_t_1) {
/* … */
}
+373: PyBuffer_Release(&view)
PyBuffer_Release((&__pyx_v_view));
374:
+375: return {
__Pyx_XDECREF(__pyx_r);
+376: 'delimiter': chr(dialect.delimiter),
__pyx_t_2 = __Pyx_PyDict_NewPresized(2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_7 = PyUnicode_FromOrdinal(__pyx_v_dialect.delimiter); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_n_u_delimiter, __pyx_t_7) < (0)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+377: 'quote_char': chr(dialect.quote_char)
__pyx_t_7 = PyUnicode_FromOrdinal(__pyx_v_dialect.quote_char); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 377, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (PyDict_SetItem(__pyx_t_2, __pyx_mstate_global->__pyx_n_u_quote_char, __pyx_t_7) < (0)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_r = __pyx_t_2; __pyx_t_2 = 0; goto __pyx_L0;
378: }
379:
380:
381: # Convenience function for TSV
+382: def read_tsv(data, columns=None):
/* Python wrapper */ static PyObject *__pyx_pw_4rugo_3csv_7read_tsv(PyObject *__pyx_self, #if CYTHON_METH_FASTCALL PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds #else PyObject *__pyx_args, PyObject *__pyx_kwds #endif ); /*proto*/ PyDoc_STRVAR(__pyx_doc_4rugo_3csv_6read_tsv, "\n Read TSV (tab-separated values) data.\n \n This is a convenience wrapper around read_csv with delimiter='\\t'.\n \n Parameters\n ----------\n data : bytes or memoryview\n The TSV data to read\n columns : list of str, optional\n List of column names to read. If None, reads all columns.\n \n Returns\n -------\n dict\n Same as read_csv()\n \n Examples\n --------\n >>> data = b'name\\tage\\tsalary\\nAlice\\t30\\t50000'\n >>> result = read_tsv(data)\n >>> print(result['column_names'])\n ['name', 'age', 'salary']\n "); static PyMethodDef __pyx_mdef_4rugo_3csv_7read_tsv = {"read_tsv", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4rugo_3csv_7read_tsv, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_4rugo_3csv_6read_tsv}; static PyObject *__pyx_pw_4rugo_3csv_7read_tsv(PyObject *__pyx_self, #if CYTHON_METH_FASTCALL PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds #else PyObject *__pyx_args, PyObject *__pyx_kwds #endif ) { PyObject *__pyx_v_data = 0; PyObject *__pyx_v_columns = 0; #if !CYTHON_METH_FASTCALL CYTHON_UNUSED Py_ssize_t __pyx_nargs; #endif CYTHON_UNUSED PyObject *const *__pyx_kwvalues; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("read_tsv (wrapper)", 0); #if !CYTHON_METH_FASTCALL #if CYTHON_ASSUME_SAFE_SIZE __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); #else __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; #endif #endif __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); { PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_data,&__pyx_mstate_global->__pyx_n_u_columns,0}; PyObject* values[2] = {0,0}; const 
Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? __Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0; if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 382, __pyx_L3_error) if (__pyx_kwds_len > 0) { switch (__pyx_nargs) { case 2: values[1] = __Pyx_ArgRef_FASTCALL(__pyx_args, 1); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 382, __pyx_L3_error) CYTHON_FALLTHROUGH; case 1: values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 382, __pyx_L3_error) CYTHON_FALLTHROUGH; case 0: break; default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "read_tsv", 0) < (0)) __PYX_ERR(0, 382, __pyx_L3_error) if (!values[1]) values[1] = __Pyx_NewRef(((PyObject *)Py_None)); for (Py_ssize_t i = __pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("read_tsv", 0, 1, 2, i); __PYX_ERR(0, 382, __pyx_L3_error) } } } else { switch (__pyx_nargs) { case 2: values[1] = __Pyx_ArgRef_FASTCALL(__pyx_args, 1); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 382, __pyx_L3_error) CYTHON_FALLTHROUGH; case 1: values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 382, __pyx_L3_error) break; default: goto __pyx_L5_argtuple_error; } if (!values[1]) values[1] = __Pyx_NewRef(((PyObject *)Py_None)); } __pyx_v_data = values[0]; __pyx_v_columns = values[1]; } goto __pyx_L6_skip; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("read_tsv", 0, 1, 2, __pyx_nargs); __PYX_ERR(0, 382, __pyx_L3_error) __pyx_L6_skip:; goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { Py_XDECREF(values[__pyx_temp]); } __Pyx_AddTraceback("rugo.csv.read_tsv", __pyx_clineno, __pyx_lineno, __pyx_filename); 
__Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_4rugo_3csv_6read_tsv(__pyx_self, __pyx_v_data, __pyx_v_columns); /* function exit code */ for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { Py_XDECREF(values[__pyx_temp]); } __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_4rugo_3csv_6read_tsv(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_data, PyObject *__pyx_v_columns) { PyObject *__pyx_r = NULL; /* … */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_2); __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_5); __Pyx_AddTraceback("rugo.csv.read_tsv", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* … */ __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4rugo_3csv_7read_tsv, 0, __pyx_mstate_global->__pyx_n_u_read_tsv, NULL, __pyx_mstate_global->__pyx_n_u_rugo_csv, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[3])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_mstate_global->__pyx_tuple[3]); if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_read_tsv, __pyx_t_2) < (0)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
383: """
384: Read TSV (tab-separated values) data.
385:
386: Thin wrapper: forwards ``data`` and ``columns`` to read_csv with delimiter='\\t' and returns its result unchanged.
387:
388: Parameters
389: ----------
390: data : bytes or memoryview
391: The TSV data to read
392: columns : list of str, optional
393: List of column names to read. If None, reads all columns.
394:
395: Returns
396: -------
397: dict
398: Whatever read_csv() returns for this input; the result is not modified here.
399:
400: Examples
401: --------
402: >>> data = b'name\\tage\\tsalary\\nAlice\\t30\\t50000'
403: >>> result = read_tsv(data)
404: >>> print(result['column_names'])
405: ['name', 'age', 'salary']
406: """
+407: return read_csv(data, columns=columns, delimiter='\t')
__Pyx_XDECREF(__pyx_r); __pyx_t_2 = NULL; __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_mstate_global->__pyx_n_u_read_csv); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 407, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = 1; #if CYTHON_UNPACK_METHODS if (unlikely(PyMethod_Check(__pyx_t_3))) { __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3); assert(__pyx_t_2); PyObject* __pyx__function = PyMethod_GET_FUNCTION(__pyx_t_3); __Pyx_INCREF(__pyx_t_2); __Pyx_INCREF(__pyx__function); __Pyx_DECREF_SET(__pyx_t_3, __pyx__function); __pyx_t_4 = 0; } #endif { PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 2 : 0)] = {__pyx_t_2, __pyx_v_data}; __pyx_t_5 = __Pyx_MakeVectorcallBuilderKwds(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 407, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_columns, __pyx_v_columns, __pyx_t_5, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 407, __pyx_L1_error) if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_delimiter, __pyx_mstate_global->__pyx_kp_u__3, __pyx_t_5, __pyx_callargs+2, 1) < (0)) __PYX_ERR(0, 407, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_Vectorcall_CallFromBuilder(__pyx_t_3, __pyx_callargs+__pyx_t_4, (2-__pyx_t_4) | (__pyx_t_4*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_5); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 407, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); } __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0;
408:
409:
+410: def get_tsv_schema(data, sample_size=100):
/* Python wrapper */ static PyObject *__pyx_pw_4rugo_3csv_9get_tsv_schema(PyObject *__pyx_self, #if CYTHON_METH_FASTCALL PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds #else PyObject *__pyx_args, PyObject *__pyx_kwds #endif ); /*proto*/ PyDoc_STRVAR(__pyx_doc_4rugo_3csv_8get_tsv_schema, "\n Extract schema from TSV data.\n \n This is a convenience wrapper around get_csv_schema with delimiter='\\t'.\n \n Parameters\n ----------\n data : bytes or memoryview\n The TSV data to analyze\n sample_size : int, default 100\n Number of rows to sample for type inference\n \n Returns\n -------\n list of dict\n List of column schemas\n "); static PyMethodDef __pyx_mdef_4rugo_3csv_9get_tsv_schema = {"get_tsv_schema", (PyCFunction)(void(*)(void))(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_4rugo_3csv_9get_tsv_schema, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_4rugo_3csv_8get_tsv_schema}; static PyObject *__pyx_pw_4rugo_3csv_9get_tsv_schema(PyObject *__pyx_self, #if CYTHON_METH_FASTCALL PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds #else PyObject *__pyx_args, PyObject *__pyx_kwds #endif ) { PyObject *__pyx_v_data = 0; PyObject *__pyx_v_sample_size = 0; #if !CYTHON_METH_FASTCALL CYTHON_UNUSED Py_ssize_t __pyx_nargs; #endif CYTHON_UNUSED PyObject *const *__pyx_kwvalues; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("get_tsv_schema (wrapper)", 0); #if !CYTHON_METH_FASTCALL #if CYTHON_ASSUME_SAFE_SIZE __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); #else __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; #endif #endif __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); { PyObject ** const __pyx_pyargnames[] = {&__pyx_mstate_global->__pyx_n_u_data,&__pyx_mstate_global->__pyx_n_u_sample_size,0}; PyObject* values[2] = {0,0}; const Py_ssize_t __pyx_kwds_len = (__pyx_kwds) ? 
__Pyx_NumKwargs_FASTCALL(__pyx_kwds) : 0; if (unlikely(__pyx_kwds_len) < 0) __PYX_ERR(0, 410, __pyx_L3_error) if (__pyx_kwds_len > 0) { switch (__pyx_nargs) { case 2: values[1] = __Pyx_ArgRef_FASTCALL(__pyx_args, 1); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 410, __pyx_L3_error) CYTHON_FALLTHROUGH; case 1: values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 410, __pyx_L3_error) CYTHON_FALLTHROUGH; case 0: break; default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "get_tsv_schema", 0) < (0)) __PYX_ERR(0, 410, __pyx_L3_error) if (!values[1]) values[1] = __Pyx_NewRef(((PyObject *)((PyObject*)__pyx_mstate_global->__pyx_int_100))); for (Py_ssize_t i = __pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("get_tsv_schema", 0, 1, 2, i); __PYX_ERR(0, 410, __pyx_L3_error) } } } else { switch (__pyx_nargs) { case 2: values[1] = __Pyx_ArgRef_FASTCALL(__pyx_args, 1); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[1])) __PYX_ERR(0, 410, __pyx_L3_error) CYTHON_FALLTHROUGH; case 1: values[0] = __Pyx_ArgRef_FASTCALL(__pyx_args, 0); if (!CYTHON_ASSUME_SAFE_MACROS && unlikely(!values[0])) __PYX_ERR(0, 410, __pyx_L3_error) break; default: goto __pyx_L5_argtuple_error; } if (!values[1]) values[1] = __Pyx_NewRef(((PyObject *)((PyObject*)__pyx_mstate_global->__pyx_int_100))); } __pyx_v_data = values[0]; __pyx_v_sample_size = values[1]; } goto __pyx_L6_skip; __pyx_L5_argtuple_error:; __Pyx_RaiseArgtupleInvalid("get_tsv_schema", 0, 1, 2, __pyx_nargs); __PYX_ERR(0, 410, __pyx_L3_error) __pyx_L6_skip:; goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { Py_XDECREF(values[__pyx_temp]); } 
__Pyx_AddTraceback("rugo.csv.get_tsv_schema", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_4rugo_3csv_8get_tsv_schema(__pyx_self, __pyx_v_data, __pyx_v_sample_size); int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; /* function exit code */ for (Py_ssize_t __pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { Py_XDECREF(values[__pyx_temp]); } __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_4rugo_3csv_8get_tsv_schema(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_data, PyObject *__pyx_v_sample_size) { PyObject *__pyx_r = NULL; /* … */ __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_4rugo_3csv_9get_tsv_schema, 0, __pyx_mstate_global->__pyx_n_u_get_tsv_schema, NULL, __pyx_mstate_global->__pyx_n_u_rugo_csv, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[4])); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 410, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_mstate_global->__pyx_tuple[2]); if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_get_tsv_schema, __pyx_t_2) < (0)) __PYX_ERR(0, 410, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
411: """
412: Extract schema from TSV data.
413:
414: Thin wrapper: forwards ``data`` and ``sample_size`` to get_csv_schema with delimiter='\\t' and returns its result unchanged.
415:
416: Parameters
417: ----------
418: data : bytes or memoryview
419: The TSV data to analyze
420: sample_size : int, default 100
421: Number of rows to sample for type inference
422:
423: Returns
424: -------
425: list of dict
426: List of column schemas, exactly as returned by get_csv_schema()
427: """
+428: return get_csv_schema(data, delimiter='\t', sample_size=sample_size)
__Pyx_XDECREF(__pyx_r); __pyx_t_2 = NULL; __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_mstate_global->__pyx_n_u_get_csv_schema); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_4 = 1; #if CYTHON_UNPACK_METHODS if (unlikely(PyMethod_Check(__pyx_t_3))) { __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3); assert(__pyx_t_2); PyObject* __pyx__function = PyMethod_GET_FUNCTION(__pyx_t_3); __Pyx_INCREF(__pyx_t_2); __Pyx_INCREF(__pyx__function); __Pyx_DECREF_SET(__pyx_t_3, __pyx__function); __pyx_t_4 = 0; } #endif { PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 2 : 0)] = {__pyx_t_2, __pyx_v_data}; __pyx_t_5 = __Pyx_MakeVectorcallBuilderKwds(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_delimiter, __pyx_mstate_global->__pyx_kp_u__3, __pyx_t_5, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 428, __pyx_L1_error) if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_sample_size, __pyx_v_sample_size, __pyx_t_5, __pyx_callargs+2, 1) < (0)) __PYX_ERR(0, 428, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_Vectorcall_CallFromBuilder(__pyx_t_3, __pyx_callargs+__pyx_t_4, (2-__pyx_t_4) | (__pyx_t_4*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_5); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); } __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0;