Skip to content
12 changes: 11 additions & 1 deletion .github/workflows/build_linux_arm64_wheels-gh.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ jobs:
which clang++-19
clang++-19 --version
sudo apt-get install -y make cmake ccache ninja-build yasm gawk wget
# Install WebAssembly linker (wasm-ld)
# Install WebAssembly linker (wasm-ld)
sudo apt-get install -y lld-19
# Create symlink for wasm-ld
if ! command -v wasm-ld &> /dev/null; then
Expand Down Expand Up @@ -263,7 +263,17 @@ jobs:
pyenv shell $version
python -m pip install dist/*.whl --force-reinstall
python -c "import chdb; res = chdb.query('select 1112222222,555', 'CSV'); print(f'Python $version: {res}')"

# First test: without optional dependencies
echo "Testing without pandas and pyarrow..."
python -m pip uninstall -y pandas pyarrow || true
make test

# Second test: with optional dependencies
echo "Testing with pandas and pyarrow..."
python -m pip install pandas pyarrow
make test

pyenv shell --unset
done
continue-on-error: false
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/build_linux_x86_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,17 @@ jobs:
pyenv shell $version
python -m pip install dist/*.whl --force-reinstall
python -c "import chdb; res = chdb.query('select 1112222222,555', 'CSV'); print(f'Python $version: {res}')"
# First test: without optional dependencies
echo "Testing without pandas and pyarrow..."
python -m pip uninstall -y pandas pyarrow || true
make test
# Second test: with optional dependencies
echo "Testing with pandas and pyarrow..."
python -m pip install pandas pyarrow
make test
pyenv shell --unset
done
continue-on-error: false
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/build_macos_arm64_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,15 @@ jobs:
pyenv shell $version
python -m pip install dist/*.whl --force-reinstall --no-cache-dir
python -c "import chdb; res = chdb.query('select 1112222222,555', 'CSV'); print(f'Python $version: {res}')"

# First test: without optional dependencies
echo "Testing without pandas and pyarrow..."
python -m pip uninstall -y pandas pyarrow || true
make test

# Second test: with optional dependencies
echo "Testing with pandas and pyarrow..."
python -m pip install pandas pyarrow
make test
python -m pip uninstall -y chdb
pyenv shell --unset
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/build_macos_x86_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,17 @@ jobs:
pyenv shell $version
python -m pip install dist/*.whl --force-reinstall
python -c "import chdb; res = chdb.query('select 1112222222,555', 'CSV'); print(f'Python $version: {res}')"

# First test: without optional dependencies
echo "Testing without pandas and pyarrow..."
python -m pip uninstall -y pandas pyarrow || true
make test

# Second test: with optional dependencies
echo "Testing with pandas and pyarrow..."
python -m pip install pandas pyarrow
make test

pyenv shell --unset
done
continue-on-error: false
Expand Down
5 changes: 2 additions & 3 deletions chdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,10 @@ def to_arrowTable(res):
# try import pyarrow and pandas, if failed, raise ImportError with suggestion
try:
import pyarrow as pa # noqa
import pandas as pd # noqa
except ImportError as e:
print(f"ImportError: {e}")
print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
raise ImportError("Failed to import pyarrow or pandas") from None
print('Please install pyarrow via "pip install pyarrow"')
raise ImportError("Failed to import pyarrow") from None
if len(res) == 0:
return pa.Table.from_batches([], schema=pa.schema([]))

Expand Down
9 changes: 7 additions & 2 deletions chdb/dataframe/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
import tempfile
from io import BytesIO
import re
import pandas as pd
import pyarrow as pa
try:
import pandas as pd
import pyarrow as pa
except ImportError as e:
print(f'ImportError: {e}')
print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
raise ImportError('Failed to import pyarrow or pandas') from None
from chdb import query as chdb_query


Expand Down
63 changes: 41 additions & 22 deletions chdb/state/sqlitelike.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,32 @@
from typing import Optional, Any
from typing import Optional, Any, TYPE_CHECKING, List, Tuple
from chdb import _chdb

# try import pyarrow if failed, raise ImportError with suggestion
try:
import pyarrow as pa # noqa
except ImportError as e:
print(f"ImportError: {e}")
print('Please install pyarrow via "pip install pyarrow"')
raise ImportError("Failed to import pyarrow") from None
if TYPE_CHECKING:
import pyarrow as pa


def _import_pyarrow():
"""Lazy import pyarrow when needed."""
try:
import pyarrow as pa
return pa
except ImportError:
raise ImportError(
"PyArrow is required for this feature. "
"Install with: pip install pyarrow"
) from None


def _import_pandas():
"""Lazy import pandas when needed."""
try:
import pandas as pd
return pd
except ImportError:
raise ImportError(
"Pandas is required for DataFrame conversion. "
"Install with: pip install pandas"
) from None


_arrow_format = set({"dataframe", "arrowtable"})
Expand All @@ -32,11 +51,11 @@ def to_arrowTable(res):
pyarrow.Table: PyArrow Table containing the query results

Raises:
ImportError: If pyarrow or pandas packages are not installed
ImportError: If pyarrow package is not installed

.. note::
This function requires both pyarrow and pandas to be installed.
Install them with: ``pip install pyarrow pandas``
This function requires pyarrow to be installed.
Install with: ``pip install pyarrow``

.. warning::
Empty results return an empty PyArrow Table with no schema.
Expand All @@ -52,14 +71,7 @@ def to_arrowTable(res):
num text
0 1 hello
"""
# try import pyarrow and pandas, if failed, raise ImportError with suggestion
try:
import pyarrow as pa # noqa
import pandas as pd # noqa
except ImportError as e:
print(f"ImportError: {e}")
print('Please install pyarrow and pandas via "pip install pyarrow pandas"')
raise ImportError("Failed to import pyarrow or pandas") from None
pa = _import_pyarrow()
if len(res) == 0:
return pa.Table.from_batches([], schema=pa.schema([]))

Expand Down Expand Up @@ -104,6 +116,7 @@ def to_df(r):
text object
dtype: object
"""
_import_pandas()
t = to_arrowTable(r)
return t.to_pandas(use_threads=True)

Expand Down Expand Up @@ -232,7 +245,7 @@ def cancel(self):
except Exception as e:
raise RuntimeError(f"Failed to cancel streaming query: {str(e)}") from e

def record_batch(self, rows_per_batch: int = 1000000) -> pa.RecordBatchReader:
def record_batch(self, rows_per_batch: int = 1000000) -> "pa.RecordBatchReader":
"""
Create a PyArrow RecordBatchReader from this StreamingResult.

Expand All @@ -244,17 +257,19 @@ def record_batch(self, rows_per_batch: int = 1000000) -> pa.RecordBatchReader:
rows_per_batch (int): Number of rows per batch. Defaults to 1000000.

Returns:
pa.RecordBatchReader: PyArrow RecordBatchReader for efficient streaming
pyarrow.RecordBatchReader: PyArrow RecordBatchReader for efficient streaming

Raises:
ValueError: If the StreamingResult was not created with arrow format
ImportError: If PyArrow is not installed
"""
if not self._supports_record_batch:
raise ValueError(
"record_batch() can only be used with arrow format. "
"Please use format='Arrow' when calling send_query."
)

pa = _import_pyarrow()
chdb_reader = ChdbRecordBatchReader(self, rows_per_batch)
return pa.RecordBatchReader.from_batches(chdb_reader.schema(), chdb_reader)

Expand All @@ -277,10 +292,12 @@ def __init__(self, chdb_stream_result, batch_size_rows):
self._current_rows = 0
self._first_batch = None
self._first_batch_consumed = True
self._pa = _import_pyarrow()
self._schema = self.schema()

def schema(self):
if self._schema is None:
pa = self._pa
# Get the first chunk to determine schema
chunk = self._stream_result.fetch()
if chunk is not None:
Expand All @@ -306,6 +323,8 @@ def schema(self):
return self._schema

def read_next_batch(self):
pa = self._pa

if self._accumulator:
result = self._accumulator.pop(0)
return result
Expand Down Expand Up @@ -602,7 +621,7 @@ class Cursor:
def __init__(self, connection):
self._conn = connection
self._cursor = self._conn.cursor()
self._current_table: Optional[pa.Table] = None
self._current_table: Optional[List[Tuple]] = None
self._current_row: int = 0

def execute(self, query: str) -> None:
Expand Down
13 changes: 8 additions & 5 deletions programs/local/PythonConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,12 +285,15 @@ void convert_to_json_str(const py::handle & obj, String & ret)
d.SetObject();
rapidjson::Document::AllocatorType & allocator = d.GetAllocator();

auto sys_modules = py::module_::import("sys").attr("modules");
bool has_numpy = sys_modules.contains(py::str("numpy"));

std::function<void(const py::handle &, rapidjson::Value &)> convert;
convert = [&](const py::handle & obj, rapidjson::Value & json_value) {
if (py::isinstance<py::dict>(obj))
{
json_value.SetObject();
for (auto & item : py::cast<py::dict>(obj))
for (const auto & item : py::cast<py::dict>(obj))
{
rapidjson::Value key;
auto key_str = py::str(item.first).cast<std::string>();
Expand All @@ -306,7 +309,7 @@ void convert_to_json_str(const py::handle & obj, String & ret)
{
json_value.SetArray();
auto tmp_list = py::cast<py::list>(obj);
for (auto & item : tmp_list)
for (const auto & item : tmp_list)
{
rapidjson::Value element;
convert(item, element);
Expand All @@ -317,14 +320,14 @@ void convert_to_json_str(const py::handle & obj, String & ret)
{
json_value.SetArray();
auto tmp_tuple = py::cast<py::tuple>(obj);
for (auto & item : tmp_tuple)
for (const auto & item : tmp_tuple)
{
rapidjson::Value element;
convert(item, element);
json_value.PushBack(element, allocator);
}
}
else if (py::isinstance<py::array>(obj))
else if (has_numpy && py::isinstance<py::array>(obj))
{
auto arr = py::cast<py::array>(obj);
json_value.SetArray();
Expand All @@ -337,7 +340,7 @@ void convert_to_json_str(const py::handle & obj, String & ret)
auto item = my_list.attr("__getitem__")(i);
convert(item, element);
json_value.PushBack(element, allocator);
}
}
}
else
{
Expand Down
3 changes: 2 additions & 1 deletion programs/local/PythonTableCache.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "PythonTableCache.h"
#include "PandasDataFrame.h"
#include "PybindWrapper.h"
#include "PythonUtils.h"

Expand Down Expand Up @@ -32,7 +33,7 @@ static py::object findQueryableObj(const String & var_name)
{
// Get the object using Python's indexing syntax
obj = namespace_obj[py::cast(var_name)];
if (DB::isInheritsFromPyReader(obj) || DB::isPandasDf(obj) || DB::isPyarrowTable(obj) || DB::hasGetItem(obj))
if (DB::isInheritsFromPyReader(obj) || PandasDataFrame::isPandasDataframe(obj) || DB::isPyarrowTable(obj) || DB::hasGetItem(obj))
{
return obj;
}
Expand Down
37 changes: 9 additions & 28 deletions programs/local/PythonUtils.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#pragma once

#include "config.h"
#include "PybindWrapper.h"

#include <cstddef>
#include <Columns/ColumnString.h>
#include <Columns/IColumn.h>
#include <DataTypes/Serializations/SerializationNumber.h>
Expand Down Expand Up @@ -68,35 +68,21 @@ inline bool isInheritsFromPyReader(const py::object & obj)
return execWithGIL([&]() { return _isInheritsFromPyReader(obj); });
}

// Helper function to check if object is a pandas DataFrame
inline bool isPandasDf(const py::object & obj)
{
return execWithGIL(
[&]()
{
auto pd_data_frame_type = py::module_::import("pandas").attr("DataFrame");
return py::isinstance(obj, pd_data_frame_type);
});
}

// Helper function to check if object is a PyArrow Table
inline bool isPyarrowTable(const py::object & obj)
{
return execWithGIL(
[&]()
{
auto table_type = py::module_::import("pyarrow").attr("Table");
return py::isinstance(obj, table_type);
});
chassert(py::gil_check());
auto dict = py::module_::import("sys").attr("modules");
if (!dict.contains(py::str("pyarrow")))
return false;

return py::isinstance(obj, py::module_::import("pyarrow").attr("Table"));
}

inline bool hasGetItem(const py::object & obj)
{
return execWithGIL(
[&]()
{
return py::hasattr(obj, "__getitem__");
});
chassert(py::gil_check());
return py::hasattr(obj, "__getitem__");
}

// Specific wrappers for common use cases
Expand All @@ -105,11 +91,6 @@ inline auto castToPyList(const py::object & obj)
return execWithGIL([&]() { return obj.cast<py::list>(); });
}

// Cast a generic Python object to py::array; the cast runs inside
// execWithGIL (presumably acquiring the GIL — confirm against its definition).
inline auto castToPyArray(const py::object & obj)
{
    auto do_cast = [&]() { return obj.cast<py::array>(); };
    return execWithGIL(do_cast);
}

inline std::string castToStr(const py::object & obj)
{
return execWithGIL([&]() { return py::str(obj).cast<std::string>(); });
Expand Down
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,12 @@ def build_extensions(self):
ext_modules=ext_modules,
python_requires=">=3.8",
install_requires=[
"pyarrow>=13.0.0",
"pandas>=2.0.0",
],
extras_require={
"arrow": ["pandas>=2.0.0", "pyarrow>=13.0.0"],
"pandas": ["pandas>=2.0.0", "pyarrow>=13.0.0"],
"all": ["pandas>=2.0.0", "pyarrow>=13.0.0"],
},
cmdclass={"build_ext": BuildExt},
test_suite="tests",
zip_safe=False,
Expand Down
Loading
Loading