From c76b799a5ae32db3869e6f989fee4f7f1da2b8a9 Mon Sep 17 00:00:00 2001
From: Linlang <Lv.Linlang@hotmail.com>
Date: Fri, 19 Sep 2025 16:48:37 +0800
Subject: [PATCH] Fixed: value error caused by incorrect date format in daily
 data during the normalize process

---
 scripts/data_collector/base.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/scripts/data_collector/base.py b/scripts/data_collector/base.py
index 2efc2feadc..672e593fa5 100644
--- a/scripts/data_collector/base.py
+++ b/scripts/data_collector/base.py
@@ -280,11 +280,20 @@ def __init__(
         self._symbol_field_name = symbol_field_name
         self._end_date = kwargs.get("end_date", None)
         self._max_workers = max_workers
+        self.interval = kwargs.get("interval", "1d")
 
         self._normalize_obj = normalize_class(
             date_field_name=date_field_name, symbol_field_name=symbol_field_name, **kwargs
         )
 
+    def format_data(self, df: pd.DataFrame):
+        if self.interval == "1d":
+            try:
+                pd.to_datetime(df.iloc[-1]["date"], format="%Y-%m-%d", errors="raise")
+            except Exception:
+                df = df.iloc[:-1]
+        return df
+
     def _executor(self, file_path: Path):
         file_path = Path(file_path)
 
@@ -300,14 +309,18 @@ def _executor(self, file_path: Path):
             keep_default_na=False,
             na_values={col: symbol_na if col == self._symbol_field_name else default_na for col in columns},
         )
-
-        # NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
-        df = self._normalize_obj.normalize(df)
-        if df is not None and not df.empty:
-            if self._end_date is not None:
-                _mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
-                df = df[_mask]
-            df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
+        df = self.format_data(df=df)
+
+        if not df.empty:
+            # NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
+            df = self._normalize_obj.normalize(df)
+            if df is not None and not df.empty:
+                if self._end_date is not None:
+                    _mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
+                    df = df[_mask]
+                df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
+        else:
+            logger.warning(f"{file_path.stem} source data is empty and will not undergo normalization processing.")
 
     def normalize(self):
         logger.info("normalize data......")