From c76b799a5ae32db3869e6f989fee4f7f1da2b8a9 Mon Sep 17 00:00:00 2001
From: Linlang
Date: Fri, 19 Sep 2025 16:48:37 +0800
Subject: [PATCH] Fixed: value error caused by incorrect date format in daily
 data during the normalize process

Add format_data, which drops the trailing row of daily ("1d") data when its
date does not parse as %Y-%m-%d. The date is read from the configured date
field rather than a hard-coded "date" column. _executor calls format_data
before normalizing and, when no rows remain, logs a warning instead of
normalizing.

---
 scripts/data_collector/base.py | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/scripts/data_collector/base.py b/scripts/data_collector/base.py
index 2efc2feadc..672e593fa5 100644
--- a/scripts/data_collector/base.py
+++ b/scripts/data_collector/base.py
@@ -280,11 +280,28 @@ def __init__(
         self._symbol_field_name = symbol_field_name
         self._end_date = kwargs.get("end_date", None)
         self._max_workers = max_workers
+        self.interval = kwargs.get("interval", "1d")
 
         self._normalize_obj = normalize_class(
             date_field_name=date_field_name, symbol_field_name=symbol_field_name, **kwargs
         )
 
+    def format_data(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Drop the trailing row of daily data when its date is not ``YYYY-MM-DD``.
+
+        Only the last row is inspected, and only when ``interval`` is ``"1d"``;
+        every other frame is returned unchanged.
+        """
+        if self.interval != "1d" or df.empty or self._date_field_name not in df.columns:
+            return df
+        last_date = df[self._date_field_name].iloc[-1]
+        try:
+            pd.to_datetime(last_date, format="%Y-%m-%d", errors="raise")
+        except (ValueError, TypeError):
+            # A malformed trailing date makes normalization raise, so discard that row.
+            df = df.iloc[:-1]
+        return df
+
     def _executor(self, file_path: Path):
         file_path = Path(file_path)
 
@@ -300,14 +317,18 @@ def _executor(self, file_path: Path):
             keep_default_na=False,
             na_values={col: symbol_na if col == self._symbol_field_name else default_na for col in columns},
         )
-
-        # NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
-        df = self._normalize_obj.normalize(df)
-        if df is not None and not df.empty:
-            if self._end_date is not None:
-                _mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
-                df = df[_mask]
-            df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
+        df = self.format_data(df=df)
+
+        if not df.empty:
+            # NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
+            df = self._normalize_obj.normalize(df)
+            if df is not None and not df.empty:
+                if self._end_date is not None:
+                    _mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
+                    df = df[_mask]
+                df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
+        else:
+            logger.warning(f"{file_path.stem} source data is empty and will not undergo normalization processing.")
 
     def normalize(self):
         logger.info("normalize data......")