Рефакторинг, добавляю пайплайн

2025-11-11 23:07:52 +03:00
parent 0db1509f0f
commit 6abceda30e
14 changed files with 222 additions and 0 deletions
--- a/src/mail_order_bot/config2.yml
+++ b/src/mail_order_bot/config2.yml
--- a/src/mail_order_bot/configs/todx.ru.yml
+++ b/src/mail_order_bot/configs/todx.ru.yml
@@ -0,0 +1,17 @@
 # Processing pipeline for this client; TaskProcessor walks the stages in the order listed.
 pipeline:
  # "handler" is the class name TaskProcessor looks up and instantiates with "config".
  - handler: "ConfigurableExcelParser"
    # NOTE(review): "result_section" is not read by the visible TaskProcessor code,
    # which always stores the result under "positions" - confirm it is consumed.
    result_section: "positions"
    # Settings passed to the handler. The notes below describe how the visible
    # BasicExcelParser reads them; presumably ConfigurableExcelParser does the same - confirm.
    config:
      # Worksheet passed to pandas.read_excel (0 selects the first sheet).
      sheet_name: 0
      # Header text used to find the header row; the table is cut at the first
      # row where this column is empty.
      key_field: "Код детали"
      # OrderPosition field name -> column header in the workbook.
      mapping:
        article: "Код детали"
        manufacturer: "Производитель"
        name: "Наименование"
        # "\n" inside double quotes is a real line break, i.e. a two-line header cell.
        price: "Цена\nдетали"
        quantity: "Кол-\nво"
        total: "Сумма"
--- a/src/mail_order_bot/task_handler/init.py
+++ b/src/mail_order_bot/task_handler/init.py
@@ -0,0 +1 @@
 from .processor import TaskProcessor
--- a/src/mail_order_bot/task_handler/abcp_client/OrderCreator.py
+++ b/src/mail_order_bot/task_handler/abcp_client/OrderCreator.py
--- a/src/mail_order_bot/task_handler/abstract_task.py
+++ b/src/mail_order_bot/task_handler/abstract_task.py
@@ -0,0 +1,25 @@
 import logging
 import pandas as pd
 from abc import ABC, abstractmethod
 from typing import Dict, Any, List
 from io import BytesIO
 logger = logging.getLogger(__name__)
 class AbstractHandler(ABC):
    """
    Abstract base class for every pipeline handler.

    A handler receives its own configuration together with a context dict and
    exposes a single entry point, ``do``.
    """
    def __init__(self, config: Dict[str, Any], context: Dict[str, Any], *args, **kwargs) -> None:
        """
        Store the handler configuration and the context.

        Args:
            config: Settings for this handler.
            context: Dict the handler reads its input from and writes its
                result to.
            *args, **kwargs: Accepted and ignored.
        """
        # Both dicts are kept by reference, not copied.
        self.config = config
        self.context = context
    @abstractmethod
    def do(self, *args, **kwargs) -> Dict[str, Any]:
        """
        Run the handler.

        Must be implemented by every concrete handler.
        """
        pass
 # Backward-compatible alias: excel_parsers/basic_excel_parcer.py imports this
 # base class under the name ``AbstractTask``.
 AbstractTask = AbstractHandler
--- a/src/mail_order_bot/task_handler/excel_parsers/init.py
+++ b/src/mail_order_bot/task_handler/excel_parsers/init.py
--- a/src/mail_order_bot/task_handler/excel_parsers/basic_excel_parcer.py
+++ b/src/mail_order_bot/task_handler/excel_parsers/basic_excel_parcer.py
@@ -0,0 +1,114 @@
 import logging
 import pandas as pd
 from typing import Dict, Any, Optional, List
 from decimal import Decimal
 from io import BytesIO
 from .order_position import OrderPosition
 from ..abstract_task import AbstractTask
 logger = logging.getLogger(__name__)
 class BasicExcelParser(AbstractTask):
    """
    Generic, configuration-driven Excel order parser.

    Reads the attachment stored in the context, locates the header row by the
    configured ``key_field`` and converts every data row into an
    ``OrderPosition`` using the configured column ``mapping``.
    """
    # Context key under which the parsed positions are stored.
    RESULT_SECTION = "positions"
    def do(self) -> List[OrderPosition]:
        """
        Parse the attachment and publish the positions in the context.

        Rows that fail to parse are logged and skipped.

        Returns:
            The parsed positions (also stored under ``RESULT_SECTION``).

        Raises:
            Exception: if the file as a whole cannot be processed; the
                original error is attached as ``__cause__``.
        """
        attachment = self.context.get("attachment")
        # Raw bytes are wrapped; an already opened binary stream is used as is.
        file_bytes = attachment if hasattr(attachment, "read") else BytesIO(attachment)
        try:
            if not attachment:
                raise ValueError("В контексте нет вложения для обработки")
            df = self._make_dataframe(file_bytes)
            # Column mapping comes from the handler configuration.
            mapping = self.config['mapping']
            positions = []
            for idx, row in df.iterrows():
                try:
                    position = self._parse_row(row, mapping)
                except Exception as e:
                    # One bad row must not abort the whole file.
                    logger.error(f"Ошибка парсинга строки {idx}: {e}, {row}")
                    continue
                if position is not None:
                    positions.append(position)
            logger.info(f"Успешно обработано {len(positions)} позиций из {len(df)} строк")
            self.context[self.RESULT_SECTION] = positions
            return positions
        except Exception as e:
            logger.error(f"Ошибка при обработке файла: {e}")
            # Same exception type as before, but now carrying a message.
            raise Exception(f"Ошибка при обработке файла: {e}") from e
    @staticmethod
    def _to_decimal(value: Any) -> Decimal:
        """Convert a cell value to Decimal; accepts a decimal comma and space-separated thousands."""
        text = str(value).replace("\xa0", "").replace(" ", "").replace(",", ".").strip()
        return Decimal(text)
    @staticmethod
    def _optional_text(row: pd.Series, column: Optional[str]) -> str:
        """Return the stripped text of an optional column, or "" when it is unmapped, absent or empty."""
        if column is None:
            return ""
        value = row.get(column)
        return "" if pd.isna(value) else str(value).strip()
    def _parse_row(self, row: pd.Series, mapping: Dict[str, str]) -> Optional[OrderPosition]:
        """
        Convert one data row into an OrderPosition.

        Returns:
            The position, or None when a required cell (article, price,
            quantity) is empty.
        """
        for field in ('article', 'price', 'quantity'):
            column = mapping[field]
            if pd.isna(row.get(column)):
                logger.warning(f"Позиция не создана - не заполнено поле {column}")
                return None
        price = self._to_decimal(row[mapping['price']])
        quantity = int(row[mapping['quantity']])
        # Fall back to price * quantity when the total is unmapped or the cell is empty.
        total_value = row.get(mapping['total']) if 'total' in mapping else None
        total = price * quantity if pd.isna(total_value) else self._to_decimal(total_value)
        return OrderPosition(
            article=str(row[mapping['article']]).strip(),
            manufacturer=self._optional_text(row, mapping.get('manufacturer')),
            name=self._optional_text(row, mapping.get('name')),
            price=price,
            quantity=quantity,
            total=total,
            additional_attrs=self._extract_additional_attrs(row, mapping)
        )
    def _extract_additional_attrs(self, row: pd.Series, mapping: Dict[str, str]) -> Dict[str, Any]:
        """Collect the non-empty cells of columns that are not part of the mapping."""
        mapped_columns = set(mapping.values())
        return {
            col: row[col]
            for col in row.index
            if col not in mapped_columns and not pd.isna(row[col])
        }
    def _make_dataframe(self, bio) -> pd.DataFrame:
        """
        Read the sheet and return the order table as a DataFrame.

        The header row is the first row with a cell containing ``key_field``
        (case-insensitive); the table ends before the first row whose
        ``key_field`` cell is empty.

        Raises:
            ValueError: if ``key_field`` is not configured, the sheet is
                empty, or no matching header row/column is found.
        """
        sheet_name = self.config.get("sheet_name", 0)
        key_field = self.config.get("key_field")
        if not key_field:
            raise ValueError("В конфигурации не задано поле key_field")
        # First pass without a header, only to locate the header row.
        df_full = pd.read_excel(bio, sheet_name=sheet_name, header=None)
        if df_full.empty:
            raise ValueError("Лист не содержит данных")
        # regex=False: the key is plain text, not a pattern.
        header_mask = df_full.apply(
            lambda row: row.astype(str).str.contains(key_field, case=False, na=False, regex=False).any(),
            axis=1)
        matching_rows = df_full.index[header_mask.to_numpy(dtype=bool)]
        if len(matching_rows) == 0:
            raise ValueError(f"Не найдена строка заголовка с полем '{key_field}'")
        header_row_idx = int(matching_rows[0])
        # Second pass with the real header row.
        # NOTE(review): this pass uses the calamine engine while the first pass
        # uses the pandas default - confirm both number the rows identically.
        df = pd.read_excel(bio, header=header_row_idx, sheet_name=sheet_name, engine='calamine')  # openpyxl  calamine
        # The header was found by substring, the column lookup needs the exact name.
        if key_field not in df.columns:
            raise ValueError(f"В таблице нет колонки '{key_field}'")
        empty_mask = df[key_field].isna().to_numpy()
        if not empty_mask.any():
            # The table runs to the last row of the sheet.
            return df
        # Keep the rows before the first empty key cell (that row excluded).
        return df.iloc[:int(empty_mask.argmax())]
--- a/src/mail_order_bot/task_handler/excel_parsers/order_position.py
+++ b/src/mail_order_bot/task_handler/excel_parsers/order_position.py
@@ -0,0 +1,25 @@
 from dataclasses import dataclass, field
 from typing import Dict, Any
 from decimal import Decimal
@dataclass
class OrderPosition:
    """
    Unified order line shared by all counterparties.

    Every supplier-specific format is converted into this structure.
    """
    article: str  # Part number of the item
    manufacturer: str  # Manufacturer
    name: str  # Item name
    price: Decimal  # Unit price
    quantity: int  # Number of units
    total: Decimal  # Line total
    additional_attrs: Dict[str, Any] = field(default_factory=dict)  # Extra attributes outside the core model

    def __post_init__(self):
        """Reject a negative quantity or price right after construction."""
        quantity, price = self.quantity, self.price
        # Quantity is checked first, so it wins when both values are negative.
        if quantity < 0:
            raise ValueError(f"Количество не может быть отрицательным: {quantity}")
        if price < 0:
            raise ValueError(f"Цена не может быть отрицательной: {price}")
--- a/src/mail_order_bot/task_handler/notifiers/init.py
+++ b/src/mail_order_bot/task_handler/notifiers/init.py
--- a/src/mail_order_bot/task_handler/notifiers/test_notifier.py
+++ b/src/mail_order_bot/task_handler/notifiers/test_notifier.py
--- a/src/mail_order_bot/task_handler/order.py
+++ b/src/mail_order_bot/task_handler/order.py
--- a/src/mail_order_bot/task_handler/processor.py
+++ b/src/mail_order_bot/task_handler/processor.py
@@ -0,0 +1,40 @@
 from pathlib import Path
 import os
 import yaml
 import json
 from typing import Dict, Any
 from pathlib import Path
 from ..excel_processor.configurable_parser import ConfigurableExcelParser
 class TaskProcessor:
    """
    Runs the client-specific pipeline described in ``<config_path>/<client>.yml``.
    """
    def __init__(self, config_path: Path, handlers: "Dict[str, Any] | None" = None):
        """
        Args:
            config_path: Directory holding one ``<client>.yml`` file per client.
            handlers: Optional mapping of handler name to handler class. When
                omitted, names are looked up among this module's globals.
        """
        self.config_path = config_path
        self.handlers = handlers
        self.context = {}
    def process(self, client, file_object):
        """
        Run every pipeline stage configured for ``client`` on ``file_object``.

        Each stage's handler is built from the stage ``config`` and its
        ``parse(file_object)`` result is stored in the context under the
        stage's ``result_section`` (``"positions"`` when not given).

        Returns:
            The value stored under ``"positions"``.

        Raises:
            KeyError: if the configuration lacks a required key, a stage
                names an unknown handler, or no stage produced ``"positions"``.
        """
        pipeline = self._load_config(client)["pipeline"]
        registry = globals() if self.handlers is None else self.handlers
        # Start every run from a clean context so an earlier result never leaks.
        self.context = {}
        for stage in pipeline:
            handler_name = stage["handler"]
            if handler_name not in registry:
                raise KeyError(f"Неизвестный обработчик '{handler_name}' в конфигурации клиента '{client}'")
            handler = registry[handler_name](stage["config"])
            section = stage.get("result_section", "positions")
            self.context[section] = handler.parse(file_object)
        if "positions" not in self.context:
            raise KeyError(f"Ни один этап пайплайна клиента '{client}' не сформировал 'positions'")
        return self.context["positions"]
    def _load_config(self, client) -> Dict[str, Any]:
        """Load the configuration for ``client`` from its YAML file."""
        path = Path(self.config_path) / f"{client}.yml"
        with path.open('r', encoding='utf-8') as f:
            return yaml.safe_load(f)
--- a/tests/excel_processor/configs/todx.ru.yml
+++ b/tests/excel_processor/configs/todx.ru.yml
--- a/tests/excel_processor/hanler_test.py
+++ b/tests/excel_processor/hanler_test.py