From cfefb49ef8ece9b4f57826e73237ede61d16d612 Mon Sep 17 00:00:00 2001 From: zosimovaa Date: Fri, 7 Nov 2025 21:06:00 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=D1=87=D0=B8=D0=BD=D0=B8=D0=BB=20?= =?UTF-8?q?=D0=BF=D0=B0=D1=80=D1=81=D0=B8=D0=BD=D0=B3=20=D0=B0=D0=B4=D1=80?= =?UTF-8?q?=D0=B5=D1=81=D0=BE=D0=B2=20=D1=8D=D0=BB=20=D0=BF=D0=BE=D1=87?= =?UTF-8?q?=D1=82=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mail_order_bot/email_client/client.py | 64 ++++++++++++---------- src/mail_order_bot/email_client/objects.py | 1 + src/mail_order_bot/main.py | 1 + tests/email_client/test_email_client.py | 5 +- 4 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/mail_order_bot/email_client/client.py b/src/mail_order_bot/email_client/client.py index b281f42..54c1045 100644 --- a/src/mail_order_bot/email_client/client.py +++ b/src/mail_order_bot/email_client/client.py @@ -109,27 +109,16 @@ class EmailClient: 07.10.2025, 16:01, Имя (email@example.com): Кому: ... """ - # Ищем первую секцию пересылаемого сообщения (по структуре письма) - match = re.search( - r"-{8,}\\s*Пересылаемое сообщение\\s*-{8,}.*?(\\d{2}\\.\\d{2}\\.\\d{4},\\s*\\d{2}:\\d{2},.*?)\\(([^\\)]+)\\):", - body, re.DOTALL) - emails = [] + # Ищем email внутри скобок после строки "Пересылаемое сообщение" + pattern = r"Пересылаемое сообщение.*?\((.*?)\)" + match = re.search(pattern, body, re.DOTALL) if match: - emails.append(match.group(2)) # email из первой строки пересыла - # Ищем все email в первой пересылаемой секции (например, в "Кому:") - forwarded_section = re.search( - r"^-{8,}.*?Пересылаемое сообщение.*?:$(.*?)(?:^[-=]{5,}|\\Z)", - body, re.MULTILINE | re.DOTALL) - if forwarded_section: - addresses = re.findall(r"\\b([\\w\\.-]+@[\\w\\.-]+)\\b", forwarded_section.group(1)) - for addr in addresses: - if addr not in emails: - emails.append(addr) - return emails + return match.group(1) + return None def _extract_body(self, msg: email.message.Message) -> str: """ - Извлечь текст письма. + Извлечь текст письма из любого типа содержимого, кроме вложений. Args: msg: Объект письма @@ -138,27 +127,38 @@ class EmailClient: Текст письма """ body = "" - + if msg.is_multipart(): for part in msg.walk(): - content_type = part.get_content_type() content_disposition = str(part.get("Content-Disposition", "")) - - # Ищем текстовые части без вложений - if content_type == "text/plain" and "attachment" not in content_disposition: - try: - charset = part.get_content_charset() or 'utf-8' - body += part.get_payload(decode=True).decode(charset, errors='ignore') - except: - pass + # Пропускаем вложения + if "attachment" in content_disposition.lower(): + continue + try: + charset = part.get_content_charset() or 'utf-8' + payload = part.get_payload(decode=True) + if payload: + body_piece = payload.decode(charset, errors='ignore') + body += body_piece + except Exception: + pass else: try: charset = msg.get_content_charset() or 'utf-8' - body = msg.get_payload(decode=True).decode(charset, errors='ignore') - except: + payload = msg.get_payload(decode=True) + if payload: + body = payload.decode(charset, errors='ignore') + except Exception: pass - + return body + + def __extract_email(self, text: str) -> str: + match = re.search(r'<([^<>]+)>', text) + if match: + return match.group(1) + return None + def _extract_attachments(self, msg: email.message.Message) -> List[EmailAttachment]: """ @@ -238,6 +238,8 @@ class EmailClient: # Извлекаем данные from_addr = self._decode_header(msg.get("From", "")) subject = self._decode_header(msg.get("Subject", "")) + + from_email = self.__extract_email(from_addr) # Получаем дату date_str = msg.get("Date", "") @@ -254,6 +256,7 @@ class EmailClient: # Извлекаем тело письма body = self._extract_body(msg) + #print(body) first_sender = self._extract_first_sender(body) # Извлекаем вложения @@ -262,6 +265,7 @@ class EmailClient: # Создаем объект письма email_obj = EmailMessage( from_addr=from_addr, + from_email=from_email, subj=subject, dt=dt, body=body, diff --git a/src/mail_order_bot/email_client/objects.py b/src/mail_order_bot/email_client/objects.py index d3b73b0..f4ebb24 100644 --- a/src/mail_order_bot/email_client/objects.py +++ b/src/mail_order_bot/email_client/objects.py @@ -14,6 +14,7 @@ class EmailAttachment: class EmailMessage: """Класс для представления электронного письма""" from_addr: str + from_email: str subj: str dt: datetime body: str diff --git a/src/mail_order_bot/main.py b/src/mail_order_bot/main.py index 007a005..51d3768 100644 --- a/src/mail_order_bot/main.py +++ b/src/mail_order_bot/main.py @@ -36,6 +36,7 @@ class MailOrderBot(ConfigManager): logger.info(email.body) logger.info(email.first_sender) logger.info('--------------------------------') + logger.critical("mail checked") logger = logging.getLogger() diff --git a/tests/email_client/test_email_client.py b/tests/email_client/test_email_client.py index e06f694..09e346c 100644 --- a/tests/email_client/test_email_client.py +++ b/tests/email_client/test_email_client.py @@ -4,6 +4,7 @@ from mail_order_bot.email_client import EmailClient if __name__ == "__main__": + print(__name__) # подгружаем переменные окружения load_dotenv() @@ -15,13 +16,13 @@ if __name__ == "__main__": imap_port=os.getenv('IMAP_PORT'), smtp_port=os.getenv('SMTP_PORT') ) - emails = email_client.get_emails(folder='spareparts', only_unseen=True, mark_as_read=True) + emails = email_client.get_emails(folder='spareparts', only_unseen=True, mark_as_read=False) for email in emails: print(email.subj) print(email.from_addr) + print(email.from_email) print(email.dt) - print(email.body) print(email.first_sender) print('--------------------------------')