# coding=UTF-8
""":py:class:`assembl.models.post.Post` that came as email, and utility code for handling email."""
from builtins import str
from builtins import object
import email
import mailbox
import re
import smtplib
import os
from html import escape as html_escape
from collections import defaultdict
from email.header import decode_header as decode_email_header, Header
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.utils import parseaddr, mktime_tz, parsedate_tz
from email.message import Message
import logging
from html import escape
from future.utils import native_str, as_native_str, binary_type, PY2, bytes_to_native_str
from past.builtins import str as oldstr
import jwzthreading
from ..lib.clean_input import sanitize_html
from pyramid.threadlocal import get_current_registry
from datetime import datetime
# from imaplib2 import IMAP4_SSL, IMAP4
import transaction
from pyisemail import is_email
from sqlalchemy.orm import (deferred, undefer, joinedload_all)
from sqlalchemy.orm.exc import NoResultFound, MultipleResultsFound
from sqlalchemy import (
Column,
Integer,
ForeignKey,
String,
Binary,
UnicodeText,
Boolean,
)
from ..lib.sqla_types import (CoerceUnicode, EmailString)
from .langstrings import LangString
from .generic import PostSource
from .post import ImportedPost
from .auth import EmailAccount
from .attachment import File, PostAttachment, AttachmentPurpose
from ..tasks.imap import import_mails
from ..tasks.translate import translate_content
log = logging.getLogger(__name__)
[docs]class AbstractMailbox(PostSource):
"""
A Mailbox refers to any source of Email, and
whose messages should be imported and displayed as Posts.
It must not be instantiated directly.
"""
__tablename__ = "mailbox"
# The primary key is also a foreign key to post_source.id.
id = Column(Integer, ForeignKey(
'post_source.id',
ondelete='CASCADE',
onupdate='CASCADE'
), primary_key=True)
# Name of the mail folder to read; defaults to "INBOX".
folder = Column(UnicodeText, default=u"INBOX", nullable=False)
# The admin sender email is used for notifications, usually with the
# name of the original post sender.
admin_sender = Column(EmailString)
# UID of the last imported message (written by IMAPMailbox.do_import_content).
last_imported_email_uid = Column(UnicodeText)
# Optional pattern / replacement pair applied by mangle_mail_subject().
subject_mangling_regex = Column(UnicodeText, nullable=True)
subject_mangling_replacement = Column(UnicodeText, nullable=True)
# Cache for the compiled form of subject_mangling_regex; None until
# _compile_subject_mangling_regex() has produced a pattern.
__compiled_subject_mangling_regex = None
def _compile_subject_mangling_regex(self):
    """Refresh the cached compiled pattern from ``subject_mangling_regex``.

    The cache is set to ``None`` when no pattern is configured.
    """
    pattern_source = self.subject_mangling_regex
    self.__compiled_subject_mangling_regex = (
        re.compile(pattern_source) if pattern_source else None)
# SQLAlchemy mapper options: rows of this class carry the polymorphic
# identity 'mailbox'; 'with_polymorphic': '*' is passed through unchanged.
__mapper_args__ = {
'polymorphic_identity': 'mailbox',
'with_polymorphic': '*'
}
def mangle_mail_subject(self, subject):
    """Return ``subject`` with the configured mangling pattern substituted.

    The pattern is compiled on first use. Every match is replaced by
    ``subject_mangling_replacement`` (or the empty string when that is
    unset). When no pattern is configured the subject is returned as is.
    """
    if self.__compiled_subject_mangling_regex is None:
        self._compile_subject_mangling_regex()
    compiled = self.__compiled_subject_mangling_regex
    if not compiled:
        return subject
    replacement = self.subject_mangling_replacement or ''
    mangled, _substitutions = compiled.subn(replacement, subject)
    return mangled
@staticmethod
def clean_angle_brackets(message_id):
if message_id and message_id.startswith("<") and message_id.endswith(">"):
return message_id[1:-1]
return message_id
@staticmethod
def text_to_html(message_body):
return "<pre>%s</pre>" % escape(message_body)
@staticmethod
def strip_full_message_quoting_plaintext(message_body):
    """Drop trailing full-message quotes from a plain-text mail body.

    Assumes any encoding conversions have already been done.

    The body is scanned line by line. A line (or run of successive
    lines) matching one of the known "quote announcement" patterns
    switches the quote-prefix pattern to the one registered for that mail
    client. Quoted lines and blank lines are buffered; they are only
    written to the result when a normal line follows them, so quotes and
    whitespace at the very end of the message are discarded.

    :param message_body: the decoded text of the message
    :return: the remaining lines joined with ``"\\n"``
    """
    # Most useful to develop this:
    # http://www.motobit.com/util/quoted-printable-decoder.asp

    # To be considered matching, each line must match successive lines,
    # in order.
    quote_announcement_lines_regexes = {
        'generic_original_message': {
            # Fixed: the pattern used to be wrapped in literal "/"
            # delimiters and therefore never matched a real
            # "-----Original Message-----" line.
            'announceLinesRegexes': [re.compile(r"-+\s*Original Message\s*-+")],
            'quotePrefixRegex': re.compile(r"^>\s|^>$")
        },
        'gmail_fr_circa_2012': {
            # 2012 Le 6 juin 2011 15:43, <nicolas.decordes@orange-ftgroup.com> a écrit :
            'announceLinesRegexes': [re.compile(r"^Le .*, .*<.*@.*> a écrit :")],
            'quotePrefixRegex': re.compile(r"^>\s|^>$")
        },
        'gmail_en_circa_2014': {
            # 2014-06-17 10:32 GMT-04:00 Benoit Grégoire <benoitg@coeus.ca>:
            'announceLinesRegexes': [re.compile(r"^\d{4}-\d{2}-\d{2}.*<.*@.*>:")],
            'quotePrefixRegex': re.compile(r"^>\s|^>$")
        },
        'outlook_fr_circa_2012': {
            # 2014-06-17 10:32 GMT-04:00 Benoit Grégoire <benoitg@coeus.ca>:
            'announceLinesRegexes': [re.compile(r"^\d{4}-\d{2}-\d{2}.*<.*@.*>:")],
            'quotePrefixRegex': re.compile(r"^>\s|^>$")
        },
        'outlook_fr_multiline_circa_2012': {
            'announceLinesRegexes': [
                re.compile(r"^_+$"),  # ________________________________
                re.compile(r"^\s*$"),  # Only whitespace
                re.compile(r"^De :.*$"),
                re.compile(r"^Envoy.+ :.*$"),
                re.compile(r"^À :.*$"),
                re.compile(r"^Objet :.*$"),
            ],
            # Outlook does not prefix quoted lines: everything after the
            # announcement is treated as quote.
            'quotePrefixRegex': re.compile(r"^.*$")
        },
        'outlook_en_multiline_circa_2012': {
            'announceLinesRegexes': [
                re.compile(r"^_+$"),  # ________________________________
                re.compile(r"^\s*$"),  # Only whitespace
                re.compile(r"^From:.*$"),
                re.compile(r"^Sent:.*$"),
                re.compile(r"^To:.*$"),
                re.compile(r"^Subject:.*$"),
            ],
            'quotePrefixRegex': re.compile(r"^.*$")
        },
    }

    def check_quote_announcement_lines_match(
            currentQuoteAnnounce, keysStillMatching, lineToMatch):
        """Advance the announcement matcher by one line.

        Returns ``(matchComplete, keysStillMatching)`` where
        ``matchComplete`` is the key whose last announcement line just
        matched (or ``False``), and ``keysStillMatching`` lists the keys
        that have not been ruled out yet.
        """
        if len(keysStillMatching) == 0:
            # Restart from scratch
            keysStillMatching = list(quote_announcement_lines_regexes.keys())
        nextIndexToMatch = len(currentQuoteAnnounce)
        matchComplete = False
        # Iterate over a copy: non-matching keys are removed as we go.
        for key in list(keysStillMatching):
            announce_regexes = \
                quote_announcement_lines_regexes[key]['announceLinesRegexes']
            if len(announce_regexes) > nextIndexToMatch:
                if announce_regexes[nextIndexToMatch].match(lineToMatch):
                    if len(announce_regexes) - 1 == nextIndexToMatch:
                        matchComplete = key
                else:
                    keysStillMatching.remove(key)
        if len(keysStillMatching) > 0:
            currentQuoteAnnounce.append(lineToMatch)
        return matchComplete, keysStillMatching

    defaultQuotePrefixRegex = re.compile(r"^>\s|^>$")
    quote_prefix_regex = defaultQuotePrefixRegex
    whitespace_line_regex = re.compile(r"^\s*$")
    retval = []
    currentQuoteAnnounce = []
    keysStillMatching = []
    currentQuote = []
    currentWhiteSpace = []

    class LineState(object):
        Normal = "Normal"
        PrefixedQuote = 'PrefixedQuote'
        PotentialQuoteAnnounce = 'PotentialQuoteAnnounce'
        QuoteAnnounceLastLine = 'QuoteAnnounceLastLine'
        AllWhiteSpace = 'AllWhiteSpace'

    previous_line_state = LineState.Normal
    line_state = LineState.Normal

    for line in message_body.splitlines():
        previous_line_state = line_state

        (matchComplete, keysStillMatching) = \
            check_quote_announcement_lines_match(
                currentQuoteAnnounce, keysStillMatching, line)
        if matchComplete:
            line_state = LineState.QuoteAnnounceLastLine
            # Fixed: use the key that actually completed the match, not
            # whichever key happens to be first in the candidate list.
            quote_prefix_regex = \
                quote_announcement_lines_regexes[matchComplete]['quotePrefixRegex']
        elif len(keysStillMatching) > 0:
            line_state = LineState.PotentialQuoteAnnounce
        elif quote_prefix_regex.match(line):
            line_state = LineState.PrefixedQuote
        elif whitespace_line_regex.match(line):
            line_state = LineState.AllWhiteSpace
        else:
            line_state = LineState.Normal

        if line_state == LineState.Normal:
            # Fixed: this used "&", which binds tighter than ">" and made
            # the test "(flag & len(...)) > 0" (true only for odd counts).
            # Whitespace seen before a quote is emitted before the quote.
            if (previous_line_state != LineState.AllWhiteSpace
                    and len(currentWhiteSpace) > 0):
                retval += currentWhiteSpace
                currentWhiteSpace = []
            # A quote followed by normal text is an inline reply: keep it.
            if len(currentQuote) > 0:
                retval += currentQuoteAnnounce
                retval += currentQuote
                currentQuote = []
                currentQuoteAnnounce = []
            # Whitespace seen after the quote is emitted after it.
            if previous_line_state == LineState.AllWhiteSpace:
                retval += currentWhiteSpace
                currentWhiteSpace = []
            retval.append(line)
        elif line_state == LineState.PrefixedQuote:
            currentQuote.append(line)
        elif line_state == LineState.QuoteAnnounceLastLine:
            currentQuoteAnnounce = []
        elif line_state == LineState.AllWhiteSpace:
            currentWhiteSpace.append(line)
        # NOTE(review): lines buffered as PotentialQuoteAnnounce whose
        # announcement never completes are not written back to the result
        # unless a quote follows; confirm whether that is intended.
        log.debug("%-30s %s" % (line_state, line))
    # We just let trailing quotes and whitespace die...
    return '\n'.join(retval)
@staticmethod
def strip_full_message_quoting_html(message_body):
    """Remove the quoted original message from an HTML mail body.

    Assumes any encoding conversions have already been done.

    The known mail-client quote layouts are tried in a fixed order
    (GMail, modern Apple Mail, old French Apple Mail, structured Outlook,
    unstructured Outlook, Thunderbird). The first layout that applies has
    its quote removed and the document is serialized immediately. When
    nothing applies the parsed document is serialized unchanged.

    :param message_body: HTML text of the message
    :return: serialized HTML, or ``""`` when the document is empty
    """
    # Most useful to develop this:
    # http://www.motobit.com/util/quoted-printable-decoder.asp
    # http://www.freeformatter.com/html-formatter.html
    # http://www.freeformatter.com/xpath-tester.html#ad-output
    from lxml import html, etree

    try:
        doc = html.fromstring(message_body)
    except etree.ParserError:
        # An empty document raises "ParserError: Document is empty";
        # letting it propagate would block the SourceReader, so an empty
        # string is returned instead.
        return ""

    exslt_regexp = {'re': "http://exslt.org/regular-expressions"}

    def serialize():
        return html.tostring(doc, encoding="unicode")

    # Strip GMail quotes (but keep forwarded messages).
    gmail_quotes = doc.find_class('gmail_quote')
    if len(gmail_quotes) > 0:
        first_quote = gmail_quotes[0]
        if (not first_quote.text
                or "---------- Forwarded message ----------" not in first_quote.text):
            first_quote.drop_tree()
            return serialize()

    # Layouts where exactly one matching node is simply dropped.
    single_node_layouts = (
        # Modern Apple Mail quotes
        (r"//child::blockquote[contains(@type,'cite')]/preceding-sibling::br[contains(@class,'Apple-interchange-newline')]/parent::node()/parent::node()",
         None),
        # Old AppleMail quotes (french); trying to match:
        # Le 6 juin 2011 à 11:02, Jean-Michel Cornu a écrit :
        (r"//child::div[re:test(text(), '^.*Le .*\d{4} .*:\d{2}, .* a .*crit :.*$', 'i')]/following-sibling::br[contains(@class,'Apple-interchange-newline')]/parent::node()",
         exslt_regexp),
        # Outlook quotes (when outlook gives usable structure)
        (r"//body/child::blockquote/child::div[contains(@class,'OutlookMessageHeader')]/parent::node()",
         None),
    )
    for expression, namespaces in single_node_layouts:
        candidates = etree.XPath(expression, namespaces=namespaces)(doc)
        if len(candidates) == 1:
            candidates[0].drop_tree()
            return serialize()

    # Strip Outlook quotes (when outlook gives NO usable structure): look
    # for a div containing the four successive header labels.
    header_alternatives = [
        '|'.join(['^From:.*$', '^De :.*$']),
        '|'.join(['^Sent:.*$', '^Envoy.+ :.*$']),
        # Trying to match À, but unicode is really problematic in lxml regex
        '|'.join(['^To:.*$', '^.+:.*$']),
        '|'.join(['^Subject:.*$', '^Objet :.*$']),
    ]
    header_predicate = " and ".join(
        r"descendant::*[re:test(text(), '" + alternatives + "')]"
        for alternatives in header_alternatives)
    header_div_path = r"//descendant::div[" + header_predicate + "]"
    header_divs = etree.XPath(header_div_path, namespaces=exslt_regexp)(doc)
    if len(header_divs) == 1:
        quoted_siblings = etree.XPath(
            header_div_path + "/following-sibling::*",
            namespaces=exslt_regexp)(doc)
        for quoted_element in quoted_siblings:
            # This moves the text to the tail of header_divs[0]
            quoted_element.drop_tree()
        header_divs[0].tail = None
        header_divs[0].drop_tree()
        return serialize()

    # Strip Thunderbird quotes
    thunderbird_quote = "//child::blockquote[contains(@type,'cite') and boolean(@cite)]"
    blockquotes = etree.XPath(thunderbird_quote + "/self::blockquote")(doc)
    if len(blockquotes) == 1:
        announce_elements = doc.xpath(thunderbird_quote + "/preceding-sibling::*")
        if len(announce_elements) > 0:
            announce_elements[-1].tail = None
        blockquotes[0].drop_tree()

    # Either the Thunderbird quote was just stripped, or nothing was.
    return serialize()
def parse_email(self, message_string, existing_email=None):
    """Create or replace an :class:`Email` from a raw message.

    :param message_string: the full RFC 822 message, as text or bytes
    :param existing_email: optional Email expected to be the one found
        for the message id; a mismatch raises ``ValueError``
    :return: ``(email_object, parsed_email, error_description)``; when
        the message has no Message-ID the result is
        ``(None, None, error_description)``
    :raises ValueError: ``existing_email`` differs from the stored email
    :raises MultipleResultsFound: several stored emails share the id
    """
    if isinstance(message_string, binary_type):
        message_bytes = message_string
        message_string = message_bytes.decode('utf-8')
    else:
        message_bytes = message_string.encode('utf-8')
    parsed_email = email.message_from_string(
        bytes_to_native_str(message_bytes))
    body = None
    error_description = None

    default_charset = parsed_email.get_charset() or 'ISO-8859-1'

    def extract_text(part):
        """ Returns HTML or Text parts of a message"""
        mimetype = part.get_content_type()
        if part.is_multipart():
            if mimetype == "multipart/alternative":
                # Prefer the HTML alternative, fall back to plain text.
                text_part = None
                for subpart in part.get_payload():
                    (subpart_c, subtype) = extract_text(subpart)
                    if subpart_c is None:
                        continue
                    elif subtype == "text/html":
                        return (subpart_c, subtype)
                    elif subtype == "text/plain":
                        text_part = subpart_c
                    else:
                        log.debug("cannot treat alternative %s", subtype)
                if text_part:
                    return (text_part, "text/plain")
                return (None, None)
            else:
                # Other multiparts: concatenate the text parts, upgrading
                # everything to HTML as soon as one HTML part is seen.
                parts = []
                parts_type = None
                for subpart in part.get_payload():
                    (subpart_c, subtype) = extract_text(subpart)
                    if not subpart_c:
                        continue
                    elif subtype == 'text/html':
                        if parts_type == 'text/plain':
                            parts = [AbstractMailbox.text_to_html(p)
                                     for p in parts]
                        parts_type = 'text/html'
                        parts.append(subpart_c)
                    elif subtype == 'text/plain':
                        if parts_type == 'text/html':
                            subpart_c = AbstractMailbox.text_to_html(subpart_c)
                        else:
                            parts_type = 'text/plain'
                        parts.append(subpart_c)
                    elif not subpart.is_attachment():
                        log.debug("cannot treat text subpart %s", subtype)
                if not parts:
                    return (None, None)
                if len(parts) == 1:
                    return (parts[0], parts_type)
                if parts_type == "text/html":
                    return ("\n".join([
                        "<div>%s</div>" % p for p in parts]), parts_type)
                if parts_type == "text/plain":
                    return ("\n".join(parts), parts_type)
        elif part.get_content_disposition():
            # TODO: Inline attachments
            return (None, None)
        elif mimetype in ("text/html", "text/plain"):
            charset = part.get_content_charset(default_charset)
            decoded_part = part.get_payload(decode=True)
            try:
                decoded_part = decoded_part.decode(charset, 'replace')
            except LookupError:
                # The message declares a charset Python does not know:
                # fall back to a codec that can decode any byte string.
                decoded_part = decoded_part.decode('ISO-8859-1', 'replace')
            if mimetype == "text/html":
                decoded_part = sanitize_html(
                    AbstractMailbox.strip_full_message_quoting_html(
                        decoded_part))
            else:
                decoded_part = AbstractMailbox.strip_full_message_quoting_plaintext(
                    decoded_part)
            return (decoded_part, mimetype)
        else:
            log.debug("cannot treat part %s", mimetype)
            return (None, None)

    (body, mimeType) = extract_text(parsed_email)

    def email_header_to_unicode(header_string, join_crlf=True):
        """Decode an RFC 2047 header value to text ('' if absent)."""
        if header_string is None:
            # Missing header (e.g. a message without Subject).
            return u''
        text = u''.join(
            [
                txt.decode(enc) if enc else txt.decode('iso-8859-1') if isinstance(txt, bytes) else txt
                for (txt, enc) in decode_email_header(header_string)
            ]
        )
        if join_crlf:
            text = u''.join(text.split(u'\r\n'))
        return text

    new_message_id = parsed_email.get('Message-ID', None)
    if new_message_id:
        new_message_id = self.clean_angle_brackets(
            email_header_to_unicode(new_message_id))
    else:
        error_description = "Unable to parse the Message-ID for message string: \n%s" % message_string
        return (None, None, error_description)

    assert new_message_id

    new_in_reply_to = parsed_email.get('In-Reply-To', None)
    if new_in_reply_to:
        new_in_reply_to = self.clean_angle_brackets(
            email_header_to_unicode(new_in_reply_to))

    sender_name, sender_email = parseaddr(parsed_email.get('From'))
    sender_name = email_header_to_unicode(sender_name)
    if sender_name:
        sender = "%s <%s>" % (sender_name, sender_email)
    else:
        sender = sender_email
    sender_email_account = EmailAccount.get_or_make_profile(self.db, sender_email, sender_name)
    creation_date = datetime.utcfromtimestamp(
        mktime_tz(parsedate_tz(parsed_email['Date'])))
    subject = email_header_to_unicode(parsed_email['Subject'], False)
    recipients = email_header_to_unicode(parsed_email['To'])
    # A message may carry no usable text part at all (attachments only):
    # store an empty body instead of crashing on None.strip().
    body = (body or u'').strip()
    if mimeType is None:
        # NOTE(review): an empty body is recorded as plain text; confirm
        # this suits the consumers of body_mime_type.
        mimeType = 'text/plain'
    # Try/except for a normal situation is an anti-pattern,
    # but sqlalchemy doesn't have a function that returns
    # 0, 1 result or an exception
    try:
        email_object = self.db.query(Email).filter(
            Email.source_post_id == new_message_id,
            Email.discussion_id == self.discussion_id,
            Email.source == self
        ).one()
        if existing_email and existing_email != email_object:
            raise ValueError("The existing object isn't the same as the one found by message id")
        email_object.recipients = recipients
        email_object.sender = sender
        email_object.creation_date = creation_date
        email_object.source_post_id = new_message_id
        email_object.in_reply_to = new_in_reply_to
        email_object.body_mime_type = mimeType
        email_object.imported_blob = message_bytes
        # TODO MAP: Make this nilpotent.
        email_object.subject = LangString.create(subject)
        email_object.body = LangString.create(body)
    except NoResultFound:
        email_object = Email(
            discussion=self.discussion,
            source=self,
            recipients=recipients,
            sender=sender,
            subject=LangString.create(subject),
            creation_date=creation_date,
            source_post_id=new_message_id,
            in_reply_to=new_in_reply_to,
            body=LangString.create(body),
            body_mime_type=mimeType,
            imported_blob=message_bytes
        )
    except MultipleResultsFound:
        # To find duplicates (this should no longer happen, but in case it
        # ever does...):
        #
        # SELECT * FROM post WHERE id in (SELECT MAX(post.id) as max_post_id FROM imported_post JOIN post ON (post.id=imported_post.id) GROUP BY message_id, source_id HAVING COUNT(post.id)>1)
        #
        # To kill them:
        #
        # USE assembl;
        # UPDATE post p
        # SET parent_id = (
        # SELECT new_post_parent.id AS new_post_parent_id
        # FROM post AS post_to_correct
        # JOIN post AS bad_post_parent ON (post_to_correct.parent_id = bad_post_parent.id)
        # JOIN post AS new_post_parent ON (new_post_parent.message_id = bad_post_parent.message_id AND new_post_parent.id <> bad_post_parent.id)
        # WHERE post_to_correct.parent_id IN (
        # SELECT MAX(post.id) as max_post_id
        # FROM imported_post
        # JOIN post ON (post.id=imported_post.id)
        # GROUP BY message_id, source_id
        # HAVING COUNT(post.id)>1
        # )
        # AND p.id = post_to_correct.id
        # )
        #
        # USE assembl;
        # DELETE
        # FROM post WHERE post.id IN (SELECT MAX(post.id) as max_post_id FROM imported_post JOIN post ON (post.id=imported_post.id) GROUP BY message_id, source_id HAVING COUNT(post.id)>1)
        raise MultipleResultsFound("ID %s has duplicates in source %d" % (
            new_message_id, self.id))
    email_object.creator = sender_email_account.profile
    # email_object = self.db.merge(email_object)
    if not email_object.attachments:
        # Every part with a Content-Disposition becomes an attachment.
        attachment_parts = [p for p in parsed_email.walk()
                            if p.get_content_disposition()]
        for (num, part) in enumerate(attachment_parts):
            title = part.get_filename("file %d" % num)
            doc = File(
                discussion=self.discussion,
                mime_type=part.get_content_type(),
                title=title)
            payload = part.get_payload(decode=True)
            if part.get_content_type() == "message/rfc822":
                # An embedded message is stored in its serialized form.
                payload = part.as_bytes()
            doc.add_raw_data(payload)
            attachment = PostAttachment(
                discussion=self.discussion,
                document=doc,
                post=email_object,
                # the following should reflect whether part.get_content_disposition()
                # is inline or attachment
                attachmentPurpose='EMBED_ATTACHMENT',
                creator=email_object.creator,
                title=title)
            self.db.add(attachment)
    email_object.guess_languages()
    return (email_object, parsed_email, error_description)
@staticmethod
def guess_encoding(blob):
    """Decode a stored message blob, undoing repeated UTF-8 encoding.

    Blobs should be ascii, but sometimes are multiply-encoded
    utf-8, probably a bug of the underlying library.
    Temporary patch until it is fixed.

    Non-native-string input is first tried as ASCII and returned at once
    when that works. Otherwise the text is repeatedly re-read as UTF-8
    (through ISO-8859-1 bytes) until that no longer changes it or fails.
    """
    if not isinstance(blob, native_str):
        try:
            # shortcut that will work in 99% of cases
            return blob.decode('ascii')
        except UnicodeDecodeError:
            blob = blob.decode('iso-8859-1')
    current = blob
    while True:
        try:
            unwrapped = current.encode('iso-8859-1').decode('utf-8')
        except (UnicodeDecodeError, UnicodeEncodeError):
            return current
        if unwrapped == current:
            return current
        current = unwrapped
@staticmethod
def thread_mails(emails):
    """Recompute the parent of each email with the jwzthreading algorithm.

    ``emails`` has to be a complete set: only the messages given here
    are considered when looking for a parent.

    Each email is wrapped in a ``jwzthreading.Message`` built from its
    stored blob, the set is threaded, and every email whose parent
    according to the algorithm differs from its current parent is
    re-parented with ``set_parent``. Emails whose current parent is not
    an :class:`Email` are left alone.
    """
    # log.debug('Threading...')
    emails_for_threading = []
    for mail in emails:
        blob = AbstractMailbox.guess_encoding(mail.imported_blob)
        email_for_threading = jwzthreading.Message(email.message_from_string(blob))
        # Store our emailsubject, jwzthreading does not decode subject itself
        email_for_threading.subject = mail.subject.first_original().value
        # Store our email object pointer instead of the raw message text
        email_for_threading.message = mail
        emails_for_threading.append(email_for_threading)

    threaded_emails = jwzthreading.thread(emails_for_threading)

    # Output
    for container in threaded_emails:
        jwzthreading.print_container(container, 0, True)

    def update_threading(threaded_emails, debug=False):
        """Walk the containers recursively and fix parents in the database."""
        log.debug("\n\nEntering update_threading() for %ld mails:" % len(threaded_emails))
        for container in threaded_emails:
            message = container['message']
            # if debug:
            #     jwzthreading.print_container(container)
            message_string = "%s %s %d " % (
                message.subject, message.message_id,
                message.message.id) if message else "null "
            log.debug("Processing: %s container: %s parent: %s children :%s" % (
                message_string, container, container.parent, container.children))

            if message:
                current_parent = message.message.parent
                if current_parent:
                    db_parent_message_id = current_parent.message_id
                else:
                    db_parent_message_id = None

                if container.parent:
                    parent_message = container.parent['message']
                    if parent_message:
                        # jwzthreading strips the <>, re-add them
                        algorithm_parent_message_id = u"<" + parent_message.message_id + u">"
                    else:
                        # Logger.warn is a deprecated alias of warning().
                        log.warning("Parent was a dummy container, we may need "
                                    "to handle this case better, as we just "
                                    "potentially lost sibling relationships")
                        algorithm_parent_message_id = None
                else:
                    algorithm_parent_message_id = None
                log.debug("Current parent from database: " + repr(db_parent_message_id))
                log.debug("Current parent from algorithm: " + repr(algorithm_parent_message_id))
                log.debug("References: " + repr(message.references))
                if algorithm_parent_message_id != db_parent_message_id:
                    if current_parent is None or isinstance(current_parent, Email):
                        log.debug("UPDATING PARENT for :" + repr(message.message.message_id))
                        # parent_message is only read when the algorithm
                        # found a parent, i.e. when it has been assigned.
                        new_parent = parent_message.message if algorithm_parent_message_id else None
                        log.debug(repr(new_parent))
                        message.message.set_parent(new_parent)
                    else:
                        log.debug("Skipped reparenting: the current parent "
                                  "isn't an email, the threading algorithm only "
                                  "considers mails")
                update_threading(container.children, debug=debug)
            else:
                log.debug("Current message ID: None, was a dummy container")
                update_threading(container.children, debug=debug)

    update_threading(threaded_emails, debug=False)
def reprocess_content(self):
    """Re-parse every stored email of this source from its saved blob.

    Allows re-parsing all content as if it were imported for the first
    time, but without re-hitting the source or changing the object ids.
    Call when a code change would change the representation in the
    database.

    Each email is re-parsed in its own transaction; the emails of this
    source are then re-threaded in a final transaction.
    """
    session = self.db
    # Materialize plain ids first: the query yields one-element rows.
    email_ids = [
        email_id for (email_id,) in session.query(Email.id).filter(
            Email.source_id == self.id)]
    for email_id in email_ids:
        with transaction.manager:
            # Loader options belong on the query, not on the instance.
            email_ = session.query(Email).options(
                joinedload_all(Email.parent),
                undefer(Email.imported_blob)).get(email_id)
            # Fixed: the stdlib module "email" used to be passed here
            # instead of the loaded object.
            blob = AbstractMailbox.guess_encoding(email_.imported_blob)
            (email_object, dummy, error) = self.parse_email(blob, email_)
    with transaction.manager:
        # thread_mails needs Email objects, not rows of ids.
        # NOTE(review): only this source's emails are threaded here, while
        # the importers thread the whole discussion; confirm the scope.
        emails = session.query(Email).filter(
            Email.source_id == self.id,
        ).options(joinedload_all(Email.parent))
        self.thread_mails(emails)
def import_content(self, only_new=True):
    """Start importing this mailbox's messages.

    When the ``use_source_reader_for_mail`` setting is true the import is
    delegated to the parent class; otherwise the ``import_mails`` task is
    queued with this source's id.
    """
    from assembl.lib.config import get_config
    from pyramid.settings import asbool
    assert self.id
    settings = get_config()
    use_source_reader = asbool(
        settings.get('use_source_reader_for_mail', False))
    if not use_source_reader:
        import_mails.delay(self.id, only_new)
        return
    super(AbstractMailbox, self).import_content(only_new)
# Loose pattern used to pick email addresses out of a recipients header.
_address_match_re = re.compile(
    r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}'
)

def most_common_recipient_address(self):
    """
    Find the most common recipient address among the emails of this
    source. In most use-cases this address can be considered the mailing
    list address.

    Returns ``None`` when no address is found.
    """
    recipient_rows = self.db.query(
        Email.recipients,
    ).filter(
        Email.source_id == self.id,
    )
    occurrences = defaultdict(int)
    for (recipient_header, ) in recipient_rows:
        for address in self._address_match_re.findall(recipient_header):
            occurrences[address] += 1
    if not occurrences:
        return None
    # Stable ascending sort by count: the last entry is the most common.
    ranked = sorted(occurrences.items(), key=lambda pair: pair[1])
    return ranked[-1][0]
def send_post(self, post):
    """Placeholder for sending ``post`` through this mailbox.

    Nothing is sent yet: the call only logs a warning.
    """
    # TODO benoitg
    # Logger.warn is a deprecated alias of Logger.warning.
    log.warning("TODO: Mail::send_post(): Actually queue message")
    # make sure you have a request and use the pyramid mailer
def message_ok_to_import(self, message_string):
    """Check if message should be imported at all (not a bounce, vacation,
    etc.)

    The reference is http://tools.ietf.org/html/rfc3834

    :param message_string: the raw message, as text or UTF-8 bytes
    :return: ``False`` for bounces (empty ``Return-Path``) and for
        automatically submitted messages; for ``Precedence: bulk`` or
        ``list``, ``True`` only when this source is a ``MailingList``;
        ``True`` otherwise
    """
    # TODO: This is a double-parse, refactor parse_message so we can reuse it
    if isinstance(message_string, bytes):
        message_string = message_string.decode('utf-8')
    parsed_email = email.message_from_string(message_string)
    if parsed_email.get('Return-Path', None) == '<>':
        # TODO: Check if a report-type=delivery-status; is present,
        # and process the bounce
        return False
    if parsed_email.get('Precedence', None) in ('bulk', 'list'):
        # A (possible) mailing list message: Allow for mailing lists only
        return isinstance(self, MailingList)
    auto_submitted = parsed_email.get('Auto-Submitted', None)
    if auto_submitted is not None:
        # RFC 3834: any value other than "no" marks an automatic message
        # ("auto-generated", "auto-replied", ...); optional parameters
        # follow a ";". Only "auto-generated" used to be rejected, which
        # let vacation auto-replies through.
        keyword = str(auto_submitted).split(';', 1)[0].strip().lower()
        if keyword and keyword != 'no':
            return False
    return True
def generate_message_id(self, source_post_id):
    """Return the message id for ``source_post_id``, minus enclosing ``<>``."""
    is_bracketed = (
        source_post_id.startswith('<') and source_post_id.endswith('>'))
    # Use even invalid ids if they come from mail.
    return source_post_id[1:-1] if is_bracketed else source_post_id
class IMAPMailbox(AbstractMailbox):
    """
    An IMAPMailbox refers to an Email inbox that can be accessed with IMAP.
    """
    __tablename__ = "source_imapmailbox"
    id = Column(Integer, ForeignKey(
        'mailbox.id',
        ondelete='CASCADE',
        onupdate='CASCADE'
    ), primary_key=True)
    host = Column(String(1024), nullable=False)
    port = Column(Integer, nullable=False)
    username = Column(UnicodeText, nullable=False)
    # Note: If using STARTTLS, this should be set to false
    use_ssl = Column(Boolean, default=True)
    password = Column(UnicodeText, nullable=False)

    __mapper_args__ = {
        'polymorphic_identity': 'source_imapmailbox',
        'with_polymorphic': '*'
    }

    @staticmethod
    def do_import_content(mbox, only_new=True):
        """Fetch messages over IMAP, store them and re-thread the discussion.

        :param mbox: the IMAPMailbox to import; it is merged into its
            session first
        :param only_new: when true and a last imported UID is known, only
            messages from that UID on are requested; otherwise every
            message is requested and duplicate detection is relied upon

        Each message is imported in its own transaction, and
        ``last_imported_email_uid`` is updated after each one. The IMAP
        connection is logged out even when the import fails.
        """
        # The module-level import of these names is commented out, so they
        # are brought in here from the standard library.
        # NOTE(review): the commented import referred to imaplib2; confirm
        # the stdlib client is an acceptable replacement.
        from imaplib import IMAP4, IMAP4_SSL

        def as_text(value):
            # The client may return UIDs as bytes, while the stored UID
            # is text; normalise so comparison and storage agree.
            if isinstance(value, bytes):
                return value.decode('ascii')
            return value

        mbox = mbox.db.merge(mbox)
        session = mbox.db
        session.add(mbox)
        if mbox.use_ssl:
            connection = IMAP4_SSL(host=mbox.host, port=mbox.port)
        else:
            connection = IMAP4(host=mbox.host, port=mbox.port)
            if 'STARTTLS' in connection.capabilities:
                # Always use starttls if server supports it
                connection.starttls()

        def import_email(mailbox_obj, email_id):
            session = mailbox_obj.db
            status, message_data = connection.uid('fetch', email_id, "(RFC822)")
            assert status == 'OK'
            message_string = None
            for response_part in message_data:
                if isinstance(response_part, tuple):
                    message_string = response_part[1]
            assert message_string
            if mailbox_obj.message_ok_to_import(message_string):
                (email_object, dummy, error) = mailbox_obj.parse_email(message_string)
                if error:
                    raise Exception(error)
                session.add(email_object)
                translate_content(email_object)  # should delay
            else:
                log.info("Skipped message with imap id %s (bounce or vacation message)"% (email_id))
            mailbox_obj.last_imported_email_uid = email_id

        try:
            connection.login(mbox.username, mbox.password)
            connection.select(mbox.folder)

            search_status = None
            email_ids = None
            if only_new and mbox.last_imported_email_uid:
                command = "(UID %s:*)" % mbox.last_imported_email_uid
                search_status, search_result = connection.uid('search', None, command)
                email_ids = [as_text(uid) for uid in search_result[0].split()]

            if (only_new and search_status == 'OK' and email_ids
                    and email_ids[0] == mbox.last_imported_email_uid):
                # Note: the email_ids[0]==mbox.last_imported_email_uid test is
                # necessary because according to https://tools.ietf.org/html/rfc3501
                # seq-range like "3291:* includes the UID of the last message in
                # the mailbox, even if that value is less than 3291."
                # discard the first message, it should be the last imported email.
                del email_ids[0]
            else:
                # Either:
                # a) we don't import only new messages or
                # b) the message with mbox.last_imported_email_uid hasn't been found
                #    (may have been deleted)
                # In this case we request all messages and rely on duplicate
                # detection
                command = "ALL"
                search_status, search_result = connection.uid('search', None, command)
                assert search_status == 'OK'
                email_ids = [as_text(uid) for uid in search_result[0].split()]

            if len(email_ids):
                log.info("Processing messages from IMAP: %d "% (len(email_ids)))
                for email_id in email_ids:
                    with transaction.manager:
                        import_email(mbox, email_id)
            else:
                log.info("No IMAP messages to process")

            discussion_id = mbox.discussion_id
            connection.close()
        finally:
            # Release the server connection whatever happened above.
            connection.logout()

        with transaction.manager:
            if len(email_ids):
                # We imported mails, we need to re-thread
                emails = session.query(Email).filter(
                    Email.discussion_id == discussion_id,
                ).options(joinedload_all(Email.parent))

                AbstractMailbox.thread_mails(emails)

    def make_reader(self):
        """Return an ``IMAPReader`` for this source's id."""
        from assembl.tasks.imapclient_source_reader import IMAPReader
        return IMAPReader(self.id)

    def get_send_address(self):
        """
        Get the email address to send a message to the discussion
        """
        return self.most_common_recipient_address()
class MailingList(IMAPMailbox):
    """
    A mailbox with mailing list semantics
    (single post address, subject mangling, etc.)
    """
    __tablename__ = "source_mailinglist"
    id = Column(Integer, ForeignKey(
        'source_imapmailbox.id',
        ondelete='CASCADE',
        onupdate='CASCADE'
    ), primary_key=True)

    # The address through which messages are sent to the list
    post_email_address = Column(UnicodeText, nullable=True)

    __mapper_args__ = {
        'polymorphic_identity': 'source_mailinglist',
        'with_polymorphic': '*'
    }

    def get_send_address(self):
        """
        Get the email address to send a message to the discussion

        Returns the configured ``post_email_address`` (may be ``None``,
        the column is nullable).
        """
        # Fixed: this used to call self.post_email(), which is not defined
        # anywhere in the visible code; the address lives in the
        # post_email_address column.
        return self.post_email_address
class AbstractFilesystemMailbox(AbstractMailbox):
    """
    A mailbox whose messages are stored on the server's filesystem.
    """
    __tablename__ = "source_filesystemmailbox"
    __mapper_args__ = {'polymorphic_identity': 'source_filesystemmailbox'}

    id = Column(
        Integer,
        ForeignKey('mailbox.id', ondelete='CASCADE', onupdate='CASCADE'),
        primary_key=True)

    # Location of the mailbox on disk.
    filesystem_path = Column(CoerceUnicode(), nullable=False)
class MaildirMailbox(AbstractFilesystemMailbox):
    """
    A mailbox stored as a maildir on the server.
    """
    __tablename__ = "source_maildirmailbox"
    id = Column(Integer, ForeignKey(
        'source_filesystemmailbox.id',
        ondelete='CASCADE',
        onupdate='CASCADE'
    ), primary_key=True)

    __mapper_args__ = {
        'polymorphic_identity': 'source_maildirmailbox',
    }

    @staticmethod
    def do_import_content(abstract_mbox, only_new=True):
        """Import every message of the maildir, then re-thread the discussion.

        :param abstract_mbox: the mailbox to import; it is merged into
            its session first
        :param only_new: accepted for interface compatibility; it is not
            used, every message in the maildir is parsed
        :raises IOError: ``filesystem_path`` is not a directory
        :raises ValueError: the directory has none of the ``cur``,
            ``new`` and ``tmp`` sub-folders
        """
        abstract_mbox = abstract_mbox.db.merge(abstract_mbox)
        session = abstract_mbox.db
        session.add(abstract_mbox)
        discussion_id = abstract_mbox.discussion_id
        maildir_path = abstract_mbox.filesystem_path

        # Fixed: these checks used to "raise" plain strings, which is
        # itself a TypeError in Python 3 and hid the real message.
        if not os.path.isdir(maildir_path):
            raise IOError("There is no directory at %s" % maildir_path)
        subfolder_paths = [
            os.path.join(maildir_path, name)
            for name in ('cur', 'new', 'tmp')]
        subfolder_present = [os.path.isdir(path) for path in subfolder_paths]
        if not any(subfolder_present):
            raise ValueError(
                "Directory at %s is NOT a maildir" % maildir_path)
        # Fix the maildir in case some folders are missing
        # For instance, git cannot store empty folder
        for path, present in zip(subfolder_paths, subfolder_present):
            if not present:
                os.mkdir(path)

        mbox = mailbox.Maildir(maildir_path, factory=None, create=False)
        mails = list(mbox.values())

        def import_email(abstract_mbox, message_data):
            session = abstract_mbox.db
            message_string = message_data.as_string()

            (email_object, dummy, error) = abstract_mbox.parse_email(message_string)
            if error:
                raise Exception(error)
            with transaction.manager:
                session.add(email_object)
            # NOTE(review): this only rebinds the local name; kept because
            # it reloads the mailbox after the transaction.
            abstract_mbox = AbstractMailbox.get(abstract_mbox.id)

        if len(mails):
            for message_data in mails:
                import_email(abstract_mbox, message_data)

            # We imported mails, we need to re-thread
            with transaction.manager:
                emails = session.query(Email).filter(
                    Email.discussion_id == discussion_id,
                ).options(joinedload_all(Email.parent))

                AbstractMailbox.thread_mails(emails)
class Email(ImportedPost):
    """
    An Email refers to an email message that was imported from an AbstractMailbox.
    """
    __tablename__ = "email"

    id = Column(Integer, ForeignKey(
        'imported_post.id',
        ondelete='CASCADE',
        onupdate='CASCADE'
    ), primary_key=True)

    # Decoded "To" header of the message.
    recipients = Column(UnicodeText, nullable=False)
    # "Name <address>" (or the bare address) taken from the "From" header.
    sender = Column(CoerceUnicode(), nullable=False)

    # Message id from the "In-Reply-To" header, without angle brackets.
    in_reply_to = Column(CoerceUnicode())

    __mapper_args__ = {
        'polymorphic_identity': 'email',
    }

    def REWRITEMEreply(self, sender, response_body):
        """
        Send a response to this email.

        `sender` is a user instance.
        `response_body` is a string (UTF-8 bytes are decoded first).

        The reply is sent through the SMTP host named by the ``mail.host``
        setting, with identical plain-text and HTML parts.
        """
        sent_from = ' '.join([
            "%(sender_name)s on IdeaLoom" % {
                "sender_name": sender.display_name()
            },
            "<%(sender_email)s>" % {
                "sender_email": sender.get_preferred_email(),
            }
        ])

        # Fixed: this compared type(...) with the string 'str', which is
        # never true, so byte strings were never decoded.
        if isinstance(response_body, binary_type):
            response_body = response_body.decode('utf-8')

        recipients = self.recipients

        message = MIMEMultipart('alternative')
        message['Subject'] = Header(self.subject, 'utf-8')
        message['From'] = sent_from

        message['To'] = self.recipients
        message.add_header('In-Reply-To', self.message_id)

        plain_text_body = response_body
        html_body = response_body

        # TODO: The plain text and html parts of the email should be different,
        # but we'll see what we can get from the front-end.

        plain_text_part = MIMEText(
            plain_text_body.encode('utf-8'),
            'plain',
            'utf-8'
        )

        html_part = MIMEText(
            html_body.encode('utf-8'),
            'html',
            'utf-8'
        )

        message.attach(plain_text_part)
        message.attach(html_part)

        smtp_connection = smtplib.SMTP(
            get_current_registry().settings['mail.host']
        )
        try:
            smtp_connection.sendmail(
                sent_from,
                recipients,
                message.as_string()
            )
        finally:
            # Do not leave the SMTP session open when sending fails.
            smtp_connection.quit()

    def language_priors(self, translation_service):
        """Adjust the inherited language priors with Content-Language headers.

        When text parts of the stored message declare a
        ``Content-Language``, priors of other languages are scaled by 0.8
        and declared languages missing from the priors are added with
        weight 1.
        """
        priors = super(Email, self).language_priors(translation_service)
        email_obj = email.message_from_string(
            bytes_to_native_str(self.imported_blob))
        locales = {part.get('Content-Language') for part in email_obj.walk()
                   if part.get_content_type() in (
                       'text/plain', 'text/html', 'multipart/alternative')}
        # Parts without the header contribute None.
        locales.discard(None)
        if locales:
            locales = {translation_service.asKnownLocale(loc)
                       for loc in locales}
            priors = {k: v * (1 if k in locales else 0.8)
                      for (k, v) in priors.items()}
            for lang in locales:
                if lang not in priors:
                    priors[lang] = 1
        return priors

    @as_native_str()
    def __repr__(self):
        return "%s from %s to %s>" % (
            super(Email, self).__repr__(),
            self.sender.encode('iso-8859-1', 'ignore'),
            self.recipients.encode('iso-8859-1', 'ignore'))

    def get_title(self):
        """Return the subject after the source's subject mangling."""
        return self.source.mangle_mail_subject(self.subject)