# Source code for assembl.models.generic

"""The basic Content and ContentSource classes.

.. inheritance-diagram:: ContentSource Content PostSource AnnotatorSource assembl.models.post.Post assembl.models.post.LocalPost assembl.models.post.SynthesisPost assembl.models.post.WidgetPost assembl.models.post.IdeaProposalPost assembl.models.post.ImportedPost assembl.models.mail.AbstractMailbox assembl.models.mail.IMAPMailbox assembl.models.mail.MailingList assembl.models.mail.AbstractFilesystemMailbox assembl.models.mail.MaildirMailbox assembl.models.mail.Email assembl.models.annotation.Webpage
    :parts: 1
"""
from future.utils import native_str_to_bytes, bytes_to_native_str
from builtins import hex
import logging
from abc import abstractmethod
import re
import uuid

from sqlalchemy import (
    Column,
    Integer,
    SmallInteger,
    Boolean,
    UnicodeText,
    String,
    Unicode,
    DateTime,
    ForeignKey,
)
from sqlalchemy.orm import relationship, backref, aliased
from ..lib import config
from sqla_rdfbridge.mapping import PatternIriClass

from ..lib.sqla import CrudOperation
from ..lib.model_watcher import get_model_watcher
from ..lib.utils import get_global_base_url
from . import Base, DiscussionBoundBase, OriginMixin
from .langstrings import (LangString, LangStringEntry)
from ..semantic.virtuoso_mapping import QuadMapPatternS
from ..auth import (
    CrudPermissions, P_ADD_POST, P_READ, P_ADMIN_DISC, P_EDIT_POST)
from ..semantic.namespaces import (
    SIOC, CATALYST, ASSEMBL, DCTERMS, QUADNAMES, FOAF)
from .discussion import Discussion
from ..lib.history_mixin import TombstonableOriginMixin
from ..lib.clean_input import sanitize_text, sanitize_html


# Module-level logger, named after this module.
log = logging.getLogger(__name__)


class ContentSource(DiscussionBoundBase, OriginMixin):
    """An origin of outside content, attached to one discussion.

    This is the root of a polymorphic hierarchy stored in the
    ``content_source`` table and discriminated by the ``type`` column.
    """
    __tablename__ = "content_source"
    __external_typename = "Container"
    rdf_class = SIOC.Container

    id = Column(
        Integer, primary_key=True,
        info={'rdf': QuadMapPatternS(None, ASSEMBL.db_id)})
    name = Column(UnicodeText, nullable=False)
    # Polymorphic discriminator (see __mapper_args__ below).
    type = Column(String(60), nullable=False)
    discussion_id = Column(
        Integer,
        ForeignKey('discussion.id', ondelete='CASCADE', onupdate='CASCADE'),
        nullable=False, index=True)
    # Error state; all three columns are cleared together by reset_errors().
    connection_error = Column(SmallInteger)
    error_description = Column(String)
    error_backoff_until = Column(DateTime)

    @classmethod
    def special_quad_patterns(cls, alias_maker, discussion_id):
        """Return the extra quad pattern linking a discussion to its source."""
        uses_source = QuadMapPatternS(
            Discussion.iri_class().apply(cls.discussion_id),
            CATALYST.uses_source,
            cls.iri_class().apply(cls.id),
            name=QUADNAMES.uses_source,
            # The comparison is made on a mapped column and is passed on as
            # a condition object, so it must stay "!= None" rather than
            # "is not None".
            conditions=(cls.discussion_id != None,))  # noqa: E711
        return [uses_source]

    discussion = relationship(
        "Discussion",
        backref=backref(
            'sources',
            order_by="ContentSource.creation_date",
            cascade="all, delete-orphan"),
        info={'rdf': QuadMapPatternS(None, ASSEMBL.in_conversation)})

    __mapper_args__ = {
        'polymorphic_identity': 'content_source',
        'polymorphic_on': type,
        'with_polymorphic': '*'
    }

    retypeable_as = (
        "IMAPMailbox", "MailingList", "AbstractMailbox",
        "AbstractFilesystemMailbox", "AnnotatorSource", "PostSource",
        "FeedPostSource", "LoomioPostSource", "FacebookGenericSource",
        "FacebookGroupSource", "FacebookPagePostsSource",
        "FacebookPageFeedSource", "FacebookSinglePostSource",
        "EdgeSenseDrupalSource")

    @abstractmethod
    def generate_message_id(self, source_post_id):
        """Return a globally unique message_id for a post of this source.

        ``source_post_id`` is only guaranteed to be unique within the
        source, but in many cases it is already globally unique, so this
        default hands it back unchanged.
        """
        return source_post_id

    _non_email_chars = re.compile(r'[^!#-\'\*\+\-\./-9=\?A-Z\^_`a-z\|\~]', re.U)

    @classmethod
    def flatten_source_post_id(cls, source_post_id, extra_length=0):
        """Make ``source_post_id`` usable as part 1 of a message_id.

        Every character matched by ``_non_email_chars`` is replaced by
        ``_`` followed by the hex code of the character. If the result
        plus ``extra_length`` would exceed 64 characters (64 is max
        according to RFC 5322), it is cut short and a base64 MD5 digest
        of the original id is appended.
        """
        def _escape(match):
            return '_' + hex(ord(match.group()))[2:]

        flattened = cls._non_email_chars.sub(_escape, source_post_id)
        if len(flattened) + extra_length <= 64:
            return flattened

        import base64
        import hashlib
        md5 = hashlib.md5(native_str_to_bytes(source_post_id, 'utf-8'))
        digest = bytes_to_native_str(base64.urlsafe_b64encode(md5.digest()))
        # Leave room for the digest, the "_" separator and the caller's
        # extra characters.
        room = max(0, 64 - len(digest) - extra_length - 1)
        prefix = flattened[:room]
        if not prefix:
            return digest
        return prefix + "_" + digest

    def import_content(self, only_new=True):
        """Wake the source reader task; reimport everything unless only_new."""
        from assembl.tasks.source_reader import wake
        reimport = not only_new
        wake(self.id, reimport=reimport)

    def make_reader(self):
        """Must be overridden; the base class has no reader."""
        raise NotImplementedError()

    def get_discussion_id(self):
        """Return the discussion id, from the column or the relationship."""
        known_id = self.discussion_id
        if known_id:
            return known_id
        return self.discussion.id

    @property
    def connection_error_as_text(self):
        """Name of the ReaderStatus for connection_error, or None if unset."""
        from ..tasks.source_reader import ReaderStatus
        if self.connection_error is None:
            return None
        return ReaderStatus(self.connection_error).name

    @classmethod
    def get_discussion_conditions(cls, discussion_id, alias_maker=None):
        """Return the filter conditions selecting one discussion's sources."""
        in_discussion = cls.discussion_id == discussion_id
        return (in_discussion,)

    # Cannot be readable to all, because subclasses contain passwords
    crud_permissions = CrudPermissions(P_ADMIN_DISC, P_ADMIN_DISC)

    def reset_errors(self):
        """Clear the recorded connection error, description and backoff."""
        self.error_backoff_until = None
        self.error_description = None
        self.connection_error = None
class PostSource(ContentSource):
    """A source whose commentary is handled as internal posts.

    A discussion source should have a method for importing all content,
    as well as only importing new content. Maybe the standard interface
    for this should be `source.import()`.
    """
    __tablename__ = "post_source"
    rdf_class = ASSEMBL.PostSource

    id = Column(Integer, ForeignKey(
        'content_source.id',
        ondelete='CASCADE',
        onupdate='CASCADE'
    ), primary_key=True)

    last_import = Column(DateTime)

    __mapper_args__ = {
        'polymorphic_identity': 'post_source',
    }

    def get_discussion_id(self):
        """Return the discussion id, from the column or the relationship."""
        return self.discussion_id or self.discussion.id

    def get_default_prepended_id(self):
        """Return a prefix used to keep Post.message_id globally unique.

        Used for PostSources whose incoming posts cannot guarantee that
        ImportedPost.source_post_id is unique. Subclasses that will not
        have unique ids on their incoming posts must override this; the
        default is the empty string.
        """
        return ""

    @property
    def number_of_imported_posts(self):
        """Count the non-tombstoned ImportedPost rows from this source."""
        from .post import ImportedPost
        return self.db.query(ImportedPost).filter_by(
            source_id=self.id, tombstone_date=None).count()

    @classmethod
    def get_discussion_conditions(cls, discussion_id, alias_maker=None):
        """Return the filter conditions selecting one discussion's sources."""
        return (cls.discussion_id == discussion_id,)

    def send_post(self, post):
        """Send a new post in the discussion to the source.

        The base implementation sends nothing and only logs a warning
        naming the concrete class.
        """
        # Logger.warn is a deprecated alias; the arguments are passed
        # lazily so formatting only happens if the record is emitted.
        log.warning(
            "Source %s did not implement PostSource::send_post()",
            self.__class__.__name__)
class AnnotatorSource(ContentSource):
    """A source of content coming from annotator."""
    __tablename__ = "annotator_source"

    id = Column(
        Integer,
        ForeignKey(
            'content_source.id', ondelete='CASCADE', onupdate='CASCADE'),
        primary_key=True)

    __mapper_args__ = {'polymorphic_identity': 'annotator_source'}

    def generate_message_id(self, source_post_id):
        """Return ``source_post_id`` if truthy, else a fresh generated id.

        The generated id is a uuid1 hex string followed by
        ``_annotator@`` and the configured ``public_hostname``.
        """
        if source_post_id:
            return source_post_id
        local_part = uuid.uuid1().hex + "_annotator@"
        return local_part + config.get('public_hostname')

    def make_reader(self):
        """This source is push-only, so there is no reader."""
        # only push
        return None
class ContentSourceIDs(Base):
    """Association between an internal post and an id in a source.

    A table that keeps track of the number of external identities that
    an internal post can be exported to. A stepping-stone to having Sinks.
    """
    __tablename__ = 'content_source_ids'

    id = Column(Integer, primary_key=True)

    # The source the post was pushed to.
    source_id = Column(
        Integer,
        ForeignKey(
            'content_source.id', onupdate='CASCADE', ondelete='CASCADE'),
        nullable=False,
        index=True)
    source = relationship(
        'ContentSource',
        backref=backref('pushed_messages', cascade='all, delete-orphan'))

    # The internal content row that was pushed.
    post_id = Column(
        Integer,
        ForeignKey('content.id', onupdate='CASCADE', ondelete='CASCADE'),
        nullable=False,
        index=True)
    post = relationship(
        'Content',
        backref=backref(
            'post_sink_associations', cascade='all, delete-orphan'))

    # Identifier of the message within the source.
    message_id_in_source = Column(String(256), nullable=False, index=True)
class Content(TombstonableOriginMixin, DiscussionBoundBase):
    """Polymorphic base class for what is imported from a Source.

    The body and subject properly belong to the Post but were moved here
    to optimize the most common case.
    """
    __tablename__ = "content"
    __external_typename = "SPost"
    # __table_cls__ = TableWithTextIndex
    rdf_class = SIOC.Post

    id = Column(Integer, primary_key=True,
                info={'rdf': QuadMapPatternS(None, ASSEMBL.db_id)})
    # Polymorphic discriminator (see __mapper_args__ below).
    type = Column(String(60), nullable=False)

    discussion_id = Column(Integer, ForeignKey(
        'discussion.id',
        ondelete='CASCADE',
        onupdate='CASCADE',
    ), nullable=False, index=True)

    discussion = relationship(
        "Discussion",
        backref=backref(
            'posts', order_by="Content.creation_date",
            cascade="all, delete-orphan"),
        info={'rdf': QuadMapPatternS(None, ASSEMBL.in_conversation)}
    )

    subject_id = Column(Integer, ForeignKey(LangString.id), index=True)
    body_id = Column(Integer, ForeignKey(LangString.id), index=True)

    subject = relationship(
        LangString,
        primaryjoin=subject_id == LangString.id,
        backref=backref("subject_of_post", lazy="dynamic"),
        single_parent=True, lazy="joined",
        cascade="all, delete-orphan")
    body = relationship(
        LangString,
        primaryjoin=body_id == LangString.id,
        backref=backref("body_of_post", lazy="dynamic"),
        single_parent=True, lazy="joined",
        cascade="all, delete-orphan")

    def __init__(self, *args, **kwargs):
        """Create the content, defaulting subject and body to empty.

        For each of subject and body, if neither the object nor its
        ``*_id`` is given (or both are None), ``LangString.EMPTY()`` is
        used.
        """
        if (kwargs.get('subject', None) is None and
                kwargs.get('subject_id', None) is None):
            kwargs['subject'] = LangString.EMPTY()
        if (kwargs.get('body', None) is None and
                kwargs.get('body_id', None) is None):
            kwargs['body'] = LangString.EMPTY()
        super(Content, self).__init__(*args, **kwargs)

    @classmethod
    def subqueryload_options(cls):
        """Options for subquery loading.

        Use when there are many languages in the discussion.
        """
        return (
            LangString.subqueryload_option(cls.subject),
            LangString.subqueryload_option(cls.body))

    @classmethod
    def joinedload_options(cls):
        """Options for joined loading.

        Use when there are few languages in the discussion.
        """
        return (
            LangString.joinedload_option(cls.subject),
            LangString.joinedload_option(cls.body))

    @classmethod
    def best_locale_query(cls, locales):
        "BUGGY. Return a query that will load the post, best subject and best body for the given locales"
        # this failed in virtuoso, check now
        # Note that it fails with just body, and succeeds with subject.
        # Go figure. Fortunately not needed yet.
        subject_ls = aliased(LangString)
        body_ls = aliased(LangString)
        best_subject_sq = LangString.best_lang_old(locales)
        best_body_sq = LangString.best_lang_old(locales)

        return cls.default_db.query(
            cls, best_subject_sq, best_body_sq).join(
            subject_ls, cls.subject_id == subject_ls.id).join(
            best_subject_sq).join(
            body_ls, cls.body_id == body_ls.id).join(best_body_sq)

    # old_subject = Column("subject", CoerceUnicode(), server_default="",
    #     info={'rdf': QuadMapPatternS(None, DCTERMS.title)})
    # TODO: check HTML or text? SIOC.content should be text.
    # Do not give it for now, privacy reasons
    # old_body = Column("body", UnicodeText, server_default="")
    #    info={'rdf': QuadMapPatternS(None, SIOC.content)})
    # TODO: Refactor hidden into PublicationStates.WIDGET_SCOPED
    hidden = Column(Boolean, server_default='0')

    __mapper_args__ = {
        'polymorphic_identity': 'content',
        'polymorphic_on': 'type',
        'with_polymorphic': '*'
    }

    def populate_from_context(self, context):
        """Take the discussion from ``context`` if none is set yet."""
        if not (self.discussion or self.discussion_id):
            self.discussion = context.get_instance_of_class(Discussion)
        super(Content, self).populate_from_context(context)

    def get_subject(self):
        """Return the subject LangString."""
        return self.subject

    def get_body(self):
        """Return the body LangString."""
        return self.body

    def get_title(self):
        """Return the subject, which serves as the title."""
        return self.subject

    def safe_set_body(self, body):
        """Sanitize, in place, every entry value of a body dict.

        ``body`` is indexed as ``body['entries']``, each entry having a
        ``'value'`` key. The sanitizer is chosen from the body mime type:
        ``sanitize_text`` for ``text/plain``, ``sanitize_html`` otherwise.
        Nothing is assigned to ``self`` here.
        """
        if self.get_body_mime_type() == 'text/plain':
            sanitize = sanitize_text
        else:
            sanitize = sanitize_html
        for e in body['entries']:
            e['value'] = sanitize(e['value'])

    def safe_set_subject(self, subject):
        """Sanitize, in place, subject entry values that contain ``<``.

        ``subject`` has the same dict layout as in ``safe_set_body``.
        Nothing is assigned to ``self`` here.
        """
        for e in subject['entries']:
            if "<" in e['value']:
                e['value'] = sanitize_text(e['value'])

    def remove_translations(self):
        """Remove the translations of the subject and of the body."""
        if self.subject:
            self.subject.remove_translations()
        # Guarded like the subject, so a content without body does not
        # raise AttributeError.
        if self.body:
            self.body.remove_translations()

    def get_body_mime_type(self):
        """ Return the format of the body, so the frontend will know how to
        display it.  Currently, only:
        text/plain (Understood as preformatted text)
        text/html (Understood as some subset of html)
        """
        return "text/plain"

    def get_body_as_html(self):
        """Return the body as a LangString of HTML, or None without body.

        An HTML body is returned as is. A plain-text body is copied into
        a new LangString whose entries wrap each value in a
        ``white-space: pre-wrap`` span. For any other mime type an error
        is logged and the body is returned unchanged.
        """
        mimetype = self.get_body_mime_type()
        body = self.body
        if not body:
            return None
        if mimetype == 'text/html':
            return body
        elif mimetype == "text/plain":
            ls = LangString()
            for e in body.entries:
                # The entry is created with langstring=ls; the returned
                # object itself is not needed here.
                _ = LangStringEntry(
                    # Closing tag now matches the opening <span>.
                    value='<span style="white-space: pre-wrap">%s</span>' % (
                        e.value,),
                    langstring=ls, locale=e.locale)
            return ls
        else:
            log.error("What is this mimetype?" + mimetype)
            return body

    def get_original_subject(self):
        """Return the value of the first original entry of the subject."""
        return self.subject.first_original().value

    def get_original_body_as_html(self):
        """Return the original body value as an HTML string.

        Returns None without body. A plain-text value is wrapped in a
        ``white-space: pre-wrap`` span. For an unknown mime type an
        error is logged and the original value is returned unwrapped.
        """
        mimetype = self.get_body_mime_type()
        body = self.body
        if not body:
            return None
        original = body.first_original().value
        if mimetype == 'text/html':
            return original
        elif mimetype == "text/plain":
            # Closing tag now matches the opening <span>.
            return '<span style="white-space: pre-wrap">%s</span>' % (
                original,)
        else:
            log.error("What is this mimetype?" + mimetype)
            # Return the string value, as every other branch does (and
            # as get_original_body_as_text does), not the LangString.
            return original

    def get_original_body_as_text(self):
        """Return the original body value as text; '' without body.

        An HTML value goes through ``sanitize_text``. For an unknown
        mime type an error is logged and the value is returned as is.
        """
        mimetype = self.get_body_mime_type()
        body = self.body
        if not body:
            return ''
        body = body.first_original().value or ''
        if mimetype == 'text/plain':
            return body
        elif mimetype == 'text/html':
            return sanitize_text(body)
        else:
            log.error("What is this mimetype?" + mimetype)
            return body

    def has_attachments(self):
        """Return ``self.attachments`` if truthy, else False."""
        return self.attachments or False

    def get_attachments_as_html_list(self):
        """Return one HTML link per attachment, sorted by document type.

        Documents whose mime type contains ``image`` become a linked
        ``<img>``; others become a text link labelled with the document
        title, or its external URL when there is no title.
        """
        # NOTE(review): titles and URLs are interpolated without HTML
        # escaping -- confirm they are sanitized upstream.
        img_style = "margin: 15px 0 15px 0; max-width: 500px; max-height: auto;"
        img_source = "<a href='%s' target='_blank' style='%s'><img src='%s'></img></a>"
        other_source = "<a href='%s' target='_blank'>%s</a>"
        attachments = self.attachments
        attachment_sorted = sorted(attachments, key=lambda a: a.document.type)
        output = []
        for attachment in attachment_sorted:
            document = attachment.document
            mime_type = document.mime_type
            if mime_type and 'image' in mime_type:
                output.append(img_source % (
                    document.external_url, img_style, document.external_url))
            else:
                title = document.title or document.external_url
                output.append(other_source % (document.external_url, title))
        return output

    def get_body_as_text(self):
        """Return the body as a LangString of text, or None without body.

        A plain-text body is returned as is. An HTML body is copied into
        a new LangString whose entry values went through
        ``sanitize_text``. For any other mime type an error is logged
        and the body is returned unchanged.
        """
        mimetype = self.get_body_mime_type()
        body = self.body
        if not body:
            return None
        if mimetype == 'text/plain':
            return body
        elif mimetype == 'text/html':
            ls = LangString()
            for e in body.entries:
                # The entry is created with langstring=ls; the returned
                # object itself is not needed here.
                _ = LangStringEntry(
                    value=sanitize_text(e.value),
                    langstring=ls, locale=e.locale)
            return ls
        else:
            log.error("What is this mimetype?" + mimetype)
            return body

    def maybe_translate(self, pref_collection):
        """Translate this content using the discussion's service.

        Nothing happens when the service's ``canTranslate`` attribute
        is None.
        """
        from assembl.tasks.translate import (
            translate_content, PrefCollectionTranslationTable)
        service = self.discussion.translation_service()
        if service.canTranslate is not None:
            translations = PrefCollectionTranslationTable(
                service, pref_collection)
            translate_content(
                self, translation_table=translations, service=service)

    def send_to_changes(self, connection=None, operation=CrudOperation.UPDATE,
                        discussion_id=None, view_def="changes"):
        """invoke the modelWatcher on creation"""
        super(Content, self).send_to_changes(
            connection, operation, discussion_id, view_def)
        watcher = get_model_watcher()
        if operation == CrudOperation.CREATE:
            watcher.processPostCreated(self.id)

    def get_discussion_id(self):
        """Return the discussion id, from the column or the relationship."""
        return self.discussion_id or self.discussion.id

    @property
    def exported_to_sources(self):
        """Generic URIs of the sources listed in post_sink_associations."""
        return [ContentSource.uri_generic(s.source_id)
                for s in self.post_sink_associations]

    @classmethod
    def get_discussion_conditions(cls, discussion_id, alias_maker=None):
        """Return the filter conditions selecting one discussion's content."""
        return (cls.discussion_id == discussion_id,)

    @classmethod
    def special_quad_patterns(cls, alias_maker, discussion_id):
        """Return the quad pattern mapping a post to its external link."""
        discussion_alias = alias_maker.get_reln_alias(cls.discussion)
        return [
            QuadMapPatternS(
                None, FOAF.homepage,
                PatternIriClass(
                    QUADNAMES.post_external_link_iri,
                    # TODO: Use discussion.get_base_url.
                    # This should be computed outside the DB.
                    get_global_base_url() + '/%s/posts/local:SPost/%d', None,
                    ('slug', Unicode, False), ('id', Integer, False)).apply(
                    discussion_alias.slug, cls.id),
                name=QUADNAMES.post_external_link_map)
        ]

    def language_priors(self, translation_service):
        """Return a prior of 1 for each of the discussion's locales.

        Keys are the locales as mapped by the service's ``asKnownLocale``.
        """
        discussion = self.discussion
        discussion_locales = discussion.discussion_locales
        return {translation_service.asKnownLocale(loc): 1
                for loc in discussion_locales}

    def guess_languages(self):
        """Ask the translation service to confirm body and subject locales.

        The body's original entry is confirmed first. When the body's
        locale is not one of ``LocaleLabel.SPECIAL_LOCALES``, the priors
        used for the subject are scaled by 0.6 and the body's locale is
        set to 1.
        """
        from .langstrings import LocaleLabel
        if self.discussion is None:
            self.discussion = Discussion.get(self.discussion_id)
        assert self.discussion
        ts = self.discussion.translation_service()
        priors = self.language_priors(ts)
        if self.body:
            body_original = self.body.first_original()
            ts.confirm_locale(body_original, priors)
        if self.subject:
            # body_original is only bound when self.body is truthy, so
            # that test has to come first.
            if self.body and body_original.locale_code not in \
                    LocaleLabel.SPECIAL_LOCALES:
                # boost the body's language
                priors = {k: v * 0.6 for (k, v) in priors.items()}
                priors[body_original.locale_code] = 1
            subject_original = self.subject.first_original()
            ts.confirm_locale(subject_original, priors)

    def indirect_idea_content_links(self):
        """Return an empty list; the base class has no indirect links."""
        return []

    def widget_ideas(self):
        """Return the generic URIs of the ideas in widget_idea_links."""
        from .idea import Idea
        return [Idea.uri_generic(wil.idea_id)
                for wil in self.widget_idea_links]

    crud_permissions = CrudPermissions(
        P_ADD_POST, P_READ, P_EDIT_POST, P_ADMIN_DISC)
# Register Content's 'subject' and 'body' LangString attributes with
# LangString at import time.
# NOTE(review): presumably this lets a loaded LangString know which Content
# owns it -- confirm against LangString.setup_ownership_load_event.
LangString.setup_ownership_load_event(Content, ['subject', 'body'])