Source code for assembl.models.path_utils

# -*- coding: utf-8 -*-
"""Utilities for traversing the set of content related to an idea and vice-versa."""

from builtins import range
from builtins import object
from functools import total_ordering
from collections import defaultdict
from bisect import bisect_right

from future.utils import as_native_str
from sqlalchemy import String
from sqlalchemy.orm import (with_polymorphic, aliased)
from sqlalchemy.sql.expression import or_, union, except_
from sqlalchemy.sql.functions import count

from .idea_content_link import (
    IdeaContentLink, IdeaContentPositiveLink, IdeaContentNegativeLink)
from .post import (
    Post, Content, SynthesisPost,
    countable_publication_states, deleted_publication_states)
from .annotation import Webpage
from .idea import IdeaVisitor, Idea, IdeaLink, RootIdea
from .discussion import Discussion
from .action import ViewPost

# TODO: Write a discussion structure cache manager.
# It would cache parents, children, counts, etc. as needed,
# and understand the invalidation relationships.
# Tie it to the request? Use request.set_property?


# Cases to watch:
# I1 > I2 + P1 > P2
# I1 > I2 - P2
# =>
# I1, I2 < P1 / P2

# I1 > I2 + P1 > P2 > P3
# I1 > I2 - P2
# I1 > I2 > I3 + P3
# =>
# I1, I2 < P1, P3 / P2
# I3 < P3

# I1 + P1 > P2 > P3 > P4 > P5
# I1 - P2
# I1 + P3
# I1 - P5
# => I1 < P1, P3, P4 / P5

# I1 > I2 + P1 > P2
# I1 > I2 - P2
# I1 > I3 + P1 > P3
# I1 > I3 - P1 > P3
# =>
# I2 < P1, P3 / P2
# I3 < P1, P2 / P3
# I1 < P1, P2, P3


@total_ordering
class PostPathData(object):
    "Data about a single post_path."
    __slots__ = ("positive", "post_path")

    def __init__(self, post_path, positive):
        self.post_path = post_path
        self.positive = positive

    def combine(self, other):
        if self.positive != other.positive:
            # if different ideas and neg subpath of pos, use pos.
            # if same idea, as below
            # But this should be done globally further down, because
            # combining here loses "same idea" cases.
            if self.post_path == other.post_path:
                return other if self.positive else self
            return None
        if self.post_path.startswith(other.post_path):
            return other
        elif other.post_path.startswith(self.post_path):
            return self
        return None

    @property
    def last_id(self):
        return int(self.post_path.strip(',').split(",")[-1])

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return ((self.positive == other.positive) and
                (self.post_path == other.post_path))

    def __lt__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        if self.post_path != other.post_path:
            # lexicographic sort is good enough
            return self.post_path < other.post_path
        # We want positives to come first
        return self.positive > other.positive

    def __hash__(self):
        return hash(self.post_path) + int(self.positive)

    @as_native_str()
    def __repr__(self):
        return "<%s%s>" % (
            self.post_path, "+" if self.positive else "-")
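
# Illustrative sketch (not part of the original module): how PostPathData
# ordering and combine() behave. Path strings are assumed to use the
# comma-terminated ancestry format produced by load_discussion() below,
# e.g. "4,7," for post 7 in the thread under post 4.
#
#     >>> a = PostPathData("4,", True)
#     >>> b = PostPathData("4,7,", True)
#     >>> a.combine(b)                            # same polarity: the covering path wins
#     <4,+>
#     >>> b.combine(PostPathData("4,7,", False))  # same path, opposite polarity: keeps the negative
#     <4,7,->
#     >>> sorted([b, PostPathData("4,", False), a])  # lexicographic, positives first
#     [<4,+>, <4,->, <4,7,+>]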


class PostPathLocalCollection(object):
    "Data about all PostPaths local to an Idea."
    def __init__(self):
        self.paths = []
        self.reduced = True

    def add_path(self, path):
        self.paths.append(path)
        self.reduced = False

    def combine(self, other):
        "Combine the paths from two collections (different ideas)."
        if self and other:
            # First, filter out negative paths
            # covered by positive paths in other collection
            my_paths = [p for p in other.paths if not self.is_cancelled(p)]
            other_paths = [p for p in self.paths if not other.is_cancelled(p)]
            # Then combine and reduce.
            my_paths.extend(other_paths)
            self.paths = my_paths
            self.reduce()
        else:
            # if either is empty, simple case.
            self.paths.extend(other.paths)
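
    # Illustrative sketch (not part of the original module): combining the
    # collections of two child ideas into their parent, as PostPathCombiner's
    # end_visit() does below. The sibling's positive path cancels the other
    # child's negative path, so the parent ends up covering the whole thread.
    #
    #     >>> parent = PostPathLocalCollection()
    #     >>> parent.add_path(PostPathData("1,", True))     # first child: post 1's thread...
    #     >>> parent.add_path(PostPathData("1,2,", False))  # ...minus the subthread under post 2
    #     >>> parent.reduce()
    #     >>> sibling = PostPathLocalCollection()
    #     >>> sibling.add_path(PostPathData("1,2,", True))  # second child: the subthread under post 2
    #     >>> sibling.reduce()
    #     >>> parent.combine(sibling)
    #     >>> parent
    #     <1,+>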

    def reduce(self):
        """Reduce overlapping paths.

        For example, <1,+>,<1,2,+> becomes <1,+>
        But <1,+>,<1,2,->,<1,2,3,+> remains as-is
        And <1,+>,<1,2,->,<1,3,4,+> becomes <1,+>,<1,2,->"""
        if not len(self.paths):
            return
        self.paths.sort()
        paths = []
        ancestors_by_polarity = {True: [], False: [], None: []}
        for path in self.paths:
            for ancestors in ancestors_by_polarity.values():
                while ancestors:
                    if not path.post_path.startswith(ancestors[-1].post_path):
                        ancestors.pop()
                    else:
                        break
            ancestors = ancestors_by_polarity[None]
            pol_ancestors = ancestors_by_polarity[path.positive]
            # Special case: Combine in place
            if len(ancestors) and ancestors[-1].combine(path) is path:
                paths[paths.index(ancestors[-1])] = path
                ancestors[-1] = path
                pol_ancestors[-1] = path
                continue
            # Forget if it combines with a previous path of same polarity
            # BUT do not combine with distant previous path of same polarity
            # if closest previous path is super-path.
            last_path_pol = pol_ancestors[-1] if len(pol_ancestors) else None
            last_path = ancestors[-1] if len(ancestors) else None
            if last_path_pol is not None and not (
                    last_path is not last_path_pol and
                    path.post_path.startswith(last_path.post_path)):
                combined = last_path_pol.combine(path)
                assert combined is not path, "We should not combine forward"
                if combined is not None:
                    continue
            paths.append(path)
            ancestors.append(path)
            pol_ancestors.append(path)
        self.paths = paths
        self.reduced = True
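
    # Illustrative sketch (not part of the original module): the third example
    # from the docstring above, written with the assumed comma-terminated
    # path format.
    #
    #     >>> col = PostPathLocalCollection()
    #     >>> col.add_path(PostPathData("1,", True))
    #     >>> col.add_path(PostPathData("1,2,", False))
    #     >>> col.add_path(PostPathData("1,3,4,", True))
    #     >>> col.reduce()
    #     >>> col
    #     <1,+> ; <1,2,->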

    def find_insertion(self, data):
        assert self.reduced
        point = bisect_right(self.paths, data)
        is_below = False
        if point:
            previous = self.paths[point-1]
            is_below = data.post_path.startswith(previous.post_path)
        return point, is_below

    def is_cancelled(self, subpath):
        # a negative path from another idea is cancelled if covered
        # by a positive path.
        if subpath.positive:
            return False
        point, is_below = self.find_insertion(subpath)
        if not is_below:
            return False
        return self.paths[point-1].positive

    def includes_post(self, post_path):
        "Is this post (given as path) included in this collection?"
        # Weirdly, same logic as path cancellation.
        return self.is_cancelled(PostPathData(post_path, False))
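
    # Continuing the reduce() sketch above (col is <1,+> ; <1,2,->):
    #
    #     >>> col.includes_post("1,")      # covered by the positive path
    #     True
    #     >>> col.includes_post("1,2,5,")  # below the thread break at post 2
    #     False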

    def __bool__(self):
        return bool(self.paths)

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        if len(self.paths) != len(other.paths):
            return False
        for (n, path) in enumerate(self.paths):
            if path != other.paths[n]:
                return False
        return True

    def clone(self):
        clone = self.__class__()
        clone.paths = self.paths[:]
        clone.reduced = self.reduced
        return clone

    @as_native_str()
    def __repr__(self):
        return " ; ".join((repr(x) for x in self.paths))

    def as_clause_base(self, db, discussion_id, include_breakpoints=False,
                       include_deleted=False):
        """Express collection as a SQLAlchemy query clause.

        :param bool include_breakpoints: Include posts where a threadbreak happens
        :param include_deleted: Include posts in deleted_publication_states.
            True means only deleted posts,
            None means all posts,
            False means only live posts or deleted posts with live descendants.
        """
        assert self.reduced

        def base_query(labeled=False):
            post = with_polymorphic(
                Post, [], Post.__table__, aliased=False, flat=True)
            content = with_polymorphic(
                Content, [], Content.__table__, aliased=False, flat=True)
            if labeled:
                query = db.query(post.id.label("post_id"))
            else:
                query = db.query(post.id)
            query = query.join(content, (content.id == post.id) & (
                content.discussion_id == discussion_id))
            if include_deleted is not None:
                if include_deleted:
                    query = query.filter(
                        post.publication_state.in_(deleted_publication_states))
                else:
                    query = query.filter(content.tombstone_date == None)
            return post, query

        if not self.paths:
            post, q = base_query(True)
            return q.filter(False).subquery("relposts")
        includes_by_level = [[]]
        excludes_by_level = [[]]
        ancestry = []
        for path in self.paths:
            while ancestry:
                if not path.post_path.startswith(ancestry[-1].post_path):
                    ancestry.pop()
                else:
                    break
            level = len(ancestry) // 2
            if path.positive:
                while len(includes_by_level) <= level:
                    includes_by_level.append([])
                includes_by_level[level].append(path)
            else:
                while len(excludes_by_level) <= level:
                    excludes_by_level.append([])
                excludes_by_level[level].append(path)
            ancestry.append(path)
        max_level = max(len(includes_by_level), len(excludes_by_level))
        q = None
        for level in range(max_level):
            condition = None
            # with use_labels, name of final column determined by first query
            post, q2 = base_query(level == 0)
            includes = (includes_by_level[level]
                        if level < len(includes_by_level) else [])
            excludes = (excludes_by_level[level]
                        if level < len(excludes_by_level) else [])
            include_ids = [path.last_id for path in includes]
            exclude_ids = [path.last_id for path in excludes]
            if include_breakpoints:
                include_ids.extend(exclude_ids)
                exclude_ids = None
            if len(includes):
                ancestry_regex = '^(%s)' % ('|'.join(
                    path.post_path for path in includes))
                condition = or_(
                    post.id.in_(include_ids),
                    post.ancestry.op('~', 0, True)(ancestry_regex))
            if level == 0:
                q = q2.filter(condition)
            else:
                assert condition is not None
                q2 = q2.filter(condition)
                # works in postgres, more efficient
                q = union(q, q2, use_labels=True)
                # rather than
                # q = q.union(q2)
            condition = None
            post, q2 = base_query()
            if len(excludes):
                ancestry_regex = '^(%s)' % ('|'.join(
                    path.post_path for path in excludes))
                condition = post.ancestry.op('~', 0, True)(ancestry_regex)
                if exclude_ids:
                    condition = post.id.in_(exclude_ids) | condition
                q = except_(q, q2.filter(condition), use_labels=True)
                # q = q.except_(q2.filter(condition))
            condition = None
        if getattr(q, "c", None) is None:
            # base query
            c = q._entities[0]
            q = q.with_entities(c.expr.label("post_id"))
            q = q.subquery("relposts")
        else:
            # compound query, already has columns
            q = q.alias("relposts")
        return q

    def as_clause(self, db, discussion_id, user_id=None, content=None,
                  include_deleted=False):
        subq = self.as_clause_base(
            db, discussion_id, include_deleted=include_deleted)
        content = content or with_polymorphic(
            Content, [], Content.__table__, aliased=False, flat=True)
        q = db.query(content).filter(
            (content.discussion_id == discussion_id) &
            (content.hidden == False)
            ).join(subq, content.id == subq.c.post_id)
        if include_deleted is not None:
            if include_deleted:
                post = with_polymorphic(
                    Post, [], Post.__table__, aliased=False, flat=True)
                q = q.join(
                    post,
                    (post.id == content.id) &
                    post.publication_state.in_(deleted_publication_states))
            else:
                q = q.filter(content.tombstone_date == None)
        if user_id:
            # subquery?
            q = q.outerjoin(
                ViewPost,
                (ViewPost.post_id == content.id) &
                (ViewPost.tombstone_date == None) &
                (ViewPost.actor_id == user_id)
                ).add_columns(ViewPost.id)
        return q


class PostPathGlobalCollection(object):
    """Collects PostPathLocalCollections for each idea in the discussion

    Maintains paths, a dictionary of PostPathLocalCollections by idea_id
    """
    positives = IdeaContentPositiveLink.polymorphic_identities()
    negatives = IdeaContentNegativeLink.polymorphic_identities()

    def __init__(self, discussion=None):
        self.paths = defaultdict(PostPathLocalCollection)
        if discussion is not None:
            self.load_discussion(discussion)

    def load_discussion(self, discussion):
        self.discussion = discussion
        post = with_polymorphic(Content, [Post])
        ICL = with_polymorphic(
            IdeaContentLink, [], IdeaContentLink.__table__,
            aliased=False, flat=True)
        post = with_polymorphic(
            Post, [], Post.__table__, aliased=False, flat=True)
        # This should be a join but creates a subquery
        content = with_polymorphic(
            Content, [], Content.__table__, aliased=False, flat=True)
        q = discussion.db.query(
            ICL.idea_id,
            ICL.type,
            post.ancestry.op('||')(post.id.cast(String))
            ).join(post, post.id == ICL.content_id
            ).join(content, content.id == post.id
            ).filter(
                ICL.idea_id != None,
                content.discussion_id == discussion.id,
                content.hidden == False)
        for (idea_id, typename, path) in q:
            path += ","
            if typename in self.positives:
                self.paths[idea_id].add_path(PostPathData(path, True))
            elif typename in self.negatives:
                self.paths[idea_id].add_path(PostPathData(path, False))
        for ppc in self.paths.values():
            ppc.reduce()
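
# Illustrative sketch (not part of the original module): building the raw
# per-idea collections for a discussion. `discussion` is assumed to be a
# loaded Discussion instance with a bound session, and `idea_id` the id of
# one of its ideas. The resulting clause reflects only the idea's own
# links; see PostPathCombiner below for the globally combined version.
#
#     collection = PostPathGlobalCollection(discussion)
#     local = collection.paths[idea_id]          # a PostPathLocalCollection
#     q = local.as_clause(discussion.db, discussion.id)
#     directly_related_posts = q.all()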


class PostPathCombiner(PostPathGlobalCollection, IdeaVisitor):
    """A traversal that will combine the PostPathLocalCollections
    of an idea with those of the idea's ancestors.

    The result is that the as_clause of each PostPathLocalCollections
    in self.paths is globally complete"""
    def __init__(self, discussion):
        super(PostPathCombiner, self).__init__(discussion)

    def init_from(self, post_path_global_collection):
        for id, paths in post_path_global_collection.paths.items():
            self.paths[id] = paths.clone()
        self.discussion = post_path_global_collection.discussion

    def visit_idea(self, idea_id, level, prev_result):
        if isinstance(idea_id, Idea):
            idea_id = idea_id.id
        return self.paths[idea_id]

    def copy_result(self, idea_id, parent_result, child_result):
        # When the parent has no information, and can get it from a single child
        parent_result.paths = child_result.paths[:]

    def end_visit(self, idea_id, level, result, child_results):
        if isinstance(idea_id, Idea):
            idea_id = idea_id.id
        child_results = [
            res for (child, res) in child_results if bool(res)]
        if len(child_results) == 1 and not result:
            # optimisation
            self.copy_result(idea_id, result, child_results[0])
        else:
            for r in child_results:
                result.combine(r)
        self.root_idea_id = idea_id
        return result

    def orphan_clause(self, user_id=None, content=None, include_deleted=False):
        root_path = self.paths[self.root_idea_id]
        db = self.discussion.default_db
        subq = root_path.as_clause_base(
            db, self.discussion.id, include_deleted=include_deleted)
        content = content or with_polymorphic(
            Content, [], Content.__table__, aliased=False, flat=True)
        synth_post_type = SynthesisPost.__mapper_args__['polymorphic_identity']
        webpage_post_type = Webpage.__mapper_args__['polymorphic_identity']
        q = db.query(content.id.label("post_id")).filter(
            (content.discussion_id == self.discussion.id) &
            (content.hidden == False) &
            (content.type.notin_((synth_post_type, webpage_post_type))) &
            content.id.notin_(subq))
        if include_deleted is not None:
            if include_deleted:
                post = with_polymorphic(
                    Post, [], Post.__table__, aliased=False, flat=True)
                q = q.join(
                    post,
                    (post.id == content.id) &
                    post.publication_state.in_(deleted_publication_states))
            else:
                q = q.filter(content.tombstone_date == None)
        if user_id:
            # subquery?
            q = q.outerjoin(
                ViewPost,
                (ViewPost.post_id == content.id) &
                (ViewPost.tombstone_date == None) &
                (ViewPost.actor_id == user_id)
                ).add_columns(ViewPost.id)
        return q


class PostPathCounter(PostPathCombiner):
    "Adds the ability to do post counts to PostPathCombiner."
    def __init__(self, discussion, user_id=None, calc_subset=None):
        super(PostPathCounter, self).__init__(discussion)
        self.counts = {}
        self.viewed_counts = {}
        self.read_counts = {}
        self.contributor_counts = {}
        self.user_id = user_id
        self.calc_subset = calc_subset

    def copy_result(self, idea_id, parent_result, child_result):
        # When the parent has no information, and can get it from a single child
        super(PostPathCounter, self).copy_result(
            idea_id, parent_result, child_result)
        if getattr(parent_result, "count", None) is None:
            return
        parent_result.count = child_result.count
        parent_result.viewed_count = child_result.viewed_count
        self.counts[idea_id] = parent_result.count
        self.viewed_counts[idea_id] = parent_result.viewed_count

    def get_counts_for_query(self, q):
        # HACKITY HACK
        entities = [
            x.entity_zero.entity for x in q._entities]
        entities = {e.__mapper__.tables[0].name: e for e in entities}
        content_entity = entities['content']
        post = with_polymorphic(
            Post, [], Post.__table__, aliased=False, flat=True)
        q = q.join(
            post,
            (content_entity.id == post.id) &
            (post.publication_state.in_(countable_publication_states)))
        if self.user_id:
            action_entity = entities['action']
            return q.with_entities(
                count(content_entity.id),
                count(post.creator_id.distinct()),
                count(action_entity.id)).first()
        else:
            (post_count, contributor_count) = q.with_entities(
                count(content_entity.id),
                count(post.creator_id.distinct())).first()
            return (post_count, contributor_count, 0)

    def get_counts(self, idea_id):
        if self.counts.get(idea_id, None) is not None:
            return (
                self.counts[idea_id],
                self.contributor_counts[idea_id],
                self.viewed_counts[idea_id])
        path_collection = self.paths[idea_id]
        if not path_collection:
            (
                path_collection.count,
                path_collection.contributor_count,
                path_collection.viewed_count
            ) = (0, 0, 0)
            self.counts[idea_id] = 0
            self.contributor_counts[idea_id] = 0
            self.viewed_counts[idea_id] = 0
            return (0, 0, 0)
        q = path_collection.as_clause(
            self.discussion.db, self.discussion.id,
            user_id=self.user_id, include_deleted=None)
        (
            post_count, contributor_count, viewed_count
        ) = self.get_counts_for_query(q)
        (
            path_collection.count,
            path_collection.contributor_count,
            path_collection.viewed_count
        ) = (post_count, contributor_count, viewed_count)
        self.counts[idea_id] = post_count
        self.viewed_counts[idea_id] = viewed_count
        self.contributor_counts[idea_id] = contributor_count
        return (post_count, contributor_count, viewed_count)

    def get_orphan_counts(self, include_deleted=False):
        return self.get_counts_for_query(
            self.orphan_clause(self.user_id, include_deleted=include_deleted))

    def end_visit(self, idea_id, level, result, child_results):
        if isinstance(idea_id, Idea):
            idea_id = idea_id.id
        result = super(PostPathCounter, self).end_visit(
            idea_id, level, result, child_results)
        if self.calc_subset is None or (idea_id in self.calc_subset):
            self.get_counts(idea_id)
        return result
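
# Illustrative sketch (not part of the original module): computing per-idea
# post counts with the visitor, mirroring DiscussionGlobalData.post_path_counter()
# below. `discussion`, `user_id`, `children_dict` and `idea_id` are assumed
# inputs; calc_subset=None makes end_visit() count every idea it visits.
#
#     counter = PostPathCounter(discussion, user_id, calc_subset=None)
#     counter.init_from(PostPathGlobalCollection(discussion))
#     Idea.visit_idea_ids_depth_first(counter, discussion.id, children_dict)
#     posts, contributors, viewed = counter.get_counts(idea_id)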


class DiscussionGlobalData(object):
    "Cache for global discussion data, lasts as long as the pyramid request object."
    def __init__(self, db, discussion_id, user_id=None, discussion=None):
        self.discussion_id = discussion_id
        self.db = db
        self.user_id = user_id
        self._discussion = discussion
        self._parent_dict = None
        self._children_dict = None
        self._post_path_collection_raw = None
        self._post_path_counter = None

    @property
    def discussion(self):
        if self._discussion is None:
            self._discussion = Discussion.get(self.discussion_id)
        return self._discussion

    @property
    def parent_dict(self):
        """dictionary child_idea.id -> parent_idea.id.

        TODO: Make it dict(id->id[]) for multiparenting"""
        if self._parent_dict is None:
            source = aliased(Idea, name="source")
            target = aliased(Idea, name="target")
            self._parent_dict = dict(self.db.query(
                IdeaLink.target_id, IdeaLink.source_id
                ).join(source, source.id == IdeaLink.source_id
                ).join(target, target.id == IdeaLink.target_id
                ).filter(
                    source.discussion_id == self.discussion_id,
                    IdeaLink.tombstone_date == None,
                    source.tombstone_date == None,
                    target.tombstone_date == None,
                    target.discussion_id == self.discussion_id))
        return self._parent_dict

    def idea_ancestry(self, idea_id):
        """generator of ids of ancestor ideas"""
        while idea_id:
            yield idea_id
            idea_id = self.parent_dict.get(idea_id, None)

    @property
    def children_dict(self):
        if self._children_dict is None:
            if not self.parent_dict:
                (root_id,) = self.db.query(
                    RootIdea.id).filter_by(
                    discussion_id=self.discussion_id).first()
                self._children_dict = {None: (root_id,), root_id: ()}
                return self._children_dict
            children = defaultdict(list)
            for child, parent in self.parent_dict.items():
                children[parent].append(child)
            root = set(children.keys()) - set(self.parent_dict.keys())
            # assert len(root) == 1
            children[None] = [root.pop()]
            self._children_dict = children
        return self._children_dict

    @property
    def post_path_collection_raw(self):
        if self._post_path_collection_raw is None:
            self._post_path_collection_raw = PostPathGlobalCollection(
                self.discussion)
        return self._post_path_collection_raw

    def post_path_counter(self, user_id, calc_all):
        if (self._post_path_counter is None
                or not isinstance(self._post_path_counter, PostPathCounter)):
            collection = self.post_path_collection_raw
            counter = PostPathCounter(
                self.discussion, user_id, None if calc_all else ())
            counter.init_from(self.post_path_collection_raw)
            Idea.visit_idea_ids_depth_first(
                counter, self.discussion_id, self.children_dict)
            self._post_path_counter = counter
        return self._post_path_counter

    def reset_hierarchy(self):
        self._parent_dict = None
        self._children_dict = None
        self._post_path_counter = None

    def reset_content_links(self):
        self._post_path_collection_raw = None
        self._post_path_counter = None
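
# Illustrative sketch (not part of the original module): request-scoped use of
# the cache, assuming a SQLAlchemy session `db`, a `discussion_id`, a `user_id`
# and an `idea_id`.
#
#     global_data = DiscussionGlobalData(db, discussion_id, user_id)
#     counter = global_data.post_path_counter(user_id, calc_all=True)
#     posts, contributors, viewed = counter.get_counts(idea_id)
#     orphan_posts, orphan_contributors, _ = counter.get_orphan_counts()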