Source code for assembl.models.path_utils

# -*- coding: utf-8 -*-
"""Utilities for traversing the set of content related to an idea and vice-versa."""

from builtins import range
from builtins import object
from functools import total_ordering
from collections import defaultdict
from bisect import bisect_right

from future.utils import as_native_str
from sqlalchemy import String
from sqlalchemy.orm import (with_polymorphic, aliased)
from sqlalchemy.sql.expression import or_, union, except_
from sqlalchemy.sql.functions import count

from .idea_content_link import (
    IdeaContentLink, IdeaContentPositiveLink, IdeaContentNegativeLink)
from .post import (
    Post, Content, SynthesisPost,
    countable_publication_states, deleted_publication_states)
from .annotation import Webpage
from .idea import IdeaVisitor, Idea, IdeaLink, RootIdea
from .discussion import Discussion
from .action import ViewPost

# TODO: Write a discussion structure cache manager.
# It would cache parents, children, counts, etc. as needed,
# and understand the invalidation relationships.
# Tie it to the request? Use request.set_property?


# Cases to watch:
# I1 > I2 + P1 > P2
# I1 > I2 - P2
# =>
# I1, I2 < P1 / P2

# I1 > I2 + P1 > P2 > P3
# I1 > I2 - P2
# I1 > I2 > I3 + P3
# =>
# I1, I2 < P1, P3 / P2
# I3 < P3

# I1 + P1 > P2 > P3 > P4 > P5
# I1 - P2
# I1 + P3
# I1 - P5
# => I1 < P1, P3, P4 / P5

# I1 > I2 + P1 > P2
# I1 > I2 - P2
# I1 > I3 + P1 > P3
# I1 > I3 - P1 > P3
# =>
# I2 < P1, P3 / P2
# I3 < P1, P2 / P3
# I1 < P1, P2, P3


@total_ordering
class PostPathData(object):
    "Data about a single post_path."
    __slots__ = ("positive", "post_path")

    def __init__(self, post_path, positive):
        self.post_path = post_path
        self.positive = positive

    def combine(self, other):
        if self.positive != other.positive:
            # if different ideas and neg subpath of pos, use pos.
            # if same idea, as below
            # But this should be done globally further down, because
            # combining here loses "same idea" cases.
            if self.post_path == other.post_path:
                return other if self.positive else self
            return None
        if self.post_path.startswith(other.post_path):
            return other
        elif other.post_path.startswith(self.post_path):
            return self
        return None

    @property
    def last_id(self):
        return int(self.post_path.strip(',').split(",")[-1])

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return ((self.positive == other.positive) and
                (self.post_path == other.post_path))

    def __lt__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        if self.post_path != other.post_path:
            # lexicographic sort is good enough
            return self.post_path < other.post_path
        # We want positives to come first
        return self.positive > other.positive

    def __hash__(self):
        return hash(self.post_path) + int(self.positive)

    @as_native_str()
    def __repr__(self):
        return "<%s%s>" % (
            self.post_path, "+" if self.positive else "-")
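
# Illustrative sketch (not part of the original module): how PostPathData
# ordering and combine() behave. Path strings are assumed to use the
# comma-terminated ancestry format produced by load_discussion() below,
# e.g. "4,7," for post 7 in the thread under post 4.
#
#     >>> a = PostPathData("4,", True)
#     >>> b = PostPathData("4,7,", True)
#     >>> a.combine(b)                            # same polarity: the covering path wins
#     <4,+>
#     >>> b.combine(PostPathData("4,7,", False))  # same path, opposite polarity: keeps the negative
#     <4,7,->
#     >>> sorted([b, PostPathData("4,", False), a])  # lexicographic, positives first
#     [<4,+>, <4,->, <4,7,+>]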


class PostPathLocalCollection(object):
    "Data about all PostPaths local to an Idea."
    def __init__(self):
        self.paths = []
        self.reduced = True

    def add_path(self, path):
        self.paths.append(path)
        self.reduced = False

    def combine(self, other):
        "Combine the paths from two collections (different ideas)."
        if self and other:
            # First, filter out negative paths
            # covered by positive paths in other collection
            my_paths = [p for p in other.paths if not self.is_cancelled(p)]
            other_paths = [p for p in self.paths if not other.is_cancelled(p)]
            # Then combine and reduce.
            my_paths.extend(other_paths)
            self.paths = my_paths
            self.reduce()
        else:
            # if either is empty, simple case.
            self.paths.extend(other.paths)
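
    # Illustrative sketch (not part of the original module): combining the
    # collections of two child ideas into their parent, as PostPathCombiner's
    # end_visit() does below. The sibling's positive path cancels the other
    # child's negative path, so the parent ends up covering the whole thread.
    #
    #     >>> parent = PostPathLocalCollection()
    #     >>> parent.add_path(PostPathData("1,", True))     # first child: post 1's thread...
    #     >>> parent.add_path(PostPathData("1,2,", False))  # ...minus the subthread under post 2
    #     >>> parent.reduce()
    #     >>> sibling = PostPathLocalCollection()
    #     >>> sibling.add_path(PostPathData("1,2,", True))  # second child: the subthread under post 2
    #     >>> sibling.reduce()
    #     >>> parent.combine(sibling)
    #     >>> parent
    #     <1,+>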

    def reduce(self):
        """Reduce overlapping paths.

        For example, <1,+>,<1,2,+> becomes <1,+>
        But <1,+>,<1,2,->,<1,2,3,+> remains as-is
        And <1,+>,<1,2,->,<1,3,4,+> becomes <1,+>,<1,2,->"""
        if not len(self.paths):
            return
        self.paths.sort()
        paths = []
        ancestors_by_polarity = {True: [], False: [], None: []}
        for path in self.paths:
            for ancestors in ancestors_by_polarity.values():
                while ancestors:
                    if not path.post_path.startswith(ancestors[-1].post_path):
                        ancestors.pop()
                    else:
                        break
            ancestors = ancestors_by_polarity[None]
            pol_ancestors = ancestors_by_polarity[path.positive]
            # Special case: Combine in place
            if len(ancestors) and ancestors[-1].combine(path) is path:
                paths[paths.index(ancestors[-1])] = path
                ancestors[-1] = path
                pol_ancestors[-1] = path
                continue
            # Forget if it combines with a previous path of same polarity
            # BUT do not combine with distant previous path of same polarity
            # if closest previous path is super-path.
            last_path_pol = pol_ancestors[-1] if len(pol_ancestors) else None
            last_path = ancestors[-1] if len(ancestors) else None
            if last_path_pol is not None and not (
                    last_path is not last_path_pol and
                    path.post_path.startswith(last_path.post_path)):
                combined = last_path_pol.combine(path)
                assert combined is not path, "We should not combine forward"
                if combined is not None:
                    continue
            paths.append(path)
            ancestors.append(path)
            pol_ancestors.append(path)
        self.paths = paths
        self.reduced = True
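
    # Illustrative sketch (not part of the original module): the third example
    # from the docstring above, written with the assumed comma-terminated
    # path format.
    #
    #     >>> col = PostPathLocalCollection()
    #     >>> col.add_path(PostPathData("1,", True))
    #     >>> col.add_path(PostPathData("1,2,", False))
    #     >>> col.add_path(PostPathData("1,3,4,", True))
    #     >>> col.reduce()
    #     >>> col
    #     <1,+> ; <1,2,->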

    def find_insertion(self, data):
        assert self.reduced
        point = bisect_right(self.paths, data)
        is_below = False
        if point:
            previous = self.paths[point-1]
            is_below = data.post_path.startswith(previous.post_path)
        return point, is_below

    def is_cancelled(self, subpath):
        # a negative path from another idea is cancelled if covered
        # by a positive path.
        if subpath.positive:
            return False
        point, is_below = self.find_insertion(subpath)
        if not is_below:
            return False
        return self.paths[point-1].positive

    def includes_post(self, post_path):
        "Is this post (given as path) included in this collection?"
        # Weirdly, same logic as path cancellation.
        return self.is_cancelled(PostPathData(post_path, False))
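
    # Continuing the reduce() sketch above (col is <1,+> ; <1,2,->):
    #
    #     >>> col.includes_post("1,")      # covered by the positive path
    #     True
    #     >>> col.includes_post("1,2,5,")  # below the thread break at post 2
    #     False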

    def __bool__(self):
        return bool(self.paths)

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        if len(self.paths) != len(other.paths):
            return False
        for (n, path) in enumerate(self.paths):
            if path != other.paths[n]:
                return False
        return True

    def clone(self):
        clone = self.__class__()
        clone.paths = self.paths[:]
        clone.reduced = self.reduced
        return clone

    @as_native_str()
    def __repr__(self):
        return " ; ".join((repr(x) for x in self.paths))

    def as_clause_base(self, db, discussion_id, include_breakpoints=False,
                       include_deleted=False):
        """Express collection as a SQLAlchemy query clause.

        :param bool include_breakpoints: Include posts where a threadbreak happens
        :param include_deleted: Include posts in deleted_publication_states.
            True means only deleted posts,
            None means all posts,
            False means only live posts or deleted posts with live descendants.
        """
        assert self.reduced

        def base_query(labeled=False):
            post = with_polymorphic(
                Post, [], Post.__table__, aliased=False, flat=True)
            content = with_polymorphic(
                Content, [], Content.__table__, aliased=False, flat=True)
            if labeled:
                query = db.query(post.id.label("post_id"))
            else:
                query = db.query(post.id)
            query = query.join(content, (content.id == post.id) & (
                content.discussion_id == discussion_id))
            if include_deleted is not None:
                if include_deleted:
                    query = query.filter(
                        post.publication_state.in_(deleted_publication_states))
                else:
                    query = query.filter(content.tombstone_date == None)
            return post, query

        if not self.paths:
            post, q = base_query(True)
            return q.filter(False).subquery("relposts")
        includes_by_level = [[]]
        excludes_by_level = [[]]
        ancestry = []
        for path in self.paths:
            while ancestry:
                if not path.post_path.startswith(ancestry[-1].post_path):
                    ancestry.pop()
                else:
                    break
            level = len(ancestry) // 2
            if path.positive:
                while len(includes_by_level) <= level:
                    includes_by_level.append([])
                includes_by_level[level].append(path)
            else:
                while len(excludes_by_level) <= level:
                    excludes_by_level.append([])
                excludes_by_level[level].append(path)
            ancestry.append(path)
        max_level = max(len(includes_by_level), len(excludes_by_level))
        q = None
        for level in range(max_level):
            condition = None
            # with use_labels, name of final column determined by first query
            post, q2 = base_query(level == 0)
            includes = (includes_by_level[level]
                        if level < len(includes_by_level) else [])
            excludes = (excludes_by_level[level]
                        if level < len(excludes_by_level) else [])
            include_ids = [path.last_id for path in includes]
            exclude_ids = [path.last_id for path in excludes]
            if include_breakpoints:
                include_ids.extend(exclude_ids)
                exclude_ids = None
            if len(includes):
                ancestry_regex = '^(%s)' % ('|'.join(
                    path.post_path for path in includes))
                condition = or_(
                    post.id.in_(include_ids),
                    post.ancestry.op('~', 0, True)(ancestry_regex))
            if level == 0:
                q = q2.filter(condition)
            else:
                assert condition is not None
                q2 = q2.filter(condition)
                # works in postgres, more efficient
                q = union(q, q2, use_labels=True)
                # rather than
                # q = q.union(q2)
            condition = None
            post, q2 = base_query()
            if len(excludes):
                ancestry_regex = '^(%s)' % ('|'.join(
                    path.post_path for path in excludes))
                condition = post.ancestry.op('~', 0, True)(ancestry_regex)
                if exclude_ids:
                    condition = post.id.in_(exclude_ids) | condition
                q = except_(q, q2.filter(condition), use_labels=True)
                # q = q.except_(q2.filter(condition))
            condition = None
        if getattr(q, "c", None) is None:
            # base query
            c = q._entities[0]
            q = q.with_entities(c.expr.label("post_id"))
            q = q.subquery("relposts")
        else:
            # compound query, already has columns
            q = q.alias("relposts")
        return q

    def as_clause(self, db, discussion_id, user_id=None, content=None,
                  include_deleted=False):
        subq = self.as_clause_base(
            db, discussion_id, include_deleted=include_deleted)
        content = content or with_polymorphic(
            Content, [], Content.__table__, aliased=False, flat=True)
        q = db.query(content).filter(
            (content.discussion_id == discussion_id) &
            (content.hidden == False)
            ).join(subq, content.id == subq.c.post_id)
        if include_deleted is not None:
            if include_deleted:
                post = with_polymorphic(
                    Post, [], Post.__table__, aliased=False, flat=True)
                q = q.join(
                    post,
                    (post.id == content.id) &
                    post.publication_state.in_(deleted_publication_states))
            else:
                q = q.filter(content.tombstone_date == None)
        if user_id:
            # subquery?
            q = q.outerjoin(
                ViewPost,
                (ViewPost.post_id == content.id) &
                (ViewPost.tombstone_date == None) &
                (ViewPost.actor_id == user_id)
                ).add_columns(ViewPost.id)
        return q


class PostPathGlobalCollection(object):
    """Collects PostPathLocalCollections for each idea in the discussion

    Maintains paths, a dictionary of PostPathLocalCollections by idea_id
    """
    positives = IdeaContentPositiveLink.polymorphic_identities()
    negatives = IdeaContentNegativeLink.polymorphic_identities()

    def __init__(self, discussion=None):
        self.paths = defaultdict(PostPathLocalCollection)
        if discussion is not None:
            self.load_discussion(discussion)

    def load_discussion(self, discussion):
        self.discussion = discussion
        post = with_polymorphic(Content, [Post])
        ICL = with_polymorphic(
            IdeaContentLink, [], IdeaContentLink.__table__,
            aliased=False, flat=True)
        post = with_polymorphic(
            Post, [], Post.__table__, aliased=False, flat=True)
        # This should be a join but creates a subquery
        content = with_polymorphic(
            Content, [], Content.__table__, aliased=False, flat=True)
        q = discussion.db.query(
            ICL.idea_id,
            ICL.type,
            post.ancestry.op('||')(post.id.cast(String))
            ).join(post, post.id == ICL.content_id
            ).join(content, content.id == post.id
            ).filter(
                ICL.idea_id != None,
                content.discussion_id == discussion.id,
                content.hidden == False)
        for (idea_id, typename, path) in q:
            path += ","
            if typename in self.positives:
                self.paths[idea_id].add_path(PostPathData(path, True))
            elif typename in self.negatives:
                self.paths[idea_id].add_path(PostPathData(path, False))
        for ppc in self.paths.values():
            ppc.reduce()
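
# Illustrative sketch (not part of the original module): building the raw
# per-idea collections for a discussion. `discussion` is assumed to be a
# loaded Discussion instance with a bound session, and `idea_id` the id of
# one of its ideas. The resulting clause reflects only the idea's own
# links; see PostPathCombiner below for the globally combined version.
#
#     collection = PostPathGlobalCollection(discussion)
#     local = collection.paths[idea_id]          # a PostPathLocalCollection
#     q = local.as_clause(discussion.db, discussion.id)
#     directly_related_posts = q.all()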


class PostPathCombiner(PostPathGlobalCollection, IdeaVisitor):
    """A traversal that will combine the PostPathLocalCollections
    of an idea with those of the idea's ancestors.

    The result is that the as_clause of each PostPathLocalCollections
    in self.paths is globally complete"""
    def __init__(self, discussion):
        super(PostPathCombiner, self).__init__(discussion)

    def init_from(self, post_path_global_collection):
        for id, paths in post_path_global_collection.paths.items():
            self.paths[id] = paths.clone()
        self.discussion = post_path_global_collection.discussion

    def visit_idea(self, idea_id, level, prev_result):
        if isinstance(idea_id, Idea):
            idea_id = idea_id.id
        return self.paths[idea_id]

    def copy_result(self, idea_id, parent_result, child_result):
        # When the parent has no information, and can get it from a single child
        parent_result.paths = child_result.paths[:]

    def end_visit(self, idea_id, level, result, child_results):
        if isinstance(idea_id, Idea):
            idea_id = idea_id.id
        child_results = [
            res for (child, res) in child_results if bool(res)]
        if len(child_results) == 1 and not result:
            # optimisation
            self.copy_result(idea_id, result, child_results[0])
        else:
            for r in child_results:
                result.combine(r)
        self.root_idea_id = idea_id
        return result

    def orphan_clause(self, user_id=None, content=None, include_deleted=False):
        root_path = self.paths[self.root_idea_id]
        db = self.discussion.default_db
        subq = root_path.as_clause_base(
            db, self.discussion.id, include_deleted=include_deleted)
        content = content or with_polymorphic(
            Content, [], Content.__table__, aliased=False, flat=True)
        synth_post_type = SynthesisPost.__mapper_args__['polymorphic_identity']
        webpage_post_type = Webpage.__mapper_args__['polymorphic_identity']
        q = db.query(content.id.label("post_id")).filter(
            (content.discussion_id == self.discussion.id) &
            (content.hidden == False) &
            (content.type.notin_((synth_post_type, webpage_post_type))) &
            content.id.notin_(subq))
        if include_deleted is not None:
            if include_deleted:
                post = with_polymorphic(
                    Post, [], Post.__table__, aliased=False, flat=True)
                q = q.join(
                    post,
                    (post.id == content.id) &
                    post.publication_state.in_(deleted_publication_states))
            else:
                q = q.filter(content.tombstone_date == None)
        if user_id:
            # subquery?
            q = q.outerjoin(
                ViewPost,
                (ViewPost.post_id == content.id) &
                (ViewPost.tombstone_date == None) &
                (ViewPost.actor_id == user_id)
                ).add_columns(ViewPost.id)
        return q


class PostPathCounter(PostPathCombiner):
    "Adds the ability to do post counts to PostPathCombiner."
    def __init__(self, discussion, user_id=None, calc_subset=None):
        super(PostPathCounter, self).__init__(discussion)
        self.counts = {}
        self.viewed_counts = {}
        self.read_counts = {}
        self.contributor_counts = {}
        self.user_id = user_id
        self.calc_subset = calc_subset

    def copy_result(self, idea_id, parent_result, child_result):
        # When the parent has no information, and can get it from a single child
        super(PostPathCounter, self).copy_result(
            idea_id, parent_result, child_result)
        if getattr(parent_result, "count", None) is None:
            return
        parent_result.count = child_result.count
        parent_result.viewed_count = child_result.viewed_count
        self.counts[idea_id] = parent_result.count
        self.viewed_counts[idea_id] = parent_result.viewed_count

    def get_counts_for_query(self, q):
        # HACKITY HACK
        entities = [
            x.entity_zero.entity for x in q._entities]
        entities = {e.__mapper__.tables[0].name: e for e in entities}
        content_entity = entities['content']
        post = with_polymorphic(
            Post, [], Post.__table__, aliased=False, flat=True)
        q = q.join(
            post,
            (content_entity.id == post.id) &
            (post.publication_state.in_(countable_publication_states)))
        if self.user_id:
            action_entity = entities['action']
            return q.with_entities(
                count(content_entity.id),
                count(post.creator_id.distinct()),
                count(action_entity.id)).first()
        else:
            (post_count, contributor_count) = q.with_entities(
                count(content_entity.id),
                count(post.creator_id.distinct())).first()
            return (post_count, contributor_count, 0)

    def get_counts(self, idea_id):
        if self.counts.get(idea_id, None) is not None:
            return (
                self.counts[idea_id],
                self.contributor_counts[idea_id],
                self.viewed_counts[idea_id])
        path_collection = self.paths[idea_id]
        if not path_collection:
            (
                path_collection.count,
                path_collection.contributor_count,
                path_collection.viewed_count
            ) = (0, 0, 0)
            self.counts[idea_id] = 0
            self.contributor_counts[idea_id] = 0
            self.viewed_counts[idea_id] = 0
            return (0, 0, 0)
        q = path_collection.as_clause(
            self.discussion.db, self.discussion.id,
            user_id=self.user_id, include_deleted=None)
        (
            post_count, contributor_count, viewed_count
        ) = self.get_counts_for_query(q)
        (
            path_collection.count,
            path_collection.contributor_count,
            path_collection.viewed_count
        ) = (post_count, contributor_count, viewed_count)
        self.counts[idea_id] = post_count
        self.viewed_counts[idea_id] = viewed_count
        self.contributor_counts[idea_id] = contributor_count
        return (post_count, contributor_count, viewed_count)

    def get_orphan_counts(self, include_deleted=False):
        return self.get_counts_for_query(
            self.orphan_clause(self.user_id, include_deleted=include_deleted))

    def end_visit(self, idea_id, level, result, child_results):
        if isinstance(idea_id, Idea):
            idea_id = idea_id.id
        result = super(PostPathCounter, self).end_visit(
            idea_id, level, result, child_results)
        if self.calc_subset is None or (idea_id in self.calc_subset):
            self.get_counts(idea_id)
        return result
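
# Illustrative sketch (not part of the original module): computing per-idea
# post counts with the visitor, mirroring DiscussionGlobalData.post_path_counter()
# below. `discussion`, `user_id`, `children_dict` and `idea_id` are assumed
# inputs; calc_subset=None makes end_visit() count every idea it visits.
#
#     counter = PostPathCounter(discussion, user_id, calc_subset=None)
#     counter.init_from(PostPathGlobalCollection(discussion))
#     Idea.visit_idea_ids_depth_first(counter, discussion.id, children_dict)
#     posts, contributors, viewed = counter.get_counts(idea_id)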


class DiscussionGlobalData(object):
    "Cache for global discussion data, lasts as long as the pyramid request object."
    def __init__(self, db, discussion_id, user_id=None, discussion=None):
        self.discussion_id = discussion_id
        self.db = db
        self.user_id = user_id
        self._discussion = discussion
        self._parent_dict = None
        self._children_dict = None
        self._post_path_collection_raw = None
        self._post_path_counter = None

    @property
    def discussion(self):
        if self._discussion is None:
            self._discussion = Discussion.get(self.discussion_id)
        return self._discussion

    @property
    def parent_dict(self):
        """dictionary child_idea.id -> parent_idea.id.

        TODO: Make it dict(id->id[]) for multiparenting"""
        if self._parent_dict is None:
            source = aliased(Idea, name="source")
            target = aliased(Idea, name="target")
            self._parent_dict = dict(self.db.query(
                IdeaLink.target_id, IdeaLink.source_id
                ).join(source, source.id == IdeaLink.source_id
                ).join(target, target.id == IdeaLink.target_id
                ).filter(
                    source.discussion_id == self.discussion_id,
                    IdeaLink.tombstone_date == None,
                    source.tombstone_date == None,
                    target.tombstone_date == None,
                    target.discussion_id == self.discussion_id))
        return self._parent_dict

    def idea_ancestry(self, idea_id):
        """generator of ids of ancestor ideas"""
        while idea_id:
            yield idea_id
            idea_id = self.parent_dict.get(idea_id, None)

    @property
    def children_dict(self):
        if self._children_dict is None:
            if not self.parent_dict:
                (root_id,) = self.db.query(
                    RootIdea.id).filter_by(
                    discussion_id=self.discussion_id).first()
                self._children_dict = {None: (root_id,), root_id: ()}
                return self._children_dict
            children = defaultdict(list)
            for child, parent in self.parent_dict.items():
                children[parent].append(child)
            root = set(children.keys()) - set(self.parent_dict.keys())
            # assert len(root) == 1
            children[None] = [root.pop()]
            self._children_dict = children
        return self._children_dict

    @property
    def post_path_collection_raw(self):
        if self._post_path_collection_raw is None:
            self._post_path_collection_raw = PostPathGlobalCollection(
                self.discussion)
        return self._post_path_collection_raw

    def post_path_counter(self, user_id, calc_all):
        if (self._post_path_counter is None
                or not isinstance(self._post_path_counter, PostPathCounter)):
            collection = self.post_path_collection_raw
            counter = PostPathCounter(
                self.discussion, user_id, None if calc_all else ())
            counter.init_from(self.post_path_collection_raw)
            Idea.visit_idea_ids_depth_first(
                counter, self.discussion_id, self.children_dict)
            self._post_path_counter = counter
        return self._post_path_counter

    def reset_hierarchy(self):
        self._parent_dict = None
        self._children_dict = None
        self._post_path_counter = None

    def reset_content_links(self):
        self._post_path_collection_raw = None
        self._post_path_counter = None
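
# Illustrative sketch (not part of the original module): request-scoped use of
# the cache, assuming a SQLAlchemy session `db`, a `discussion_id`, a `user_id`
# and an `idea_id`.
#
#     global_data = DiscussionGlobalData(db, discussion_id, user_id)
#     counter = global_data.post_path_counter(user_id, calc_all=True)
#     posts, contributors, viewed = counter.get_counts(idea_id)
#     orphan_posts, orphan_contributors, _ = counter.get_orphan_counts()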