# models.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
import datetime, logging
import math
import urllib
import pickle
import random
import itertools

from google.appengine.api import users
from google.appengine.api import memcache
from google.appengine.ext import deferred
from google.appengine.ext.db import TransactionFailedError
from api.jsonify import jsonify

from google.appengine.ext import db
import object_property
import util
import user_util
import consts
import points
from search import Searchable
from app import App
import layer_cache
import request_cache
from discussion import models_discussion
from topics_list import all_topics_list
import nicknames
from counters import user_counter
from facebook_util import is_facebook_user_id
from accuracy_model import AccuracyModel, InvFnExponentialNormalizer

from templatefilters import slugify
from gae_bingo.gae_bingo import ab_test, bingo
from gae_bingo.models import GAEBingoIdentityModel, ConversionTypes

# Setting stores per-application key-value pairs
# for app-wide settings that must be synchronized
# across all GAE instances.

class Setting(db.Model):
    """A named, app-wide setting stored as a string in the datastore.

    All Setting entities are created under the parent key returned by
    entity_group_key(), and are read back through one cached ancestor query.
    Each public accessor reads its setting when called without an argument
    and writes it when called with a value.
    """

    value = db.StringProperty(indexed=False)

    @staticmethod
    def entity_group_key():
        """Return the parent key under which every Setting is stored."""
        return db.Key.from_path('Settings', 'default_settings')

    @staticmethod
    def _get_or_set_with_key(key, val = None):
        """Read the setting named `key`, or write `val` to it when given.

        Returns the stored string value. A read of a setting that does not
        exist returns None. Values are stored as str(val).
        """
        if val is not None:
            stored = Setting(Setting.entity_group_key(), key, value=str(val))
            db.put(stored)
            # Ask the cache layers to drop their copy of the settings dict.
            Setting._get_settings_dict(bust_cache=True)
            return stored.value

        return Setting._cache_get_by_key_name(key)

    @staticmethod
    def _cache_get_by_key_name(key):
        """Return the cached value for `key`, or None if it is not set."""
        cached = Setting._get_settings_dict().get(key)
        if cached is None:
            return None
        return cached.value

    @staticmethod
    @request_cache.cache()
    @layer_cache.cache(layer=layer_cache.Layers.Memcache)
    def _get_settings_dict():
        """Return a dict mapping key name -> Setting entity (at most 20)."""
        # ancestor query to ensure consistent results
        ancestor_query = Setting.all().ancestor(Setting.entity_group_key())
        settings_by_name = {}
        for entity in ancestor_query.fetch(20):
            settings_by_name[entity.key().name()] = entity
        return settings_by_name

    @staticmethod
    def cached_library_content_date(val = None):
        """Get or set the "cached_library_content_date" setting."""
        setting_name = "cached_library_content_date"
        return Setting._get_or_set_with_key(setting_name, val)

    @staticmethod
    def cached_exercises_date(val = None):
        """Get or set the "cached_exercises_date" setting."""
        setting_name = "cached_exercises_date"
        return Setting._get_or_set_with_key(setting_name, val)

    @staticmethod
    def count_videos(val = None):
        """Get or set the "count_videos" setting; falsy values become 0."""
        stored = Setting._get_or_set_with_key("count_videos", val)
        return stored or 0

    @staticmethod
    def last_youtube_sync_generation_start(val = None):
        """Get or set "last_youtube_sync_generation_start"; falsy -> 0."""
        stored = Setting._get_or_set_with_key("last_youtube_sync_generation_start", val)
        return stored or 0

    @staticmethod
    def smarthistory_version(val = None):
        """Get or set the "smarthistory_version" setting; falsy -> 0."""
        stored = Setting._get_or_set_with_key("smarthistory_version", val)
        return stored or 0

    @staticmethod
    def classtime_report_method(val = None):
        """Get or set the "classtime_report_method" setting."""
        setting_name = "classtime_report_method"
        return Setting._get_or_set_with_key(setting_name, val)

    @staticmethod
    def classtime_report_startdate(val = None):
        """Get or set the "classtime_report_startdate" setting."""
        setting_name = "classtime_report_startdate"
        return Setting._get_or_set_with_key(setting_name, val)


class Exercise(db.Model):
    """Datastore model describing one exercise.

    Exercises are looked up by `name`. Non-live exercises are hidden from
    everyone except developers (see is_visible_to_current_user and all()).
    A "summative" exercise cycles through the exercises named in its
    `prerequisites` list.
    """

    name = db.StringProperty()
    short_display_name = db.StringProperty(default="")
    prerequisites = db.StringListProperty()
    covers = db.StringListProperty()
    v_position = db.IntegerProperty() # actually horizontal position on knowledge map
    h_position = db.IntegerProperty() # actually vertical position on knowledge map
    seconds_per_fast_problem = db.FloatProperty(default = consts.MIN_SECONDS_PER_FAST_PROBLEM) # Seconds expected to finish a problem 'quickly' for badge calculation

    # True if this exercise is live and visible to all users.
    # Non-live exercises are only visible to admins.
    live = db.BooleanProperty(default=False)

    # True if this exercise is a quasi-exercise generated by
    # combining the content of other exercises
    summative = db.BooleanProperty(default=False)

    # Teachers contribute raw html with embedded CSS and JS
    # and we sanitize it with Caja before displaying it to
    # students.
    author = db.UserProperty()
    raw_html = db.TextProperty()
    last_modified = db.DateTimeProperty()
    creation_date = db.DateTimeProperty(auto_now_add=True, default=datetime.datetime(2011, 1, 1))

    _serialize_blacklist = [
            "author", "raw_html", "last_modified",
            "coverers", "prerequisites_ex", "assigned",
            ]

    @property
    def relative_url(self):
        """Site-relative URL of this exercise."""
        return "/exercises?exid=%s" % self.name

    @property
    def ka_url(self):
        """Absolute URL of this exercise, built via util.absolute_url."""
        return util.absolute_url("/exercises?exid=%s" % self.name)

    @staticmethod
    def get_by_name(name):
        """Return the cached exercise called `name`, or None.

        None is returned both when no such exercise exists and when the
        exercise is not visible to the current user.
        """
        # One lookup via dict.get instead of has_key() plus two index reads.
        exercise = Exercise.__get_dict_use_cache_unsafe__().get(name)
        if exercise is not None and exercise.is_visible_to_current_user():
            return exercise
        return None

    @staticmethod
    def to_display_name(name):
        """Turn an exercise name into display text ("" for a falsy name)."""
        if name:
            return name.replace('_', ' ').capitalize()
        return ""

    @property
    def display_name(self):
        """Display text derived from this exercise's name."""
        return Exercise.to_display_name(self.name)

    # The number of "sub-bars" in a summative (equivalently, # of save points + 1)
    @property
    def num_milestones(self):
        return len(self.prerequisites) if self.summative else 1

    @property
    def required_streak(self):
        """consts.REQUIRED_STREAK scaled by the number of milestones."""
        return consts.REQUIRED_STREAK * self.num_milestones

    def min_problems_imposed(self):
        """Return consts.MIN_PROBLEMS_IMPOSED."""
        return consts.MIN_PROBLEMS_IMPOSED

    @staticmethod
    def to_short_name(name):
        """Return the short name of the exercise called `name`, or ""."""
        exercise = Exercise.get_by_name(name)
        if exercise:
            return exercise.short_name()
        return ""

    def short_name(self):
        """Return at most 11 characters of the short (or display) name."""
        if self.short_display_name:
            return self.short_display_name[:11]
        return self.display_name[:11]

    def is_visible_to_current_user(self):
        """True if the exercise is live or the current user is a developer."""
        return self.live or user_util.is_current_user_developer()

    def struggling_threshold(self):
        # 96% of users have proficiency before they get to 30 problems
        # return 3 * self.required_streak

        # 85% of users have proficiency before they get to 19 problems
        return 2 * self.required_streak

    def summative_children(self):
        """Return a query for the exercises listed in `prerequisites`.

        For a non-summative exercise an empty list is returned instead of a
        query.
        """
        if not self.summative:
            return []
        query = db.Query(Exercise)
        query.filter("name IN ", self.prerequisites)
        return query

    def non_summative_exercise(self, problem_number):
        """Resolve this exercise to a concrete, non-summative exercise.

        A non-summative exercise resolves to itself. A summative exercise
        picks one of its prerequisites by `problem_number` modulo the number
        of prerequisites, recursing if the pick is itself summative.

        Raises:
            Exception: if a summative exercise has no prerequisites, or the
                selected prerequisite cannot be fetched.
        """
        if not self.summative:
            return self

        if len(self.prerequisites) <= 0:
            raise Exception("Summative exercise '%s' does not include any other exercises" % self.name)

        # For now we just cycle through all of the included exercises in a summative exercise
        index = int(problem_number) % len(self.prerequisites)
        exid = self.prerequisites[index]

        query = Exercise.all()
        query.filter('name =', exid)
        exercise = query.get()

        if not exercise:
            raise Exception("Unable to find included exercise")

        if exercise.summative:
            return exercise.non_summative_exercise(problem_number)
        else:
            return exercise

    def related_videos_query(self):
        """Return a query of ExerciseVideo rows for this exercise, in order."""
        query = ExerciseVideo.all()
        query.filter('exercise =', self.key()).order('exercise_order')
        return query

    @layer_cache.cache_with_key_fxn(lambda self: "related_videos_%s" % self.key(), layer=layer_cache.Layers.Memcache)
    def related_videos_fetch(self):
        """Fetch up to 10 related ExerciseVideo rows (cached per exercise)."""
        exercise_videos = self.related_videos_query().fetch(10)
        for exercise_video in exercise_videos:
            exercise_video.video # Pre-cache video entity
        return exercise_videos

    # followup_exercises reverse walks the prerequisites to give you
    # the exercises that list the current exercise as its prerequisite.
    # i.e. follow this exercise up with these other exercises
    def followup_exercises(self):
        return [exercise for exercise in Exercise.get_all_use_cache() if self.name in exercise.prerequisites]

    @classmethod
    def all(cls, live_only = False):
        """Return a query over exercises the current user may see.

        The query is restricted to live exercises when `live_only` is set or
        when the current user is not a developer.
        """
        query = super(Exercise, cls).all()
        if live_only or not user_util.is_current_user_developer():
            query.filter("live =", True)
        return query

    @classmethod
    def all_unsafe(cls):
        """Return an unfiltered query over every exercise, live or not."""
        return super(Exercise, cls).all()

    @staticmethod
    def get_all_use_cache():
        """Return the cached exercise list appropriate for the current user."""
        if user_util.is_current_user_developer():
            return Exercise.__get_all_use_cache_unsafe__()
        else:
            return Exercise.__get_all_use_cache_safe__()

    @staticmethod
    @layer_cache.cache_with_key_fxn(lambda *args, **kwargs: "all_exercises_unsafe_%s" % Setting.cached_exercises_date())
    def __get_all_use_cache_unsafe__():
        """Fetch up to 400 exercises (live or not) ordered by h_position."""
        query = Exercise.all_unsafe().order('h_position')
        return query.fetch(400)

    @staticmethod
    def __get_all_use_cache_safe__():
        """Return only the live exercises from the cached full list."""
        return [exercise for exercise in Exercise.__get_all_use_cache_unsafe__() if exercise.live]

    @staticmethod
    @layer_cache.cache_with_key_fxn(lambda *args, **kwargs: "all_exercises_dict_unsafe_%s" % Setting.cached_exercises_date())
    def __get_dict_use_cache_unsafe__():
        """Return a dict of exercise name -> exercise for all cached exercises."""
        exercises = Exercise.__get_all_use_cache_unsafe__()
        return dict((exercise.name, exercise) for exercise in exercises)

    @staticmethod
    @layer_cache.cache(expiration=3600)
    def get_count():
        """Return the number of live exercises (cached for 3600 seconds)."""
        return Exercise.all(live_only=True).count()

    def put(self, **kwargs):
        """Save the exercise and invalidate the exercise caches.

        Keyword arguments are forwarded to db.Model.put.

        Returns:
            The value returned by db.Model.put (the entity's key).
        """
        key = db.Model.put(self, **kwargs)
        # Change the cache-key date only after the write has succeeded, so a
        # concurrent reader cannot store pre-write data under the new key.
        Setting.cached_exercises_date(str(datetime.datetime.now()))
        Exercise.get_count(bust_cache=True)
        return key

    @staticmethod
    def get_dict(query, fxn_key):
        """Fetch up to 10000 results of `query` into a dict keyed by fxn_key(exercise)."""
        return dict((fxn_key(exercise), exercise) for exercise in query.fetch(10000))

def clamp(min_val, max_val):
    """Build a decorator that clamps a function's return value.

    The decorated function returns the median of (min_val, result, max_val),
    i.e. `result` limited to the range [min_val, max_val].

    Args:
        min_val: lower bound of the returned value.
        max_val: upper bound of the returned value.

    Returns:
        A decorator that wraps the target function and preserves its
        name and docstring.
    """
    # Imported locally so the block does not depend on a new file-level import.
    import functools

    def decorator(target_fn):
        @functools.wraps(target_fn)
        def wrapped(*args, **kwargs):
            # The middle element of the sorted triple is the clamped value.
            return sorted((min_val, target_fn(*args, **kwargs), max_val))[1]
        return wrapped
    return decorator

class UserExercise(db.Model):
    """Per-user state for one exercise.

    Tracks streaks, totals, a continuous progress value, review scheduling
    and, for newer objects only, an accuracy model that predicts whether the
    next problem will be answered correctly.
    """

    user = db.UserProperty()
    exercise = db.StringProperty()
    exercise_model = db.ReferenceProperty(Exercise)
    streak = db.IntegerProperty(default = 0)
    _progress = db.FloatProperty(default = None, indexed=False)  # A continuous value >= 0.0, where 1.0 means proficiency. This measure abstracts away the internal proficiency model.
    longest_streak = db.IntegerProperty(default = 0, indexed=False)
    # TODO(david): This property can be removed once we completely move off the streak display.
    streak_start = db.FloatProperty(default = 0.0, indexed=False)  # The starting point of the streak bar as it appears to the user, in [0,1)
    first_done = db.DateTimeProperty(auto_now_add=True)
    last_done = db.DateTimeProperty()
    total_done = db.IntegerProperty(default = 0)
    total_correct = db.IntegerProperty(default = 0)
    last_review = db.DateTimeProperty(default=datetime.datetime.min)
    review_interval_secs = db.IntegerProperty(default=(60 * 60 * 24 * consts.DEFAULT_REVIEW_INTERVAL_DAYS), indexed=False) # Default 7 days until review
    proficient_date = db.DateTimeProperty()
    seconds_per_fast_problem = db.FloatProperty(default = consts.MIN_SECONDS_PER_FAST_PROBLEM, indexed=False) # Seconds expected to finish a problem 'quickly' for badge calculation
    summative = db.BooleanProperty(default=False, indexed=False)
    _accuracy_model = object_property.ObjectProperty()  # Stateful function object that estimates P(next problem correct). Only exists for new UserExercise objects.

    _USER_EXERCISE_KEY_FORMAT = "UserExercise.all().filter('user = '%s')"

    _serialize_blacklist = ["review_interval_secs", "_progress", "_accuracy_model"]

    _MIN_PROBLEMS_FROM_ACCURACY_MODEL = AccuracyModel.min_streak_till_threshold(consts.PROFICIENCY_ACCURACY_THRESHOLD)

    # A bound function object to normalize the progress bar display from a probability
    _normalize_progress = InvFnExponentialNormalizer(
        AccuracyModel(),
        consts.PROFICIENCY_ACCURACY_THRESHOLD
    ).normalize

    def proficiency_model(self):
        """Return the current user's proficiency model name, or 'streak'.

        'streak' is the fallback when UserData.current() returns nothing.
        """
        user_data = UserData.current()
        return user_data.proficiency_model if user_data else 'streak'

    @property
    def required_streak(self):
        """Streak needed for proficiency.

        Summative exercises use the Exercise's own required_streak; all
        others use consts.REQUIRED_STREAK.
        """
        if self.summative:
            return Exercise.get_by_name(self.exercise).required_streak
        else:
            return consts.REQUIRED_STREAK

    @property
    def exercise_states(self):
        """States for this exercise from the user's exercise graph, or None."""
        user_exercise_graph = self.get_user_exercise_graph()
        if user_exercise_graph:
            return user_exercise_graph.states(self.exercise)
        return None

    @property
    def next_points(self):
        """Result of points.ExercisePointCalculator for this exercise.

        `suggested` and `proficient` default to False when no user data is
        available.
        """
        user_data = self.get_user_data()

        suggested = proficient = False

        if user_data:
            suggested = user_data.is_suggested(self.exercise)
            proficient = user_data.is_proficient_at(self.exercise)

        return points.ExercisePointCalculator(self, suggested, proficient)

    @property
    def num_milestones(self):
        """Number of milestones of the referenced Exercise."""
        return self.exercise_model.num_milestones

    def min_problems_imposed(self):
        """Delegate to the referenced Exercise's min_problems_imposed()."""
        return self.exercise_model.min_problems_imposed()

    def min_problems_required(self):
        """Larger of the imposed minimum and the accuracy model's minimum."""
        return max(self.min_problems_imposed(), UserExercise._MIN_PROBLEMS_FROM_ACCURACY_MODEL)

    # Do not transition old objects that did not have the _accuracy_model
    # property - only new UserExercise objects can use the new proficiency
    # model.
    def accuracy_model(self):
        # TODO(david): When we fully switch away from the streak model,
        #     uncomment the lines below and refactor code to remove
        #     accuracy_model guards.
        #if self._accuracy_model is None:
        #    self._accuracy_model = AccuracyModel(self)
        return self._accuracy_model

    def bingo_proficiency_model(self, test):
        # We only want to score conversions for newly-created UserExercise
        # objects that could actually use the new proficiency model behavior
        # (all existing UserExercise objects use the old streak model to
        # facilitate transitioning).
        if self.accuracy_model():
            bingo(test)

    def use_streak_model(self):
        """True if the streak model applies (by choice or for lack of an accuracy model)."""
        return self.proficiency_model() == 'streak' or not self.accuracy_model()

    # Faciliate transition for old objects that did not have the _progress property
    @property
    @clamp(0.0, 1.0)
    def progress(self):
        if self._progress is None:
            self._progress = self._get_progress_from_current_state()
        return self._progress

    def bingo_prof_model_accuracy_threshold_tests(self):
        """Score accuracy-threshold conversions for easy/hard test exercises.

        Does nothing until at least 5 problems are done, or when there is no
        accuracy model.
        """
        if self.total_done < 5 or not self.accuracy_model():
            return

        accuracy = self.accuracy_model().predict()

        if self.exercise in UserData.conversion_test_easy_exercises:
            for threshold in UserData.prof_conversion_accuracy_thresholds:
                if accuracy >= threshold:
                    self.bingo_proficiency_model('prof_accuracy_above_%s_easy' % threshold)

        elif self.exercise in UserData.conversion_test_hard_exercises:
            for threshold in UserData.prof_conversion_accuracy_thresholds:
                if accuracy >= threshold:
                    self.bingo_proficiency_model('prof_accuracy_above_%s_hard' % threshold)

    def update_proficiency_model(self, correct):
        """Update streak, accuracy model and progress after one answer.

        Args:
            correct: whether the answer was correct.
        """
        if not correct:
            if self.summative:
                # Reset to latest milestone
                self.streak = (self.streak // consts.CHALLENGE_STREAK_BARRIER) * consts.CHALLENGE_STREAK_BARRIER
            else:
                self.streak = 0

        if self.accuracy_model():
            self.accuracy_model().update(correct)
            self.bingo_prof_model_accuracy_threshold_tests()

        self._progress = self._get_progress_from_current_state()

        if self.use_streak_model():
            self._update_progress_from_streak_model(correct)

    @clamp(0.0, 1.0)
    def _get_progress_from_current_state(self):
        """Compute progress (clamped to [0.0, 1.0]) from the current state.

        Under the streak model the stored _progress is returned when set,
        otherwise it is derived from the streak. Under the accuracy model it
        is derived from the model's prediction. Side effect: for a summative
        exercise whose normalized prediction reaches 1.0, _accuracy_model is
        replaced with a fresh AccuracyModel.
        """

        if self.use_streak_model():
            if self._progress is not None:
                return self._progress

            if self.summative:
                return float(self.streak) / self.required_streak
            else:
                return self.streak_start + (
                    float(self.streak) / self.required_streak * (1.0 - self.streak_start))

        if self.total_correct == 0:
            return 0.0

        if self.accuracy_model().total_done <= self.accuracy_model().total_correct():
            # Impose a minimum number of problems required to be done.
            # If the user has no wrong answers yet, we can get a progress bar
            # amount by just dividing correct answers by the # of problems
            # required.
            normalized_prediction = min(float(self.accuracy_model().total_correct()) / self.min_problems_required(), 1.0)
        else:
            prediction = self.accuracy_model().predict()
            normalized_prediction = UserExercise._normalize_progress(prediction)

        if self.summative:
            if self._progress is None:
                milestones_completed = self.streak // consts.CHALLENGE_STREAK_BARRIER
            else:
                milestones_completed = math.floor(self._progress * self.num_milestones)

            if normalized_prediction >= 1.0:
                # The user just crossed a challenge barrier. Reset their
                # accuracy model to start fresh.
                self._accuracy_model = AccuracyModel()

            return float(milestones_completed + normalized_prediction) / self.num_milestones

        else:
            return normalized_prediction

    def _update_progress_from_streak_model(self, correct):
        """Adjust _progress for one answer under the streak model.

        Requires _progress to be set already. A correct answer increases
        progress (capped at 1.0); a wrong answer resets it from the streak
        (summative) or scales it by consts.STREAK_RESET_FACTOR.
        """
        assert self._progress is not None

        if correct:
            if self._progress >= 1.0:
                self._progress = 1.0
                return

            if self.summative:
                progress_increment = 1.0 / self.required_streak
            else:
                progress_increment = (1.0 - self._progress) / (self.required_streak - self.streak)

            self._progress += progress_increment

        else:
            if self.summative:
                self._progress = float(self.streak) / self.required_streak
            else:
                self._progress *= consts.STREAK_RESET_FACTOR

    @staticmethod
    def to_progress_display(num):
        """Format progress as a whole percentage, or 'Max' above consts.MAX_PROGRESS_SHOWN."""
        return '%.0f%%' % math.floor(num * 100.0) if num <= consts.MAX_PROGRESS_SHOWN else 'Max'

    def progress_display(self):
        """Return this object's progress formatted by to_progress_display."""
        return UserExercise.to_progress_display(self.progress)

    @staticmethod
    def get_key_for_email(email):
        """Return the string key built from _USER_EXERCISE_KEY_FORMAT for `email`."""
        return UserExercise._USER_EXERCISE_KEY_FORMAT % email

    @staticmethod
    def get_for_user_data(user_data):
        """Return a query for all UserExercise rows of user_data.user."""
        query = UserExercise.all()
        query.filter('user =', user_data.user)
        return query

    def get_user_data(self):
        """Return the owning UserData, preferring an attached `_user_data`.

        Logs at critical level (and returns the falsy value) when no user
        data is found.
        """
        user_data = None

        if hasattr(self, "_user_data"):
            user_data = self._user_data
        else:
            user_data = UserData.get_from_db_key_email(self.user.email())

        if not user_data:
            logging.critical("Empty user data for UserExercise w/ .user = %s" % self.user)

        return user_data

    def get_user_exercise_graph(self):
        """Return the user's exercise graph, preferring an attached `_user_exercise_graph`."""
        user_exercise_graph = None

        if hasattr(self, "_user_exercise_graph"):
            user_exercise_graph = self._user_exercise_graph
        else:
            user_exercise_graph = UserExerciseGraph.get(self.get_user_data())

        return user_exercise_graph

    def belongs_to(self, user_data):
        """Truthy if `user_data` is given and its key_email matches this row's user (case-insensitive)."""
        return user_data and self.user.email().lower() == user_data.key_email.lower()

    def struggling_threshold(self):
        """Delegate to the referenced Exercise's struggling_threshold()."""
        return self.exercise_model.struggling_threshold()

    @staticmethod
    def get_review_interval_from_seconds(seconds):
        """Convert seconds to a timedelta bounded by the min/max review interval days."""
        review_interval = datetime.timedelta(seconds=seconds)

        if review_interval.days < consts.MIN_REVIEW_INTERVAL_DAYS:
            review_interval = datetime.timedelta(days=consts.MIN_REVIEW_INTERVAL_DAYS)
        elif review_interval.days > consts.MAX_REVIEW_INTERVAL_DAYS:
            review_interval = datetime.timedelta(days=consts.MAX_REVIEW_INTERVAL_DAYS)

        return review_interval

    def has_been_proficient(self):
        """True if a proficient_date has ever been recorded."""
        return self.proficient_date is not None

    def get_review_interval(self):
        """Return the bounded review interval for review_interval_secs."""
        return UserExercise.get_review_interval_from_seconds(self.review_interval_secs)

    def schedule_review(self, correct, now=None):
        """Update last_review and review_interval_secs after an answer.

        Args:
            correct: whether the answer was correct.
            now: the current time; defaults to datetime.datetime.now() taken
                when the method is called.
        """
        # Resolve the default at call time. A `datetime.datetime.now()`
        # default argument is evaluated only once, when the class body is
        # executed, and would go stale in a long-running process.
        if now is None:
            now = datetime.datetime.now()

        # If the user is not now and never has been proficient, don't schedule a review
        if self.progress < 1.0 and not self.has_been_proficient():
            return

        # If the user is hitting a new streak either for the first time or after having lost
        # proficiency, reset their review interval counter.
        if self.progress >= 1.0:
            self.review_interval_secs = 60 * 60 * 24 * consts.DEFAULT_REVIEW_INTERVAL_DAYS

        review_interval = self.get_review_interval()

        if correct and self.last_review != datetime.datetime.min:
            time_since_last_review = now - self.last_review
            if time_since_last_review >= review_interval:
                review_interval = time_since_last_review * 2
        if not correct:
            review_interval = review_interval // 2
        if correct:
            self.last_review = now
        else:
            self.last_review = datetime.datetime.min
        self.review_interval_secs = review_interval.days * 86400 + review_interval.seconds

    def set_proficient(self, proficient, user_data):
        """Add or remove this exercise from user_data.proficient_exercises.

        Gaining proficiency records proficient_date, saves user_data, sends a
        notification and scores A/B-test conversions. Losing it removes the
        exercise and saves user_data. Nothing happens if the user is not and
        never has been proficient.
        """
        if not proficient and not self.has_been_proficient():
            # Not proficient and never has been so nothing to do
            return

        if proficient:
            if self.exercise not in user_data.proficient_exercises:
                self.proficient_date = datetime.datetime.now()

                user_data.proficient_exercises.append(self.exercise)
                user_data.need_to_reassess = True
                user_data.put()

                util_notify.update(user_data, self, False, True)

                # Score conversions for A/B test
                self.bingo_proficiency_model('prof_gained_proficiency_all')

                if self.exercise in UserData.conversion_test_hard_exercises:
                    self.bingo_proficiency_model('prof_gained_proficiency_hard')
                    self.bingo_proficiency_model('prof_gained_proficiency_hard_binary')
                    bingo('hints_gained_proficiency_hard_binary')
                elif self.exercise in UserData.conversion_test_easy_exercises:
                    self.bingo_proficiency_model('prof_gained_proficiency_easy')
                    self.bingo_proficiency_model('prof_gained_proficiency_easy_binary')
                    bingo('hints_gained_proficiency_easy_binary')

        else:
            if self.exercise in user_data.proficient_exercises:
                user_data.proficient_exercises.remove(self.exercise)
                user_data.need_to_reassess = True
                user_data.put()

class CoachRequest(db.Model):
    """A request from a coach to coach a particular student.

    Entities are keyed by a name derived from the coach's and the student's
    key_email (see key_for).
    """

    coach_requesting = db.UserProperty()
    student_requested = db.UserProperty()

    @property
    def coach_requesting_data(self):
        """UserData of the requesting coach, looked up once per instance."""
        if hasattr(self, "coach_user_data"):
            return self.coach_user_data

        coach_email = self.coach_requesting.email()
        self.coach_user_data = UserData.get_from_db_key_email(coach_email)
        return self.coach_user_data

    @property
    def student_requested_data(self):
        """UserData of the requested student, looked up once per instance."""
        if hasattr(self, "student_user_data"):
            return self.student_user_data

        student_email = self.student_requested.email()
        self.student_user_data = UserData.get_from_db_key_email(student_email)
        return self.student_user_data

    @staticmethod
    def key_for(user_data_coach, user_data_student):
        """Build the key name for a coach/student pair."""
        coach_email = user_data_coach.key_email
        student_email = user_data_student.key_email
        return "%s_request_for_%s" % (coach_email, student_email)

    @staticmethod
    def get_for(user_data_coach, user_data_student):
        """Return the existing request for the pair, via get_by_key_name."""
        key_name = CoachRequest.key_for(user_data_coach, user_data_student)
        return CoachRequest.get_by_key_name(key_name)

    @staticmethod
    def get_or_insert_for(user_data_coach, user_data_student):
        """Return the request for the pair, creating it if necessary."""
        key_name = CoachRequest.key_for(user_data_coach, user_data_student)
        return CoachRequest.get_or_insert(
                key_name = key_name,
                coach_requesting = user_data_coach.user,
                student_requested = user_data_student.user,
                )

    @staticmethod
    def get_for_student(user_data_student):
        """Return a query for all requests addressed to the given student."""
        query = CoachRequest.all()
        return query.filter("student_requested = ", user_data_student.user)

    @staticmethod
    def get_for_coach(user_data_coach):
        """Return a query for all requests made by the given coach."""
        query = CoachRequest.all()
        return query.filter("coach_requesting = ", user_data_coach.user)

class StudentList(db.Model):
    """A named list of students, associated with one or more coach keys.

    Membership is stored on the student side: a UserData belongs to the list
    when the list's key appears in its `student_lists`.
    """

    name = db.StringProperty()
    coaches = db.ListProperty(db.Key)

    def delete(self, *args, **kwargs):
        """Detach every student from this list, then delete the list."""
        self.remove_all_students()
        db.Model.delete(self, *args, **kwargs)

    def remove_all_students(self):
        """Remove this list's key from each member and save them in one put."""
        members = self.get_students_data()
        for member in members:
            member.student_lists.remove(self.key())
        db.put(members)

    @property
    def students(self):
        """Query of UserData entities whose student_lists contain this list."""
        member_query = UserData.all()
        return member_query.filter("student_lists = ", self.key())

    # these methods have the same interface as the methods on UserData
    def get_students_data(self):
        """Return the members of this list as a concrete Python list."""
        return list(self.students)

    @staticmethod
    def get_for_coach(key):
        """Return a query for the StudentLists whose coaches include `key`."""
        return StudentList.all().filter("coaches = ", key)

class UserVideoCss(db.Model):
    """Per-user CSS that marks videos as started or completed.

    The sets of CSS selectors are kept pickled in `pickled_dict` under the
    keys 'started' and 'completed'; `video_css` holds the stylesheet text
    rendered from them by load_pickled().
    """

    user = db.UserProperty()
    video_css = db.TextProperty()
    pickled_dict = db.BlobProperty()
    last_modified = db.DateTimeProperty(required=True, auto_now=True, indexed=False)
    version = db.IntegerProperty(default=0, indexed=False)

    # Status codes passed to set_css_deferred.
    STARTED = 0
    COMPLETED = 1

    @staticmethod
    def get_for_user_data(user_data):
        """Return the user's UserVideoCss, creating an empty one if missing."""
        empty_state = pickle.dumps({'started': set([]), 'completed': set([])})
        key_name = UserVideoCss._key_for(user_data)
        return UserVideoCss.get_or_insert(key_name,
                                          user=user_data.user,
                                          video_css='',
                                          pickled_dict=empty_state,
                                          )

    @staticmethod
    def _key_for(user_data):
        """Return the key name used for the given user's entity."""
        return 'user_video_css_%s' % user_data.key_email

    @staticmethod
    def set_started(user_data, video, version):
        """Queue a deferred task that marks `video` as started."""
        deferred.defer(set_css_deferred, user_data.key(), video.key(), UserVideoCss.STARTED, version)

    @staticmethod
    def set_completed(user_data, video, version):
        """Queue a deferred task that marks `video` as completed."""
        deferred.defer(set_css_deferred, user_data.key(), video.key(), UserVideoCss.COMPLETED, version)

    @staticmethod
    def _chunker(seq, size):
        """Return a generator of consecutive slices of `seq`, `size` items each."""
        offsets = xrange(0, len(seq), size)
        return (seq[start:start + size] for start in offsets)

    def load_pickled(self):
        """Rebuild `video_css` from the pickled selector sets.

        Selectors are grouped at most 20 per rule; all 'started' rules are
        emitted before the 'completed' rules.
        """
        max_selectors = 20
        selector_sets = pickle.loads(self.pickled_dict)

        started_css = '{background-image:url(/images/video-indicator-started.png);padding-left:14px;}'
        complete_css = '{background-image:url(/images/video-indicator-complete.png);padding-left:14px;}'

        pieces = []
        for status_key, rule_body in (('started', started_css), ('completed', complete_css)):
            selectors = list(selector_sets[status_key])
            for selector_group in UserVideoCss._chunker(selectors, max_selectors):
                pieces.append(','.join(selector_group))
                pieces.append(rule_body)

        self.video_css = ''.join(pieces)

def set_css_deferred(user_data_key, video_key, status, version):
    """Record a video as started or completed in the user's UserVideoCss.

    Scheduled through deferred.defer by UserVideoCss.set_started and
    UserVideoCss.set_completed.

    Args:
        user_data_key: datastore key of the UserData to update.
        video_key: datastore key of the video; its id forms the CSS selector.
        status: UserVideoCss.STARTED marks the video started; any other
            value marks it completed.
        version: value stored in the entity's `version` property.
    """
    user_data = UserData.get(user_data_key)
    if user_data is None:
        # NOTE(review): assumes UserData.get is the stock db.Model.get, which
        # returns None for a missing entity. Without this guard the task
        # raises AttributeError further down; confirm that skipping is the
        # desired outcome for a deleted user.
        logging.warning("set_css_deferred: no UserData found for key %s", user_data_key)
        return

    uvc = UserVideoCss.get_for_user_data(user_data)
    css = pickle.loads(uvc.pickled_dict)

    # A video is in exactly one of the two sets, so leave one and join the other.
    selector = '.v%d' % video_key.id()
    if status == UserVideoCss.STARTED:
        css['completed'].discard(selector)
        css['started'].add(selector)
    else:
        css['started'].discard(selector)
        css['completed'].add(selector)

    uvc.pickled_dict = pickle.dumps(css)
    uvc.load_pickled()
    uvc.version = version
    db.put(uvc)

# URL-style placeholder identity string.
# NOTE(review): presumably stands in for a visitor who has not yet been given
# a phantom user account; confirm against the code that reads this constant.
PRE_PHANTOM_EMAIL = "http://nouserid.khanacademy.org/pre-phantom-user-2"

class UserData(GAEBingoIdentityModel, db.Model):
    """Datastore record of one user's identity, progress and coaching links.

    Entities can be looked up by user_id, by the email held in the `user`
    property (exposed as key_email), or by user_email, and are created
    through insert_for().
    """

    user = db.UserProperty()
    user_id = db.StringProperty()
    user_nickname = db.StringProperty(indexed=False)
    current_user = db.UserProperty()
    moderator = db.BooleanProperty(default=False)
    developer = db.BooleanProperty(default=False)
    joined = db.DateTimeProperty(auto_now_add=True)
    last_login = db.DateTimeProperty(indexed=False)
    proficient_exercises = object_property.StringListCompatTsvProperty() # Names of exercises in which the user is *explicitly* proficient
    all_proficient_exercises = object_property.StringListCompatTsvProperty() # Names of all exercises in which the user is proficient
    suggested_exercises = object_property.StringListCompatTsvProperty()
    badges = object_property.StringListCompatTsvProperty() # All awarded badges
    need_to_reassess = db.BooleanProperty(indexed=False)
    points = db.IntegerProperty(default = 0)
    total_seconds_watched = db.IntegerProperty(default = 0)
    coaches = db.StringListProperty()
    coworkers = db.StringListProperty()
    student_lists = db.ListProperty(db.Key)
    map_coords = db.StringProperty(indexed=False)
    expanded_all_exercises = db.BooleanProperty(default=True, indexed=False)
    # -1 means "not counted yet"; see get_videos_completed()
    videos_completed = db.IntegerProperty(default = -1)
    last_daily_summary = db.DateTimeProperty(indexed=False)
    last_badge_review = db.DateTimeProperty(indexed=False)
    last_activity = db.DateTimeProperty(indexed=False)
    start_consecutive_activity_date = db.DateTimeProperty(indexed=False)
    # -1 means "not counted yet"; see feedback_notification_count()
    count_feedback_notification = db.IntegerProperty(default = -1, indexed=False)
    question_sort_order = db.IntegerProperty(default = -1, indexed=False)
    user_email = db.StringProperty()
    uservideocss_version = db.IntegerProperty(default = 0, indexed=False)

    _serialize_blacklist = [
            "badges", "count_feedback_notification",
            "last_daily_summary", "need_to_reassess", "videos_completed",
            "moderator", "expanded_all_exercises", "question_sort_order",
            "last_login", "user", "current_user", "map_coords", "expanded_all_exercises",
            "user_nickname", "user_email", "seconds_since_joined",
    ]

    # Names and conversion types handed to ab_test() by proficiency_model.
    prof_conversion_accuracy_thresholds = [0.85, 0.90, 0.92, 0.94, 0.96]
    _prof_model_conversion_tests = ([
        ('prof_gained_proficiency_all', ConversionTypes.Counting),
        ('prof_gained_proficiency_easy', ConversionTypes.Counting),
        ('prof_gained_proficiency_hard', ConversionTypes.Counting),
        ('prof_gained_proficiency_easy_binary', ConversionTypes.Binary),
        ('prof_gained_proficiency_hard_binary', ConversionTypes.Binary),
        ('prof_problems_done', ConversionTypes.Counting),
        ('prof_new_exercises_attempted', ConversionTypes.Counting),
        ('prof_does_problem_just_after_proficiency', ConversionTypes.Counting),
        ('prof_problem_correct_just_after_proficiency', ConversionTypes.Counting),
        ('prof_wrong_problems', ConversionTypes.Counting),
        ('prof_keep_going_after_wrong', ConversionTypes.Counting),
    ] + [('prof_accuracy_above_%s_easy' % p, ConversionTypes.Binary) for p in prof_conversion_accuracy_thresholds]
    + [('prof_accuracy_above_%s_hard' % p, ConversionTypes.Binary) for p in prof_conversion_accuracy_thresholds])
    _prof_model_conversion_names, _prof_model_conversion_types = [list(x) for x in zip(*_prof_model_conversion_tests)]

    conversion_test_hard_exercises = set(['order_of_operations', 'graphing_points',
        'probability_1', 'domain_of_a_function', 'division_4',
        'ratio_word_problems', 'writing_expressions_1', 'ordering_numbers',
        'geometry_1', 'converting_mixed_numbers_and_improper_fractions'])
    conversion_test_easy_exercises = set(['counting_1', 'significant_figures_1', 'subtraction_1'])

    @property
    @request_cache.cache()
    def proficiency_model(self):
        """Return the result of the "Proficiency Model" A/B test.

        The test has the alternatives "accuracy" (weight 1) and "streak"
        (weight 9) and is registered with the prof_* conversions above.
        """
        return ab_test("Proficiency Model", {"accuracy": 1, "streak": 9},
            UserData._prof_model_conversion_names, UserData._prof_model_conversion_types)

    @property
    def nickname(self):
        """Return the stored nickname, or look one up when it is unusable."""
        # Only return cached value if it exists and it wasn't cached during a Facebook API hiccup
        if self.user_nickname and not is_facebook_user_id(self.user_nickname):
            return self.user_nickname
        else:
            return nicknames.get_nickname_for(self)

    @property
    def email(self):
        """Return user_email."""
        return self.user_email

    @property
    def key_email(self):
        """Return the email of the `user` property.

        This is the value other entities use to refer to this user (for
        example in `coaches` and `coworkers` lists and in key names).
        """
        return self.user.email()

    @property
    def badge_counts(self):
        """Return util_badges.get_badge_counts() for this user."""
        return util_badges.get_badge_counts(self)

    @staticmethod
    @request_cache.cache()
    def current():
        """Return the UserData of the current request's user, or None.

        The user is looked up by user id first and by key email second; a
        new entity is inserted when neither lookup finds one. Returns None
        when there is no current user id.
        """
        user_id = util.get_current_user_id(bust_cache=True)
        email = user_id

        google_user = users.get_current_user()
        if google_user:
            email = google_user.email()

        if user_id:
            # Once we have rekeyed legacy entities,
            # we will be able to simplify this.
            return  UserData.get_from_user_id(user_id) or \
                    UserData.get_from_db_key_email(email) or \
                    UserData.insert_for(user_id, email)
        return None

    @staticmethod
    def pre_phantom():
        """Return (inserting if needed) the UserData keyed by PRE_PHANTOM_EMAIL."""
        return UserData.insert_for(PRE_PHANTOM_EMAIL, PRE_PHANTOM_EMAIL)

    @property
    def is_phantom(self):
        """Return util.is_phantom_user() for this user's user_id."""
        return util.is_phantom_user(self.user_id)

    @property
    def is_pre_phantom(self):
        """Return True when user_email equals PRE_PHANTOM_EMAIL."""
        return PRE_PHANTOM_EMAIL == self.user_email

    @property
    def seconds_since_joined(self):
        """Return util.seconds_since() applied to the joined timestamp."""
        return util.seconds_since(self.joined)

    @staticmethod
    @request_cache.cache_with_key_fxn(lambda user_id: "UserData_user_id:%s" % user_id)
    def get_from_user_id(user_id):
        """Return the UserData whose user_id matches, or None.

        When several entities match, the one with the most points wins.
        """
        if not user_id:
            return None

        query = UserData.all()
        query.filter('user_id =', user_id)
        query.order('-points') # Temporary workaround for issue 289

        return query.get()

    @staticmethod
    def get_from_user_input_email(email):
        """Return the UserData whose user_email matches, or None.

        When several entities match, the one with the most points wins.
        """
        if not email:
            return None

        query = UserData.all()
        query.filter('user_email =', email)
        query.order('-points') # Temporary workaround for issue 289

        return query.get()

    @staticmethod
    def get_from_db_key_email(email):
        """Return the UserData whose `user` property has this email, or None.

        When several entities match, the one with the most points wins.
        """
        if not email:
            return None

        query = UserData.all()
        query.filter('user =', users.User(email))
        query.order('-points') # Temporary workaround for issue 289

        return query.get()

    @staticmethod
    def insert_for(user_id, email):
        """Get or create the UserData keyed by user_id.

        Parameters:
        user_id - Id stored in user_id and used to build the key name
        email - Email stored in user, current_user and user_email

        Returns the existing or newly created entity, or None when either
        argument is empty. For non-phantom users the registered-user counter
        is incremented when the entity was created within the last minute.
        """
        if not user_id or not email:
            return None

        user = users.User(email)
        key = "user_id_key_%s" % user_id

        user_data = UserData.get_or_insert(
            key_name=key,
            user=user,
            current_user=user,
            user_id=user_id,
            moderator=False,
            last_login=datetime.datetime.now(),
            proficient_exercises=[],
            suggested_exercises=[],
            need_to_reassess=True,
            points=0,
            coaches=[],
            user_email=email

            )

        if not user_data.is_phantom:
            # Record that we now have one more registered user
            #
            # Extra safety check against user_data.joined in case some
            # subtle bug results in lots of calls to insert_for for
            # UserData objects with existing key_names.
            #
            # Compare the whole timedelta: timedelta.seconds holds only the
            # sub-day remainder, so an entity created N days and a few
            # seconds ago would otherwise be mistaken for a brand new one.
            age = datetime.datetime.now() - user_data.joined
            if datetime.timedelta(0) <= age < datetime.timedelta(seconds=60):
                user_counter.add(1)

        return user_data

    def delete(self):
        """Log this user's data, decrement the user counter and delete it.

        The counter is only decremented for non-phantom users.
        """
        logging.info("Deleting user data for %s with points %s" % (self.key_email, self.points))
        logging.info("Dumping user data for %s: %s" % (self.user_id, jsonify(self)))

        if not self.is_phantom:
            user_counter.add(-1)

        db.delete(self)

    def get_or_insert_exercise(self, exercise, allow_insert = True):
        """Return this user's UserExercise for exercise.

        Parameters:
        exercise - Exercise whose name identifies the UserExercise
        allow_insert - When True, create the UserExercise if none exists

        Returns None when exercise is empty, or when nothing exists and
        allow_insert is False.
        """
        if not exercise:
            return None

        exid = exercise.name
        userExercise = UserExercise.get_by_key_name(exid, parent=self)

        if not userExercise:
            # There are some old entities lying around that don't have keys.
            # We have to check for them here, but once we have reparented and rekeyed legacy entities,
            # this entire function can just be a call to .get_or_insert()
            query = UserExercise.all(keys_only = True)
            query.filter('user =', self.user)
            query.filter('exercise =', exid)
            query.order('-total_done') # Temporary workaround for issue 289

            # In order to guarantee consistency in the HR datastore, we need to query
            # via db.get for these old, parent-less entities.
            key_user_exercise = query.get()
            if key_user_exercise:
                userExercise = UserExercise.get(str(key_user_exercise))

        if allow_insert and not userExercise:
            userExercise = UserExercise.get_or_insert(
                key_name=exid,
                parent=self,
                user=self.user,
                exercise=exid,
                exercise_model=exercise,
                streak=0,
                _progress=0.0,
                streak_start=0.0,
                longest_streak=0,
                first_done=datetime.datetime.now(),
                last_done=None,
                total_done=0,
                summative=exercise.summative,
                _accuracy_model=AccuracyModel(),
                )

        return userExercise

    def reassess_from_graph(self, user_exercise_graph):
        """Copy proficient and suggested exercise names from the graph.

        Clears need_to_reassess and returns True when either list differs
        from what was stored before. The entity is not saved here.
        """
        all_proficient_exercises = user_exercise_graph.proficient_exercise_names()
        suggested_exercises = user_exercise_graph.suggested_exercise_names()

        is_changed = (all_proficient_exercises != self.all_proficient_exercises or
                      suggested_exercises != self.suggested_exercises)

        self.all_proficient_exercises = all_proficient_exercises
        self.suggested_exercises = suggested_exercises
        self.need_to_reassess = False

        return is_changed

    def reassess_if_necessary(self, user_exercise_graph=None):
        """Reassess from the exercise graph when need_to_reassess is set.

        Returns False without doing anything when no reassessment is
        pending; otherwise returns the result of reassess_from_graph().
        The graph is loaded with UserExerciseGraph.get() when not supplied.
        """
        # NOTE(review): a None all_proficient_exercises also skips the
        # reassessment, which leaves it None for callers that then test
        # membership in it -- confirm this is intended.
        if not self.need_to_reassess or self.all_proficient_exercises is None:
            return False

        if user_exercise_graph is None:
            user_exercise_graph = UserExerciseGraph.get(self)

        return self.reassess_from_graph(user_exercise_graph)

    def is_proficient_at(self, exid, exgraph=None):
        """Return True when exid is in all_proficient_exercises."""
        self.reassess_if_necessary(exgraph)
        return (exid in self.all_proficient_exercises)

    def is_explicitly_proficient_at(self, exid):
        """Return True when exid is in proficient_exercises."""
        return (exid in self.proficient_exercises)

    def is_suggested(self, exid):
        """Return True when exid is in suggested_exercises."""
        self.reassess_if_necessary()
        return (exid in self.suggested_exercises)

    def get_students_data(self):
        """Return the UserData of users who list this user as a coach.

        Matches on key_email and, when it contains upper-case characters,
        also on its lower-cased form, without returning a student twice.
        """
        coach_email = self.key_email
        query = UserData.all().filter('coaches =', coach_email)
        students_data = [s for s in query.fetch(1000)]

        if coach_email.lower() != coach_email:
            students_set = set([s.key().id_or_name() for s in students_data])
            query = UserData.all().filter('coaches =', coach_email.lower())
            for student_data in query:
                if student_data.key().id_or_name() not in students_set:
                    students_data.append(student_data)
        return students_data

    def get_coworkers_data(self):
        """Return the UserData of every coworker that can be found.

        Coworker emails with no matching UserData are left out.
        """
        coworkers_data = [UserData.get_from_db_key_email(coworker_email)
                          for coworker_email in self.coworkers]
        return [user_data for user_data in coworkers_data
                if user_data is not None]

    def has_students(self):
        """Return True when at least one user lists this user as a coach.

        Matches on key_email and, when it differs, its lower-cased form.
        """
        coach_email = self.key_email
        count = UserData.all().filter('coaches =', coach_email).count()

        if coach_email.lower() != coach_email:
            count += UserData.all().filter('coaches =', coach_email.lower()).count()

        return count > 0

    def coach_emails(self):
        """Return the `email` of each coach whose UserData can be found."""
        emails = []
        for key_email in self.coaches:
            user_data_coach = UserData.get_from_db_key_email(key_email)
            if user_data_coach:
                emails.append(user_data_coach.email)
        return emails

    def is_coached_by(self, user_data_coach):
        """Return True when the coach's key_email (or its lower-case form) is in coaches."""
        return user_data_coach.key_email in self.coaches or user_data_coach.key_email.lower() in self.coaches

    def is_coworker_of(self, user_data_coworker):
        """Return True when the other user's key_email is in coworkers."""
        return user_data_coworker.key_email in self.coworkers

    def is_coached_by_coworker_of_coach(self, user_data_coach):
        """Return True when any coworker of user_data_coach is in coaches."""
        for coworker_email in user_data_coach.coworkers:
            if coworker_email in self.coaches:
                return True
        return False

    def is_administrator(self):
        """Return a truthy value when the logged-in user is an app admin.

        The result is the `and` of the current UserData and
        users.is_current_user_admin(), so it is not necessarily a bool.
        """
        # Only works for currently logged in user. Make sure there
        # is both a current user data and current user is an admin.
        user_data = UserData.current()
        return user_data and users.is_current_user_admin()

    def is_visible_to(self, user_data):
        """Return a truthy value when user_data may see this user.

        True for a coach of this user, a coworker of one of this user's
        coaches, a developer, or an administrator.
        """
        return self.is_coached_by(user_data) or self.is_coached_by_coworker_of_coach(user_data) or user_data.developer or user_data.is_administrator()

    def are_students_visible_to(self, user_data):
        """Return a truthy value when user_data may see this user's students.

        True for a coworker of this user, a developer, or an administrator.
        """
        return self.is_coworker_of(user_data) or user_data.developer or user_data.is_administrator()

    def record_activity(self, dt_activity):
        """Update last_activity and the consecutive-activity streak start.

        Activity timestamps that are not later than last_activity only fill
        in missing values. The entity is not saved here.
        """

        # Make sure last_activity and start_consecutive_activity_date have values
        self.last_activity = self.last_activity or dt_activity
        self.start_consecutive_activity_date = self.start_consecutive_activity_date or dt_activity

        if dt_activity > self.last_activity:

            # If it has been over 36 hours since we last saw this user, restart the consecutive activity streak.
            #
            # We allow for a lenient 36 hours in order to offer kinder timezone interpretation.
            # See http://meta.stackoverflow.com/questions/55483/proposed-consecutive-days-badge-tracking-change
            if util.hours_between(self.last_activity, dt_activity) >= 36:
                self.start_consecutive_activity_date = dt_activity

            self.last_activity = dt_activity

    def current_consecutive_activity_days(self):
        """Return the whole days between streak start and last activity.

        Returns 0 when either timestamp is missing or when the last
        activity was 36 or more hours ago.
        """
        if not self.last_activity or not self.start_consecutive_activity_date:
            return 0

        dt_now = datetime.datetime.now()

        # If it has been over 36 hours since last activity, bail.
        if util.hours_between(self.last_activity, dt_now) >= 36:
            return 0

        return (self.last_activity - self.start_consecutive_activity_date).days

    def add_points(self, points):
        """Add points to this user, notifying when a 2500 boundary is crossed.

        The point total seen on the first call is remembered on the
        instance for original_points(). The entity is not saved here.
        """
        if self.points is None:
            self.points = 0

        if not hasattr(self, "_original_points"):
            self._original_points = self.points

        if (self.points % 2500) > ((self.points+points) % 2500): #Check if we crossed an interval of 2500 points
            util_notify.update(self,None,True)
        self.points += points

    def original_points(self):
        """Return the point total from before the first add_points() call.

        Returns 0 when add_points() has not been called on this instance.
        """
        if hasattr(self, "_original_points"):
            return self._original_points
        return 0

    def get_videos_completed(self):
        """Return videos_completed, counting and saving it when negative."""
        if self.videos_completed < 0:
            self.videos_completed = UserVideo.count_completed_for_user_data(self)
            self.put()
        return self.videos_completed

    def feedback_notification_count(self):
        """Return count_feedback_notification, counting and saving it when it is -1."""
        if self.count_feedback_notification == -1:
            self.count_feedback_notification = models_discussion.FeedbackNotification.gql("WHERE user = :1", self.user).count()
            self.put()
        return self.count_feedback_notification

class Video(Searchable, db.Model):
    """A video entity, indexed for search on title, keywords and description."""

    youtube_id = db.StringProperty()
    url = db.StringProperty()
    title = db.StringProperty()
    description = db.TextProperty()
    playlists = db.StringListProperty()
    keywords = db.StringProperty()
    duration = db.IntegerProperty(default = 0)

    # Human readable, unique id that can be used in URLS.
    readable_id = db.StringProperty()

    # YouTube view count from last sync.
    views = db.IntegerProperty(default = 0)

    # Date first added via KA library sync with YouTube.
    # This property hasn't always existed, so for many old videos
    # this date may be much later than the actual YouTube upload date.
    date_added = db.DateTimeProperty(auto_now_add=True)

    # Last download version in which video download was prepped.
    download_version = db.IntegerProperty(default = 0)
    CURRENT_DOWNLOAD_VERSION = 2

    _serialize_blacklist = ["download_version", "CURRENT_DOWNLOAD_VERSION"]

    INDEX_ONLY = ['title', 'keywords', 'description']
    INDEX_TITLE_FROM_PROP = 'title'
    INDEX_USES_MULTI_ENTITIES = False

    @property
    def ka_url(self):
        """Absolute URL of this video's /video/<readable_id> page."""
        path = '/video/%s' % self.readable_id
        return util.absolute_url(path)

    @property
    def download_urls(self):
        """Dict with the "mp4" and "png" download URLs, or None.

        None is returned unless download_version equals
        CURRENT_DOWNLOAD_VERSION.
        """
        if self.download_version != Video.CURRENT_DOWNLOAD_VERSION:
            return None

        download_url_base = "http://www.archive.org/download/KA-converted-%s" % self.youtube_id
        return {
                "mp4": "%s/%s.mp4" % (download_url_base, self.youtube_id),
                "png": "%s/%s.png" % (download_url_base, self.youtube_id),
                }

    def download_video_url(self):
        """Return the "mp4" entry of download_urls, or None when there are none."""
        urls = self.download_urls
        return urls.get("mp4") if urls else None

    def youtube_thumbnail_url(self):
        """Return the hqdefault thumbnail URL for this video's youtube_id."""
        thumbnail_template = "http://img.youtube.com/vi/%s/hqdefault.jpg"
        return thumbnail_template % self.youtube_id

    @staticmethod
    def get_for_readable_id(readable_id):
        """Return the newest valid Video with this readable_id, or None."""
        matches = Video.all()
        matches.filter('readable_id =', readable_id)
        # The following should just be:
        # video = query.get()
        # but the database currently contains multiple Video objects for a particular
        # video.  Some are old.  Some are due to a YouTube sync where the youtube urls
        # changed and our code was producing youtube_ids that ended with '_player'.
        # This hack gets the most recent valid Video object, i.e. the one
        # with the largest key id whose youtube_id has no '_player' suffix.
        newest = None
        newest_id = 0
        for candidate in matches:
            candidate_id = candidate.key().id()
            if candidate_id > newest_id and not candidate.youtube_id.endswith('_player'):
                newest = candidate
                newest_id = candidate_id
        # End of hack
        return newest

    @staticmethod
    def get_all_live():
        """Return the videos referenced by live VideoPlaylist associations.

        At most 10000 associations are read; the videos themselves are
        fetched with an eventual-consistency read policy.
        """
        live_links = VideoPlaylist.all().filter('live_association = ', True).fetch(10000)
        video_keys = [VideoPlaylist.video.get_value_for_datastore(link) for link in live_links]
        eventual = db.create_config(read_policy=db.EVENTUAL_CONSISTENCY)
        return Video.get(video_keys, config=eventual)

    def first_playlist(self):
        """Return the first cached playlist for this video, or None."""
        cached = VideoPlaylist.get_cached_playlists_for_video(self)
        return cached[0] if cached else None

    def current_user_points(self):
        """Return the current user's points for this video, or 0.

        0 is returned when the current user has no UserVideo for it.
        """
        user_video = UserVideo.get_for_video_and_user_data(self, UserData.current())
        if not user_video:
            return 0
        return points.VideoPointCalculator(user_video)

    @staticmethod
    def get_dict(query, fxn_key):
        """Return {fxn_key(video): video} for up to 10000 results of query.

        When two videos map to the same key, the later result wins.
        """
        return dict((fxn_key(video), video) for video in query.fetch(10000))

    @layer_cache.cache_with_key_fxn(
        lambda self: "related_exercises_%s" % self.key(),
        layer=layer_cache.Layers.Memcache,
        expiration=3600 * 2)
    def related_exercises(self):
        """Return exercises linked to this video via ExerciseVideo.

        The result is ordered by v_position, with h_position breaking ties.
        """
        links = ExerciseVideo.all()
        links.filter('video =', self.key())
        # Two stable sorts: the second (v_position) is the primary order.
        by_h_position = sorted((link.exercise for link in links),
                               key=lambda exercise: exercise.h_position)
        return sorted(by_h_position, key=lambda exercise: exercise.v_position)

    @staticmethod
    @layer_cache.cache(expiration=3600)
    def approx_count():
        """Return Setting.count_videos() rounded down to a multiple of 100."""
        hundreds = int(Setting.count_videos()) // 100
        return hundreds * 100

class Playlist(Searchable, db.Model):
    """A playlist entity, indexed for search on title and description."""

    youtube_id = db.StringProperty()
    url = db.StringProperty()
    title = db.StringProperty()
    description = db.TextProperty()
    readable_id = db.StringProperty() #human readable, but unique id that can be used in URLS
    tags = db.StringListProperty()
    INDEX_ONLY = ['title', 'description']
    INDEX_TITLE_FROM_PROP = 'title'
    INDEX_USES_MULTI_ENTITIES = False

    _serialize_blacklist = ["readable_id"]

    @property
    def ka_url(self):
        """Absolute URL made of '#' plus the URL-quoted slug of the title."""
        anchor = urllib.quote(slugify(self.title))
        return util.absolute_url('#%s' % anchor)

    @staticmethod
    def get_for_all_topics():
        """Return the playlists (of the first 1000) whose title is in all_topics_list."""
        return [playlist for playlist in Playlist.all().fetch(1000)
                if playlist.title in all_topics_list]

    def get_exercises(self):
        """Return the exercises attached to any video of this playlist.

        Each exercise appears once, even when several of the playlist's
        videos point at it. Only the first 1000 video keys are considered.
        """
        keys_query = Video.all(keys_only=True)
        keys_query.filter('playlists = ', self.title)
        playlist_video_keys = keys_query.fetch(1000)

        exercises_by_key = Exercise.get_dict(Exercise.all(), lambda exercise: exercise.key())
        exercise_keys_by_video = ExerciseVideo.get_key_dict(ExerciseVideo.all())

        # Keyed by exercise key so that repeated exercises collapse into one.
        found = {}
        for video_key in playlist_video_keys:
            if video_key not in exercise_keys_by_video:
                continue
            for exercise_key in exercise_keys_by_video[video_key]:
                if exercise_key in exercises_by_key:
                    found[exercise_key] = exercises_by_key[exercise_key]

        return [found[exercise_key] for exercise_key in found]

    def get_videos(self):
        """Return this playlist's live videos ordered by video_position.

        Each returned video gets a `position` attribute holding its
        video_position in this playlist.
        """
        videos_by_key = Video.get_dict(
            Video.all().filter('playlists = ', self.title),
            lambda video: video.key())

        live_links = VideoPlaylist.all()
        live_links.filter('playlist =', self)
        live_links.filter('live_association =', True)
        links_by_playlist = VideoPlaylist.get_key_dict(live_links)

        ordered_links = sorted(links_by_playlist[self.key()].values(),
                               key=lambda link: link.video_position)

        ordered_videos = []
        for link in ordered_links:
            video = videos_by_key[VideoPlaylist.video.get_value_for_datastore(link)]
            video.position = link.video_position
            ordered_videos.append(video)

        return ordered_videos


class UserPlaylist(db.Model):
    """Per-user watching totals for one playlist."""

    user = db.UserProperty()
    playlist = db.ReferenceProperty(Playlist)
    seconds_watched = db.IntegerProperty(default = 0)
    last_watched = db.DateTimeProperty(auto_now_add = True)
    title = db.StringProperty(indexed=False)

    @staticmethod
    def get_for_user_data(user_data):
        """Return an unexecuted query for all UserPlaylists of user_data."""
        return UserPlaylist.all().filter('user =', user_data.user)

    @staticmethod
    def get_key_name(playlist, user_data):
        """Return the key name '<key_email>:<playlist youtube_id>'."""
        owner = user_data.key_email
        return owner + ":" + playlist.youtube_id

    @staticmethod
    def get_for_playlist_and_user_data(playlist, user_data, insert_if_missing=False):
        """Return the UserPlaylist for this playlist and user.

        Returns None when user_data is empty, or when nothing is stored and
        insert_if_missing is False. With insert_if_missing the entity is
        created on demand.
        """
        if not user_data:
            return None

        key_name = UserPlaylist.get_key_name(playlist, user_data)
        if not insert_if_missing:
            return UserPlaylist.get_by_key_name(key_name)

        return UserPlaylist.get_or_insert(
            key_name=key_name,
            user=user_data.user,
            playlist=playlist)

class UserVideo(db.Model):
    """Per-user watching state for one video."""

    @staticmethod
    def get_key_name(video, user_data):
        """Return the key name '<key_email>:<video youtube_id>'."""
        owner = user_data.key_email
        return owner + ":" + video.youtube_id

    @staticmethod
    def get_for_video_and_user_data(video, user_data, insert_if_missing=False):
        """Return the UserVideo for this video and user.

        Returns None when user_data is empty, or when nothing is stored and
        insert_if_missing is False. With insert_if_missing the entity is
        created on demand, seeded with the video's duration.
        """
        if not user_data:
            return None

        key_name = UserVideo.get_key_name(video, user_data)
        if not insert_if_missing:
            return UserVideo.get_by_key_name(key_name)

        return UserVideo.get_or_insert(
            key_name=key_name,
            user=user_data.user,
            video=video,
            duration=video.duration)

    @staticmethod
    def count_completed_for_user_data(user_data):
        """Return how many completed UserVideos the user has (at most 10000)."""
        completed = UserVideo.all().filter("user = ", user_data.user)
        completed.filter("completed = ", True)
        return completed.count(limit=10000)

    user = db.UserProperty()
    video = db.ReferenceProperty(Video)

    # Most recently watched second in video (playhead state)
    last_second_watched = db.IntegerProperty(default = 0, indexed=False)

    # Number of seconds actually spent watching this video, regardless of jumping around to various
    # scrubber positions. This value can exceed the total duration of the video if it is watched
    # many times, and it doesn't necessarily match the percent watched.
    seconds_watched = db.IntegerProperty(default = 0)

    last_watched = db.DateTimeProperty(auto_now_add = True)
    duration = db.IntegerProperty(default = 0, indexed=False)
    completed = db.BooleanProperty(default = False)

    @property
    def points(self):
        """Result of points.VideoPointCalculator for this UserVideo."""
        # Inside this method `points` is the module-level import; the
        # property of the same name only exists as a class attribute.
        calculate = points.VideoPointCalculator
        return calculate(self)

class VideoLog(db.Model):
    """Log entry for one reported stretch of a user watching a video.

    Entries are built by add_entry(), which also updates the related
    UserVideo, UserPlaylist and UserData entities.
    """

    user = db.UserProperty()
    video = db.ReferenceProperty(Video)
    video_title = db.StringProperty(indexed=False)
    time_watched = db.DateTimeProperty(auto_now_add = True)
    seconds_watched = db.IntegerProperty(default = 0, indexed=False)
    last_second_watched = db.IntegerProperty(indexed=False)
    points_earned = db.IntegerProperty(default = 0, indexed=False)
    playlist_titles = db.StringListProperty(indexed=False)

    _serialize_blacklist = ["video"]

    @staticmethod
    def get_for_user_data_between_dts(user_data, dt_a, dt_b):
        """Return a query for the user's logs with dt_a <= time_watched <= dt_b.

        The query is ordered by time_watched and is not executed here.
        """
        query = VideoLog.all()
        query.filter('user =', user_data.user)

        query.filter('time_watched >=', dt_a)
        query.filter('time_watched <=', dt_b)
        query.order('time_watched')

        return query

    @staticmethod
    def get_for_user_data_and_video(user_data, video):
        """Return a query for the user's logs of one video.

        The query is ordered by time_watched and is not executed here.
        """
        query = VideoLog.all()

        query.filter('user =', user_data.user)
        query.filter('video =', video)

        query.order('time_watched')

        return query

    @staticmethod
    def add_entry(user_data, video, seconds_watched, last_second_watched):
        """Record that user_data watched seconds_watched seconds of video.

        Updates the user's UserVideo, the UserPlaylists of the video's live
        playlists, and user_data (watch totals, activity, points), then
        defers the write of the new VideoLog to a task queue.

        Parameters:
        user_data - UserData of the watcher
        video - Video being watched
        seconds_watched - Seconds to credit; clamped to [0, video.duration]
        last_second_watched - Playhead position to store

        Returns (user_video, video_log, video_points_total), or
        (None, None, 0) when the credited time overlaps the last logged
        entry of a different video.
        """

        user_video = UserVideo.get_for_video_and_user_data(video, user_data, insert_if_missing=True)

        # Cap seconds_watched at duration of video
        seconds_watched = max(0, min(seconds_watched, video.duration))

        # Points before this entry is applied, used below to compute the delta.
        video_points_previous = points.VideoPointCalculator(user_video)

        action_cache=last_action_cache.LastActionCache.get_for_user_data(user_data)

        last_video_log = action_cache.get_last_video_log()

        # If the last video logged is not this video and the times being credited
        # overlap, don't give points for this video. Can only get points for one video
        # at a time.
        if last_video_log and last_video_log.key_for_video() != video.key():
            dt_now = datetime.datetime.now()
            if last_video_log.time_watched > (dt_now - datetime.timedelta(seconds=seconds_watched)):
                return (None, None, 0)

        video_log = VideoLog()
        video_log.user = user_data.user
        video_log.video = video
        video_log.video_title = video.title
        video_log.seconds_watched = seconds_watched
        video_log.last_second_watched = last_second_watched

        if seconds_watched > 0:
            # First watched seconds for this video: queue the "started" CSS
            # update under a new uservideocss_version.
            if user_video.seconds_watched == 0:
                user_data.uservideocss_version += 1
                UserVideoCss.set_started(user_data, user_video.video, user_data.uservideocss_version)

            user_video.seconds_watched += seconds_watched
            user_data.total_seconds_watched += seconds_watched

            # Update seconds_watched of all associated UserPlaylists
            query = VideoPlaylist.all()
            query.filter('video =', video)
            query.filter('live_association = ', True)

            first_video_playlist = True
            for video_playlist in query:
                user_playlist = UserPlaylist.get_for_playlist_and_user_data(video_playlist.playlist, user_data, insert_if_missing=True)
                user_playlist.title = video_playlist.playlist.title
                user_playlist.seconds_watched += seconds_watched
                user_playlist.last_watched = datetime.datetime.now()
                user_playlist.put()

                video_log.playlist_titles.append(user_playlist.title)

                # The log is pushed to the action cache once, on the first
                # playlist; it is not pushed when the video has no live playlist.
                if first_video_playlist:
                    action_cache.push_video_log(video_log)

                util_badges.update_with_user_playlist(
                        user_data,
                        user_playlist,
                        include_other_badges = first_video_playlist,
                        action_cache = action_cache)

                first_video_playlist = False

        user_video.last_second_watched = last_second_watched
        user_video.last_watched = datetime.datetime.now()
        user_video.duration = video.duration

        user_data.record_activity(user_video.last_watched)

        video_points_total = points.VideoPointCalculator(user_video)
        video_points_received = video_points_total - video_points_previous

        if not user_video.completed and video_points_total >= consts.VIDEO_POINTS_BASE:
            # Just finished this video for the first time
            user_video.completed = True
            # -1 makes UserData.get_videos_completed() recount on next use.
            user_data.videos_completed = -1

            user_data.uservideocss_version += 1
            UserVideoCss.set_completed(user_data, user_video.video, user_data.uservideocss_version)

        if video_points_received > 0:
            video_log.points_earned = video_points_received
            user_data.add_points(video_points_received)

        db.put([user_video, user_data])

        # Defer the put of VideoLog for now, as we think it might be causing hot tablets
        # and want to shift it off to an automatically-retrying task queue.
        # http://ikaisays.com/2011/01/25/app-engine-datastore-tip-monotonically-increasing-values-are-bad/
        deferred.defer(commit_video_log, video_log,
                       _queue = "video-log-queue",
                       _url = "/_ah/queue/deferred_videolog")


        # NOTE(review): user_data has already been dereferenced above, so the
        # `is not None` part of this check can never be False here.
        if user_data is not None and user_data.coaches:
            # Making a separate queue for the log summaries so we can clearly see how much they are getting used
            deferred.defer(commit_log_summary_coaches, video_log, user_data.coaches,
                _queue = "log-summary-queue",
                _url = "/_ah/queue/deferred_log_summary")

        return (user_video, video_log, video_points_total)

    def time_started(self):
        """Return time_watched minus seconds_watched."""
        return self.time_watched - datetime.timedelta(seconds = self.seconds_watched)

    def time_ended(self):
        """Return time_watched."""
        return self.time_watched

    def minutes_spent(self):
        """Return util.minutes_between() of time_started() and time_ended()."""
        return util.minutes_between(self.time_started(), self.time_ended())

    def key_for_video(self):
        """Return the stored datastore value of the video reference.

        Read via get_value_for_datastore rather than through self.video.
        """
        return VideoLog.video.get_value_for_datastore(self)

def commit_video_log(video_log, user_data=None):
    """Write video_log to the datastore.

    Used by our deferred video log insertion process. user_data is accepted
    but not used.
    """
    video_log.put()

class DailyActivityLog(db.Model):
    """ A log entry for a dashboard presented to users and coaches.

    This is used in the end-user-visible dashboards that display
    student activity and breaks down where the user is spending her time.

    """

    user = db.UserProperty()
    date = db.DateTimeProperty()
    activity_summary = object_property.ObjectProperty()

    @staticmethod
    def get_key_name(user_data, date):
        """Return the key name '<key_email>:<date as YYYY-MM-DD-HH>'."""
        owner = user_data.key_email
        hour_stamp = date.strftime("%Y-%m-%d-%H")
        return "%s:%s" % (owner, hour_stamp)

    @staticmethod
    def build(user_data, date, activity_summary):
        """Return a new, unsaved DailyActivityLog for the user and date."""
        key_name = DailyActivityLog.get_key_name(user_data, date)
        entry = DailyActivityLog(key_name=key_name)
        entry.user = user_data.user
        entry.date = date
        entry.activity_summary = activity_summary
        return entry

    @staticmethod
    def get_for_user_data_between_dts(user_data, dt_a, dt_b):
        """Return a query for the user's logs with dt_a <= date < dt_b.

        The query is ordered by date and is not executed here.
        """
        logs = DailyActivityLog.all().filter('user =', user_data.user)
        logs.filter('date >=', dt_a)
        logs.filter('date <', dt_b)
        logs.order('date')
        return logs

class LogSummaryTypes:
    """String identifiers for the kinds of log summary that can be stored."""

    # Summary of a user's adjacent activity
    USER_ADJACENT_ACTIVITY = "UserAdjacentActivity"

    # Per-class daily activity summary (used with a 1440-minute period)
    CLASS_DAILY_ACTIVITY = "ClassDailyActivity"

# Tracks the number of shards for each named log summary
class LogSummaryShardConfig(db.Model):
    """Stores how many shards exist for one named log summary.

    Entities are keyed by the summary name (see increase_shards, which
    uses the name as the key name).
    """

    name = db.StringProperty(required=True)
    num_shards = db.IntegerProperty(required=True, default=1)

    @staticmethod
    def increase_shards(name, num):
        """Increase the number of shards for a given sharded counter.
        Will never decrease the number of shards.

        Parameters:
        name - The name of the counter
        num - How many shards to use

        """
        # Make sure the config exists before opening our own transaction.
        LogSummaryShardConfig.get_or_insert(name, name=name)

        def txn():
            # Re-read inside the transaction. Comparing against a copy that
            # was fetched before the transaction started could overwrite a
            # larger num_shards written concurrently by another process.
            config = LogSummaryShardConfig.get_by_key_name(name)
            if config is not None and config.num_shards < num:
                config.num_shards = num
                config.put()

        db.run_in_transaction(txn)

# can keep a variety of different types of summaries pulled from the logs
class LogSummary(db.Model):
    """One shard of a per-user, per-period summary built from activity logs.

    Entities are keyed "<shard index>:<name>", where name comes from
    get_name_by_dates(). All shards of one summary share the same `name`
    property, so get_by_name() returns every shard.
    """

    user = db.UserProperty()
    start = db.DateTimeProperty()
    end = db.DateTimeProperty()
    summary_type = db.StringProperty()
    summary = object_property.UnvalidatedObjectProperty()
    name = db.StringProperty(required=True)

    @staticmethod
    def get_start_of_period(activity, delta):
        """Return the start of the delta-minute period the activity began in.

        Only delta == 1440 (truncate to the day) and delta == 60 (truncate
        to the hour) are handled; any other value raises Exception.
        """
        date = activity.time_started()

        if delta == 1440:
            return datetime.datetime(date.year, date.month, date.day)

        if delta == 60:
            return datetime.datetime(date.year, date.month, date.day, date.hour)

        raise Exception("unhandled delta to get_key_name")

    @staticmethod
    def get_end_of_period(activity, delta):
        """Return the period start plus delta minutes."""
        return LogSummary.get_start_of_period(activity, delta) + datetime.timedelta(minutes=delta)

    @staticmethod
    def get_name(user_data, summary_type, activity, delta):
        """Return the summary name for the period containing the activity."""
        return LogSummary.get_name_by_dates(
            user_data,
            summary_type,
            LogSummary.get_start_of_period(activity, delta),
            LogSummary.get_end_of_period(activity, delta))

    @staticmethod
    def get_name_by_dates(user_data, summary_type, start, end):
        """Return "<key_email>:<summary_type>:<start>:<end>" with minute-resolution dates."""
        return "%s:%s:%s:%s" % (user_data.key_email, summary_type, start.strftime("%Y-%m-%d-%H-%M"), end.strftime("%Y-%m-%d-%H-%M"))

    @staticmethod
    def add_or_update_entry(user_data, activity, summary_class, summary_type, delta=30):
        """Fold one activity (or a list of them) into a randomly chosen summary shard.

        Parameters:
        user_data - owner of the summary; nothing happens when it is None
        activity - an object with a time_started() method, or a list of such
            objects that all belong to the same period
        summary_class - called with no arguments to create a new summary;
            the summary needs a method .add(user_data, activity)
        summary_type - stored on the entity and used in its name
        delta - period length in minutes. Only 60 and 1440 are handled by
            get_start_of_period, so the default of 30 raises Exception.

        If the transaction fails with TransactionFailedError, the shard
        count is increased by one and the write is retried once on the
        newly added shard.
        """
        if user_data is None:
            return

        def txn(name, shard_name, user_data, activities, summary_class, summary_type, delta):
            log_summary = LogSummary.get_by_key_name(shard_name)

            if log_summary is None:
                first_activity = activities[0]

                log_summary = LogSummary(
                    key_name=shard_name,
                    name=name,
                    user=user_data.user,
                    start=LogSummary.get_start_of_period(first_activity, delta),
                    end=LogSummary.get_end_of_period(first_activity, delta),
                    summary_type=summary_type)

                log_summary.summary = summary_class()

            for current_activity in activities:
                log_summary.summary.add(user_data, current_activity)

            log_summary.put()

        # if activities is a list, we assume all activities belong to the same period - this is used in classtime.fill_class_summaries_from_logs()
        if isinstance(activity, list):
            activities = activity
            if not activities:
                # Nothing to summarize; avoids an IndexError on activities[0]
                return
            activity = activities[0]
        else:
            activities = [activity]

        name = LogSummary.get_name(user_data, summary_type, activity, delta)
        config = LogSummaryShardConfig.get_or_insert(name, name=name)

        index = random.randint(0, config.num_shards - 1)
        shard_name = str(index) + ":" + name

        # running function within a transaction because time might elapse between the get and the put
        # and two processes could get before either puts. Transactions will ensure that its mutually exclusive
        # since they are operating on the same entity
        try:
            db.run_in_transaction(txn, name, shard_name, user_data, activities, summary_class, summary_type, delta)
        except TransactionFailedError:
            # if it is a transaction lock
            logging.info("increasing the number of shards to %i log summary: %s" %(config.num_shards+1, name))
            LogSummaryShardConfig.increase_shards(name, config.num_shards+1)
            # Shard indexes are zero-based, so the old count is the index of the new shard
            shard_name = str(config.num_shards) + ":" + name
            db.run_in_transaction(txn, name, shard_name, user_data, activities, summary_class, summary_type, delta)

    def get_description(self):
        """Return self.summary.description(self.start, self.end).

        This was declared as a @staticmethod while reading `self`, so every
        call raised NameError; it needs an instance to work.
        """
        return self.summary.description(self.start, self.end)

    @staticmethod
    def get_by_name(name):
        """Return a query for all LogSummary entities whose name property equals name."""
        query = LogSummary.all()
        query.filter('name =', name)
        return query

# commit_log_summary is used by our deferred log summary insertion process
def commit_log_summary(activity_log, user_data):
    """Add activity_log to the daily class summary of each of user_data's coaches.

    Does nothing when user_data is None.
    """
    if user_data is None:
        return

    from classtime import  ClassDailyActivitySummary # putting this at the top would get a circular reference

    for coach in user_data.coaches:
        coach_user_data = UserData.get_from_db_key_email(coach)
        LogSummary.add_or_update_entry(
            coach_user_data,
            activity_log,
            ClassDailyActivitySummary,
            LogSummaryTypes.CLASS_DAILY_ACTIVITY,
            1440)

# commit_log_summary_coaches is used by our deferred log summary insertion process
def commit_log_summary_coaches(activity_log, coaches):
    """Add activity_log to the daily class summary of every coach in coaches.

    Each entry of coaches is passed to UserData.get_from_db_key_email().
    """
    from classtime import  ClassDailyActivitySummary # putting this at the top would get a circular reference

    for coach in coaches:
        coach_user_data = UserData.get_from_db_key_email(coach)
        LogSummary.add_or_update_entry(
            coach_user_data,
            activity_log,
            ClassDailyActivitySummary,
            LogSummaryTypes.CLASS_DAILY_ACTIVITY,
            1440)

class ProblemLog(db.Model):
    """A record of one user's work on one exercise problem.

    Stores the attempts and hints, how long they took, and whether the
    problem was answered correctly.
    """

    user = db.UserProperty()
    exercise = db.StringProperty()
    correct = db.BooleanProperty(default = False)
    time_done = db.DateTimeProperty(auto_now_add=True)
    time_taken = db.IntegerProperty(default = 0, indexed=False)
    hint_time_taken_list = db.ListProperty(int, indexed=False)
    hint_after_attempt_list = db.ListProperty(int, indexed=False)
    count_hints = db.IntegerProperty(default = 0, indexed=False)
    problem_number = db.IntegerProperty(default = -1) # Used to reproduce problems
    exercise_non_summative = db.StringProperty(indexed=False) # Used to reproduce problems from summative exercises
    hint_used = db.BooleanProperty(default = False, indexed=False)
    points_earned = db.IntegerProperty(default = 0, indexed=False)
    earned_proficiency = db.BooleanProperty(default = False) # True if proficiency was earned on this problem
    suggested = db.BooleanProperty(default = False) # True if the exercise was suggested to the user
    sha1 = db.StringProperty(indexed=False)
    seed = db.StringProperty(indexed=False)
    problem_type = db.StringProperty(indexed=False)
    count_attempts = db.IntegerProperty(default = 0, indexed=False)
    time_taken_attempts = db.ListProperty(int, indexed=False)
    attempts = db.StringListProperty(indexed=False)
    random_float = db.FloatProperty() # Add a random float in [0, 1) for easy random sampling
    ip_address = db.StringProperty(indexed=False)

    def put(self, **kwargs):
        """Save the log, assigning random_float first if it is still unset.

        Keyword arguments are forwarded to db.Model.put and its return
        value is passed back to the caller; the previous override accepted
        no keyword arguments and always returned None.
        """
        if self.random_float is None:
            self.random_float = random.random()
        return db.Model.put(self, **kwargs)

    @property
    def ka_url(self):
        """Absolute URL of the exercise page for this exercise and problem number."""
        return util.absolute_url("/exercises?exid=%s&problem_number=%s" % \
            (self.exercise, self.problem_number))

    @staticmethod
    def get_for_user_data_between_dts(user_data, dt_a, dt_b):
        """Return a query for the user's logs with dt_a <= time_done < dt_b, ordered by time_done."""
        query = ProblemLog.all()
        query.filter('user =', user_data.user)

        query.filter('time_done >=', dt_a)
        query.filter('time_done <', dt_b)

        query.order('time_done')

        return query

    def time_taken_capped_for_reporting(self):
        """Return time_taken, capped at consts.MAX_WORKING_ON_PROBLEM_SECONDS."""
        # For reporting's sake, we cap the amount of time that you can be considered to be
        # working on a single problem at 60 minutes. If you've left your browser open
        # longer, you're probably not actively working on the problem.
        return min(consts.MAX_WORKING_ON_PROBLEM_SECONDS, self.time_taken)

    def time_started(self):
        """Return time_done minus the capped time taken."""
        return self.time_done - datetime.timedelta(seconds = self.time_taken_capped_for_reporting())

    def time_ended(self):
        """Return time_done."""
        return self.time_done

    def minutes_spent(self):
        """Return util.minutes_between() for the start and end of this problem."""
        return util.minutes_between(self.time_started(), self.time_ended())

# commit_problem_log is used by our deferred problem log insertion process
def commit_problem_log(problem_log_source, user_data = None):
    """Merge one deferred attempt or hint into the stored ProblemLog.

    problem_log_source carries a single attempt or hint. Inside a
    transaction, the ProblemLog with the same key name is loaded (or
    created from the source) and the attempt/hint is folded into it.

    The commit is skipped when:
    - problem_log_source is missing, has no key name, or is unsaved
      (db.NotSavedError),
    - problem_log_source.count_attempts is over 1000,
    - the attempt or hint slot has already been filled (dupe task run).

    user_data is accepted but not used by this function.
    """
    try:
        # key().name() must be called: the bound method itself is always
        # truthy, so testing it uncalled never detected a missing key name.
        if not problem_log_source or not problem_log_source.key().name():
            logging.critical("Skipping problem log commit due to missing problem_log_source or key().name")
            return
    except db.NotSavedError:
        # Handle special case during new exercise deploy
        logging.critical("Skipping problem log commit due to db.NotSavedError")
        return

    if problem_log_source.count_attempts > 1000:
        logging.info("Ignoring attempt to write problem log w/ attempts over 1000.")
        return

    # This does not have the same behavior as .insert(). This is used because
    # tasks can be run out of order so we extend the list as needed and insert
    # values.
    def insert_in_position(index, items, val, filler):
        if index >= len(items):
            items.extend([filler] * (index + 1 - len(items)))
        items[index] = val

    # Committing transaction combines existing problem log with any followup attempts
    def txn():
        problem_log = ProblemLog.get_by_key_name(problem_log_source.key().name())

        if not problem_log:
            problem_log = ProblemLog(
                key_name = problem_log_source.key().name(),
                user = problem_log_source.user,
                exercise = problem_log_source.exercise,
                problem_number = problem_log_source.problem_number,
                time_done = problem_log_source.time_done,
                sha1 = problem_log_source.sha1,
                seed = problem_log_source.seed,
                problem_type = problem_log_source.problem_type,
                suggested = problem_log_source.suggested,
                exercise_non_summative = problem_log_source.exercise_non_summative,
                ip_address = problem_log_source.ip_address,
        )

        problem_log.count_hints = max(problem_log.count_hints, problem_log_source.count_hints)
        problem_log.hint_used = problem_log.count_hints > 0
        # count_attempts is 1-based; the list slots are 0-based
        index_attempt = max(0, problem_log_source.count_attempts - 1)

        # Bump up attempt count
        if problem_log_source.attempts[0] != "hint": # attempt
            # A slot still holding the -1 filler has not been logged yet
            if index_attempt < len(problem_log.time_taken_attempts) \
               and problem_log.time_taken_attempts[index_attempt] != -1:
                # This attempt has already been logged. Ignore this dupe taskqueue execution.
                logging.info("Skipping problem log commit due to dupe taskqueue\
                    execution for attempt: %s, key.name: %s" % \
                    (index_attempt, problem_log_source.key().name()))
                return

            problem_log.count_attempts += 1

            # Add time_taken for this individual attempt
            problem_log.time_taken += problem_log_source.time_taken
            insert_in_position(index_attempt, problem_log.time_taken_attempts, problem_log_source.time_taken, filler=-1)

            # Add actual attempt content
            insert_in_position(index_attempt, problem_log.attempts, problem_log_source.attempts[0], filler="")

            # Proficiency earned should never change per problem
            problem_log.earned_proficiency = problem_log.earned_proficiency or \
                problem_log_source.earned_proficiency

        else: # hint
            index_hint = max(0, problem_log_source.count_hints - 1)

            if index_hint < len(problem_log.hint_time_taken_list) \
               and problem_log.hint_time_taken_list[index_hint] != -1:
                # This hint has already been logged. Ignore this dupe taskqueue execution.
                return

            # Add time taken for hint
            insert_in_position(index_hint, problem_log.hint_time_taken_list, problem_log_source.time_taken, filler=-1)

            # Add problem number this hint follows
            insert_in_position(index_hint, problem_log.hint_after_attempt_list, problem_log.count_attempts, filler=-1)

        # Points should only be earned once per problem, regardless of attempt count
        problem_log.points_earned = max(problem_log.points_earned, problem_log_source.points_earned)

        # Correct cannot be changed from False to True after first attempt
        problem_log.correct = (problem_log_source.count_attempts == 1 or problem_log.correct) and problem_log_source.correct and not problem_log.count_hints

        logging.info(problem_log.time_ended())
        problem_log.put()


    db.run_in_transaction(txn)

# Represents a matching between a playlist and a video
# Allows us to keep track of which videos are in a playlist and
# which playlists a video belongs to (not 1-to-1 mapping)


class VideoPlaylist(db.Model):
    """Association between one playlist and one video.

    A video can belong to several playlists, so this is not a 1-to-1
    mapping. Only rows with live_association == True are returned by the
    lookups below.
    """

    playlist = db.ReferenceProperty(Playlist)
    video = db.ReferenceProperty(Video)
    video_position = db.IntegerProperty()

    # Lets us enable/disable video playlist relationships in bulk without removing the entry
    live_association = db.BooleanProperty(default = False)
    last_live_association_generation = db.IntegerProperty(default = 0)

    _VIDEO_PLAYLIST_KEY_FORMAT = "VideoPlaylist_Videos_for_Playlist_%s"
    _PLAYLIST_VIDEO_KEY_FORMAT = "VideoPlaylist_Playlists_for_Video_%s"

    @staticmethod
    def get_cached_videos_for_playlist(playlist, limit=500):
        """Return the playlist's live videos in video_position order, via memcache.

        The memcache namespace combines App.version with
        Setting.cached_library_content_date(). The cache key only includes
        the playlist key, so `limit` applies only when the cache is filled.
        """
        key = VideoPlaylist._VIDEO_PLAYLIST_KEY_FORMAT % playlist.key()
        namespace = str(App.version) + "_" + str(Setting.cached_library_content_date())

        videos = memcache.get(key, namespace=namespace)

        # Test for None (a cache miss) rather than falsiness, so a cached
        # empty list is served from memcache instead of being re-queried on
        # every call. This matches get_cached_playlists_for_video.
        if videos is None:
            query = VideoPlaylist.all()
            query.filter('playlist =', playlist)
            query.filter('live_association = ', True)
            query.order('video_position')
            videos = [video_playlist.video for video_playlist in query.fetch(limit)]

            memcache.set(key, videos, namespace=namespace)

        return videos

    @staticmethod
    def get_cached_playlists_for_video(video, limit=5):
        """Return the live playlists containing the video, via memcache.

        Uses the same namespace scheme as get_cached_videos_for_playlist;
        `limit` applies only when the cache is filled.
        """
        key = VideoPlaylist._PLAYLIST_VIDEO_KEY_FORMAT % video.key()
        namespace = str(App.version) + "_" + str(Setting.cached_library_content_date())

        playlists = memcache.get(key, namespace=namespace)

        if playlists is None:
            query = VideoPlaylist.all()
            query.filter('video =', video)
            query.filter('live_association = ', True)
            playlists = [video_playlist.playlist for video_playlist in query.fetch(limit)]

            memcache.set(key, playlists, namespace=namespace)

        return playlists

    @staticmethod
    def get_query_for_playlist_title(playlist_title):
        """Return a query for the live VideoPlaylists of the playlist with this title.

        The playlist is looked up by exact title; the result of that lookup
        (which may be None if no playlist matches) is used as the filter
        value. Results are ordered by video_position.
        """
        playlist_query = Playlist.all()
        playlist_query.filter('title =', playlist_title)
        playlist = playlist_query.get()

        query = VideoPlaylist.all()
        query.filter('playlist =', playlist)
        query.filter('live_association = ', True)
        query.order('video_position')
        return query

    @staticmethod
    def get_key_dict(query):
        """Return {playlist key: {video key: VideoPlaylist}} for up to 10000 query results.

        Keys are read with get_value_for_datastore rather than through the
        reference properties themselves.
        """
        video_playlist_key_dict = {}
        for video_playlist in query.fetch(10000):
            playlist_key = VideoPlaylist.playlist.get_value_for_datastore(video_playlist)
            video_key = VideoPlaylist.video.get_value_for_datastore(video_playlist)

            video_playlist_key_dict.setdefault(playlist_key, {})[video_key] = video_playlist

        return video_playlist_key_dict

class ExerciseVideo(db.Model):
    """Association between one exercise and one video, with an ordering value."""

    video = db.ReferenceProperty(Video)
    exercise = db.ReferenceProperty(Exercise)
    exercise_order = db.IntegerProperty()

    def key_for_video(self):
        """Return the datastore value of this row's `video` property."""
        video_property = ExerciseVideo.video
        return video_property.get_value_for_datastore(self)

    @staticmethod
    def get_key_dict(query):
        """Return {video key: {exercise key: ExerciseVideo}} for up to 10000 query results."""
        by_video_key = {}

        for row in query.fetch(10000):
            video_key = ExerciseVideo.video.get_value_for_datastore(row)

            if video_key not in by_video_key:
                by_video_key[video_key] = {}

            exercise_key = ExerciseVideo.exercise.get_value_for_datastore(row)
            by_video_key[video_key][exercise_key] = row

        return by_video_key

# UserExerciseCache is an optimized-for-read-and-deserialization cache of user-specific exercise states.
# It can be reconstituted at any time via UserExercise objects.
#
class UserExerciseCache(db.Model):
    """Per-user cache of exercise state, keyed "UserExerciseCache:<key_email>".

    `dicts` maps exercise name to the plain dict built by
    dict_from_user_exercise(). A cache whose `version` differs from
    CURRENT_VERSION is treated as missing and rebuilt from UserExercise
    entities in get().
    """

    # Bump this whenever you change the structure of the cached UserExercises and need to invalidate all old caches
    CURRENT_VERSION = 7

    version = db.IntegerProperty()
    dicts = object_property.UnvalidatedObjectProperty()

    def user_exercise_dict(self, exercise_name):
        """Return the cached dict for exercise_name, or the defaults dict if there is none."""
        cached = self.dicts.get(exercise_name)
        if cached:
            return cached
        return UserExerciseCache.dict_from_user_exercise(None)

    def update(self, user_exercise):
        """Replace the cached dict for user_exercise.exercise (does not save the entity)."""
        fresh_dict = UserExerciseCache.dict_from_user_exercise(user_exercise)
        self.dicts[user_exercise.exercise] = fresh_dict

    @staticmethod
    def key_for_user_data(user_data):
        """Return the key name of the cache entity for user_data."""
        return "UserExerciseCache:%s" % user_data.key_email

    @staticmethod
    def get(user_data_or_list):
        """Load the cache for one UserData, or a list of caches for a list of them.

        Missing or out-of-date caches are regenerated from UserExercise
        queries. At most 10 regenerated caches are written back per call,
        using an asynchronous put.

        Raises Exception when user_data_or_list is empty or None.
        """
        if not user_data_or_list:
            raise Exception("Must provide UserData when loading UserExerciseCache")

        # We can grab a single UserExerciseCache or do an optimized grab of a bunch of 'em
        wants_list = type(user_data_or_list) == list
        user_data_list = user_data_or_list if wants_list else [user_data_or_list]

        # Try to get 'em all by key name
        key_names = [UserExerciseCache.key_for_user_data(user_data) for user_data in user_data_list]
        user_exercise_caches = UserExerciseCache.get_by_key_name(
                key_names,
                config=db.create_config(read_policy=db.EVENTUAL_CONSISTENCY)
                )

        # Positions whose cached graph is missing or out-of-date and has to be regenerated
        stale_positions = [
                position
                for position, cached in enumerate(user_exercise_caches)
                if not cached or cached.version != UserExerciseCache.CURRENT_VERSION]

        # One query per stale position, in the same order
        async_queries = [
                UserExercise.get_for_user_data(user_data_list[position])
                for position in stale_positions]

        if len(async_queries) > 0:

            # Run the async queries
            results = util.async_queries(async_queries)
            caches_to_put = []
            # The returned exercises are not used in this method
            exercises = Exercise.get_all_use_cache()

            # Populate the missing graphs w/ results from async queries;
            # results[n] belongs to stale_positions[n]
            for index_result, position in enumerate(stale_positions):
                user_exercises = results[index_result].get_result()
                rebuilt = UserExerciseCache.generate(user_data_list[position], user_exercises)

                if len(caches_to_put) < 10:
                    # We only put 10 at a time in case a teacher views a report w/ tons and tons of uncached students
                    caches_to_put.append(rebuilt)

                user_exercise_caches[position] = rebuilt

            if len(caches_to_put) > 0:
                # Fire off an asynchronous put to cache the missing results. On the production server,
                # we don't wait for the put to finish before dealing w/ the rest of the request
                # because we don't really care if the cache misses.
                future_put = db.put_async(caches_to_put)

                if App.is_dev_server:
                    # On the dev server, we have to explicitly wait for get_result in order to
                    # trigger the put (not truly asynchronous).
                    future_put.get_result()

        if not user_exercise_caches:
            return []

        # Return list of caches if a list was passed in,
        # otherwise return single cache
        if wants_list:
            return user_exercise_caches
        return user_exercise_caches[0]

    @staticmethod
    def dict_from_user_exercise(user_exercise):
        """Return the cacheable dict for a UserExercise, or the defaults for a falsy one."""
        if not user_exercise:
            return {
                    "streak": 0,
                    "longest_streak": 0,
                    "progress": 0.0,
                    "total_done": 0,
                    "last_done": datetime.datetime.min,
                    "last_review": datetime.datetime.min,
                    "review_interval_secs": 0,
                    "proficient_date": 0,
                    }

        return {
                "streak": user_exercise.streak,
                "longest_streak": user_exercise.longest_streak,
                "progress": user_exercise.progress,
                "total_done": user_exercise.total_done,
                "last_done": user_exercise.last_done,
                "last_review": user_exercise.last_review,
                "review_interval_secs": user_exercise.review_interval_secs,
                "proficient_date": user_exercise.proficient_date,
                }

    @staticmethod
    def generate(user_data, user_exercises=None):
        """Build (without saving) a current-version cache for user_data.

        When user_exercises is empty or None, they are fetched with
        UserExercise.get_for_user_data(user_data).
        """
        if not user_exercises:
            user_exercises = UserExercise.get_for_user_data(user_data)

        dicts = {}

        # Build up cache
        for user_exercise in user_exercises:
            candidate = UserExerciseCache.dict_from_user_exercise(user_exercise)
            existing = dicts.get(user_exercise.exercise)

            # In case user has multiple UserExercise mappings for a specific exercise,
            # always prefer the one w/ more problems done
            if existing is None or existing["total_done"] < candidate["total_done"]:
                dicts[user_exercise.exercise] = candidate

        return UserExerciseCache(
                key_name = UserExerciseCache.key_for_user_data(user_data),
                version = UserExerciseCache.CURRENT_VERSION,
                dicts = dicts,
            )

class UserExerciseGraph(object):
    """One user's view of the exercise graph: a dict per exercise, keyed by name.

    Each graph dict merges the exercise's static fields (see
    dict_from_exercise) with the user's cached state (from the
    user_exercise_cache passed to generate), plus the derived entries
    "proficient", "suggested", "struggling", "endangered", "coverer_dicts"
    and "prerequisite_dicts". review_graph_dicts() adds further entries.
    """

    def __init__(self, graph=None, cache=None):
        # Build a fresh dict per instance. A `graph={}` default would be a
        # single dict shared by every instance created without a graph.
        self.graph = {} if graph is None else graph
        self.cache = cache

    def graph_dict(self, exercise_name):
        """Return the graph dict for exercise_name, or None if it is not in the graph."""
        return self.graph.get(exercise_name)

    def graph_dicts(self):
        """Return all graph dicts ordered by h_position, then v_position."""
        return sorted(
                self.graph.values(),
                key=lambda graph_dict: (graph_dict["h_position"], graph_dict["v_position"]))

    def proficient_exercise_names(self):
        """Return the names of the proficient exercises, in graph_dicts() order."""
        return [graph_dict["name"] for graph_dict in self.proficient_graph_dicts()]

    def suggested_exercise_names(self):
        """Return the names of the suggested exercises, in graph_dicts() order."""
        return [graph_dict["name"] for graph_dict in self.suggested_graph_dicts()]

    def review_exercise_names(self):
        """Return the names of the exercises listed by review_graph_dicts()."""
        return [graph_dict["name"] for graph_dict in self.review_graph_dicts()]

    def suggested_graph_dicts(self):
        """Return the graph dicts whose "suggested" entry is truthy."""
        return [graph_dict for graph_dict in self.graph_dicts() if graph_dict["suggested"]]

    def proficient_graph_dicts(self):
        """Return the graph dicts whose "proficient" entry is truthy."""
        return [graph_dict for graph_dict in self.graph_dicts() if graph_dict["proficient"]]

    def recent_graph_dicts(self, n_recent=2):
        """Return up to n_recent graph dicts with a truthy last_done, most recent first."""
        return sorted(
                [graph_dict for graph_dict in self.graph_dicts() if graph_dict["last_done"]],
                reverse=True,
                key=lambda graph_dict: graph_dict["last_done"],
                )[0:n_recent]

    def review_graph_dicts(self):
        """Return the graph dicts of the exercises the user should review now.

        Side effect: stores "next_review", "is_review_candidate" and
        "is_ancestor_review_candidate" on the graph dicts. "next_review" and
        "is_ancestor_review_candidate" are computed only when absent, so
        later calls reuse the values from the first call.
        """

        # an exercise ex should be reviewed iff all of the following are true:
        #   * ex and all of ex's covering ancestors either
        #      * are scheduled to have their next review in the past, or
        #      * were answered incorrectly on last review (i.e. streak == 0 with proficient == true)
        #   * none of ex's covering ancestors should be reviewed
        #   * the user is proficient at ex
        # the algorithm:
        #   for each exercise:
        #     traverse its ancestors, computing and storing the next review time (if not already done),
        #     using now as the next review time if proficient and streak==0
        #   select and mark the exercises in which the user is proficient but with next review times in the past as review candidates
        #   for each of those candidates:
        #     traverse its ancestors, computing and storing whether an ancestor is also a candidate
        #   all exercises that are candidates but do not have ancestors as candidates should be listed for review

        now = datetime.datetime.now()

        def compute_next_review(graph_dict):
            if graph_dict.get("next_review") is None:
                graph_dict["next_review"] = datetime.datetime.min

                if graph_dict["total_done"] > 0 and graph_dict["last_review"] > datetime.datetime.min:
                    next_review = graph_dict["last_review"] + UserExercise.get_review_interval_from_seconds(graph_dict["review_interval_secs"])

                    if next_review > now and graph_dict["proficient"] and graph_dict["streak"] == 0:
                        next_review = now

                    if next_review > graph_dict["next_review"]:
                        graph_dict["next_review"] = next_review

                # The latest next-review time among the covering ancestors wins
                for covering_graph_dict in graph_dict["coverer_dicts"]:
                    covering_next_review = compute_next_review(covering_graph_dict)
                    if covering_next_review > graph_dict["next_review"]:
                        graph_dict["next_review"] = covering_next_review

            return graph_dict["next_review"]

        def compute_is_ancestor_review_candidate(graph_dict):
            if graph_dict.get("is_ancestor_review_candidate") is None:

                graph_dict["is_ancestor_review_candidate"] = False

                for covering_graph_dict in graph_dict["coverer_dicts"]:
                    graph_dict["is_ancestor_review_candidate"] = (graph_dict["is_ancestor_review_candidate"] or
                            covering_graph_dict["is_review_candidate"] or
                            compute_is_ancestor_review_candidate(covering_graph_dict))

            return graph_dict["is_ancestor_review_candidate"]

        for graph_dict in self.graph_dicts():
            compute_next_review(graph_dict)

        candidate_dicts = []
        for graph_dict in self.graph_dicts():
            if not graph_dict["summative"] and graph_dict["proficient"] and graph_dict["next_review"] <= now:
                graph_dict["is_review_candidate"] = True
                candidate_dicts.append(graph_dict)
            else:
                graph_dict["is_review_candidate"] = False

        return [graph_dict for graph_dict in candidate_dicts
                if not compute_is_ancestor_review_candidate(graph_dict)]

    def states(self, exercise_name):
        """Return the state flags for one exercise as a dict.

        "reviewing" is computed by calling review_graph_dicts(). The
        exercise must be in the graph; otherwise indexing the missing graph
        dict raises TypeError.
        """
        graph_dict = self.graph_dict(exercise_name)

        return {
            "proficient": graph_dict["proficient"],
            "suggested": graph_dict["suggested"],
            "struggling": graph_dict["struggling"],
            "endangered": graph_dict["endangered"],
            "summative": graph_dict["summative"],
            "reviewing": graph_dict in self.review_graph_dicts(),
        }

    @staticmethod
    def current():
        """Return the graph for UserData.current()."""
        return UserExerciseGraph.get(UserData.current())

    @staticmethod
    def get(user_data_or_list):
        """Return the graph for one UserData, or a list of graphs for a list of them.

        Empty input, or no caches coming back from UserExerciseCache.get,
        yields [] when a list was passed in and None otherwise.
        """
        wants_list = type(user_data_or_list) == list

        if not user_data_or_list:
            return [] if wants_list else None

        # We can grab a single UserExerciseGraph or do an optimized grab of a bunch of 'em
        user_data_list = user_data_or_list if wants_list else [user_data_or_list]

        user_exercise_cache_list = UserExerciseCache.get(user_data_list)

        if not user_exercise_cache_list:
            return [] if wants_list else None

        # Built once and shared by every graph generated below
        exercise_dicts = UserExerciseGraph.exercise_dicts()

        user_exercise_graphs = [
                UserExerciseGraph.generate(user_data, user_exercise_cache, exercise_dicts)
                for user_data, user_exercise_cache in zip(user_data_list, user_exercise_cache_list)]

        # Return list of graphs if a list was passed in,
        # otherwise return single graph
        return user_exercise_graphs if wants_list else user_exercise_graphs[0]

    @staticmethod
    def dict_from_exercise(exercise):
        """Return the user-independent part of a graph dict for an exercise.

        The per-user flags start as None, meaning "not computed yet";
        generate() fills them in.
        """
        return {
                "name": exercise.name,
                "display_name": exercise.display_name,
                "h_position": exercise.h_position,
                "v_position": exercise.v_position,
                "summative": exercise.summative,
                "struggling_threshold": exercise.struggling_threshold(),
                "num_milestones": exercise.num_milestones,
                "proficient": None,
                "explicitly_proficient": None,
                "suggested": None,
                "struggling": None,
                "endangered": None,
                # A real list, because generate() iterates it once per user
                "prerequisites": [
                    {"name": exercise_name, "display_name": Exercise.to_display_name(exercise_name)}
                    for exercise_name in exercise.prerequisites],
                "covers": exercise.covers,
            }

    @staticmethod
    def exercise_dicts():
        """Return dict_from_exercise() for every exercise from Exercise.get_all_use_cache()."""
        return [UserExerciseGraph.dict_from_exercise(exercise) for exercise in Exercise.get_all_use_cache()]

    @staticmethod
    def get_and_update(user_data, user_exercise):
        """Return a graph built from the user's cache after applying user_exercise to it.

        The cache object is updated in memory only; this method does not save it.
        """
        user_exercise_cache = UserExerciseCache.get(user_data)
        user_exercise_cache.update(user_exercise)
        return UserExerciseGraph.generate(user_data, user_exercise_cache, UserExerciseGraph.exercise_dicts())

    @staticmethod
    def generate(user_data, user_exercise_cache, exercise_dicts):
        """Build a UserExerciseGraph for one user.

        Parameters:
        user_data - provides proficient_exercises, the names of the
            explicitly proficient exercises
        user_exercise_cache - provides user_exercise_dict(name) for the
            user's state per exercise
        exercise_dicts - dicts as produced by dict_from_exercise(); they are
            copied into new graph dicts and are not modified here
        """

        graph = {}

        # Build up base of graph
        for exercise_dict in exercise_dicts:

            user_exercise_dict = user_exercise_cache.user_exercise_dict(exercise_dict["name"])

            graph_dict = {}

            graph_dict.update(user_exercise_dict)
            graph_dict.update(exercise_dict)
            graph_dict.update({
                "coverer_dicts": [],
                "prerequisite_dicts": [],
            })

            # TODO(david): Use accuracy to determine when struggling
            graph_dict["struggling"] = (graph_dict["streak"] == 0 and
                    not graph_dict["proficient_date"] and
                    graph_dict["total_done"] > graph_dict["struggling_threshold"])

            # In case user has multiple UserExercise mappings for a specific exercise,
            # always prefer the one w/ more problems done
            if graph_dict["name"] not in graph or graph[graph_dict["name"]]["total_done"] < graph_dict["total_done"]:
                graph[graph_dict["name"]] = graph_dict

        # Cache coverers and prereqs for later
        for graph_dict in graph.values():
            # Cache coverers
            for covered_exercise_name in graph_dict["covers"]:
                covered_graph_dict = graph.get(covered_exercise_name)
                if covered_graph_dict:
                    covered_graph_dict["coverer_dicts"].append(graph_dict)

            # Cache prereqs
            for prerequisite in graph_dict["prerequisites"]:
                prerequisite_graph_dict = graph.get(prerequisite["name"])
                if prerequisite_graph_dict:
                    graph_dict["prerequisite_dicts"].append(prerequisite_graph_dict)

        # Set explicit proficiencies
        for exercise_name in user_data.proficient_exercises:
            graph_dict = graph.get(exercise_name)
            if graph_dict:
                graph_dict["proficient"] = graph_dict["explicitly_proficient"] = True

        # Calculate implicit proficiencies
        def set_implicit_proficiency(graph_dict):
            if graph_dict["proficient"] is not None:
                return graph_dict["proficient"]

            graph_dict["proficient"] = False

            # Consider an exercise implicitly proficient if the user has
            # never missed a problem and a covering ancestor is proficient
            if graph_dict["streak"] == graph_dict["total_done"]:
                for covering_graph_dict in graph_dict["coverer_dicts"]:
                    if set_implicit_proficiency(covering_graph_dict):
                        graph_dict["proficient"] = True
                        break

            return graph_dict["proficient"]

        for exercise_name in graph:
            set_implicit_proficiency(graph[exercise_name])

        # Calculate suggested
        def set_suggested(graph_dict):
            if graph_dict["suggested"] is not None:
                return graph_dict["suggested"]

            # Don't suggest already-proficient exercises
            if graph_dict["proficient"]:
                graph_dict["suggested"] = False
                return graph_dict["suggested"]

            # First, assume we're suggesting this exercise
            graph_dict["suggested"] = True

            # Don't suggest exercises that are covered by other suggested exercises
            for covering_graph_dict in graph_dict["coverer_dicts"]:
                if set_suggested(covering_graph_dict):
                    graph_dict["suggested"] = False
                    return graph_dict["suggested"]

            # Don't suggest exercises if the user isn't proficient in all prerequisites
            for prerequisite_graph_dict in graph_dict["prerequisite_dicts"]:
                if not prerequisite_graph_dict["proficient"]:
                    graph_dict["suggested"] = False
                    return graph_dict["suggested"]

            return graph_dict["suggested"]

        def set_endangered(graph_dict):
            graph_dict["endangered"] = (graph_dict["proficient"] and
                    graph_dict["streak"] == 0 and
                    graph_dict["proficient_date"] is not None)

        for exercise_name in graph:
            set_suggested(graph[exercise_name])
            set_endangered(graph[exercise_name])

        return UserExerciseGraph(graph = graph, cache=user_exercise_cache)

from badges import util_badges, last_action_cache
from phantom_users import util_notify