Source code for narmer.stats

# -*- coding: utf-8 -*-

# Copyright 2015 by Christopher C. Little.
# This file is part of Narmer.
#
# Narmer is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Narmer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Narmer. If not, see <http://www.gnu.org/licenses/>.

"""narmer.stats.

The stats module defines functions for calculating various statistical data
about linguistic objects, including:

    - Weissman score calculation
"""

from __future__ import division, unicode_literals

import math
import sys


def weissman(r_tar, t_tar, r_src, t_src, alpha=1.0):
    r"""Calculate Weissman score based on entered statistics.

    The score is:
    :math:`W = \alpha \cdot \frac{r_{tar}}{r_{src}} \cdot
    \frac{\log t_{src}}{\log t_{tar}}`

    In practice, the score can be used to rate time-intensive tasks on the
    basis of other metrics, also, e.g. :math:`F_1` score.

    Sources:
    http://spectrum.ieee.org/view-from-the-valley/computing/software/a-madefortv-compression-metric-moves-to-the-real-world

    :param float r_tar: the target algorithm's compression ratio
    :param float t_tar: the target algorithm's compression time
    :param float r_src: a standard algorithm's compression ratio
    :param float t_src: a standard algorithm's compression time
    :param float alpha: a scaling constant (1.0 by default)
    :returns: the Weissman score
    :rtype: float
    :raises ValueError: if a compression time or a compression ratio is not
        a positive value

    >>> weissman(1, 1, 1, 1)
    1.0
    >>> weissman(1, 1, 1, 5)
    7248263982714164.0
    >>> weissman(1.2, 1.6, 4.8, 5)
    0.8560773855177113
    >>> weissman(1, 1, 1, 1, alpha=2)
    2.0
    >>> weissman(1.2, 1.6, 4.8, 5, alpha=2)
    1.7121547710354226
    """
    # Validate times first, then ratios, so the reported error is stable.
    if t_tar <= 0 or t_src <= 0:
        raise ValueError('Compression times must be positive values.')
    if r_tar <= 0 or r_src <= 0:
        raise ValueError('Compression ratios must be positive values.')

    # Equal times: the time term is treated as 1 and the logarithms are never
    # evaluated (this also covers t_src == t_tar == 1, where log would be 0).
    if t_src == t_tar:
        return alpha * (r_tar / r_src)

    # if t_tar == 1, add epsilon to avoid division by log(1) = 0
    if t_tar == 1:
        t_tar += sys.float_info.epsilon

    time_factor = math.log(t_src) / math.log(t_tar)

    # Equal ratios: skip the ratio division and scale the time term directly.
    if r_src == r_tar:
        return alpha * time_factor
    return alpha * (r_tar / r_src) * time_factor