Source code for smoofit.utils

import jax.numpy as jnp
from jax.config import config
import jax.ops as jops
import numpy as np

from typing import Dict, Callable, Optional, List, Tuple, Any, Union
import re

do_jit = True
debug = False

# do_jit = False
# debug = True
if not do_jit:
    print("Disabling JITting!")
    config.update('jax_disable_jit', True)
# config.update("jax_debug_nans", True)

def to_np(x, arr=True):
    if isinstance(x, np.ndarray) and (arr and len(x.shape)):
        return x
    if (not hasattr(x, "__len__") or (hasattr(x, "shape") and len(x.shape) == 0)) and arr:
        x = [x]
    return np.asarray(x)

def to_jnp(x, arr=True):
    if isinstance(x, jnp.DeviceArray) and (arr and len(x.shape)):
        return x
    if (not hasattr(x, "__len__") or (hasattr(x, "shape") and len(x.shape) == 0)) and arr:
        x = [x]
    return jnp.asarray(x)

def match(name_item, idx_item, list_len, spec_list):
    for v in spec_list:
        if isinstance(v, int):
            if v == idx_item:
                return True
        if isinstance(v, str):
            if re.match(v, name_item):
                return True
        if isinstance(v, slice):
            s = np.arange(list_len)[v]
            if idx_item in s:
                return True
    return False

[docs]def build_1d_tunfold_matrix(n_bins: int,
                            bin_widths: Union[List[float], np.ndarray, jnp.DeviceArray] = None) -> jnp.DeviceArray:
    """Build matrix of discrete second derivatives for TUnfold-like regularization in 1 dimension

    The second derivative operator of a 1D array with ``n`` entries is the ``(n-2,n)`` matrix

    .. math::

        \\begin{pmatrix}
            1 & -2 & 1 & 0 & \\dots & 0 \\\\
            0 & 1 & -2 & 1 & \\dots & 0 \\\\
            \\dots & & & & & \\\\
            0 & \\dots & 0 & 1 & -2 & 1
        \\end{pmatrix}

    This assumes uniform spacing (bin width) of the entries. If that is not the case, the estimation of the derivatives should
    take that into account.
    Given the differential cross sections :math:`s_{i-1}`, :math:`s_i`, :math:`s_{i+1}`, and the corresponding
    bin widths :math:`w_{i-1}`, :math:`w_{i}`, and :math:`w_{i+1}`, the distances between the bin centers are therefore
    :math:`d_i = 0.5(w_{i-1} + w_i)` and :math:`d_{i+1} = 0.5(w_i + w_{i+1})`, and the second derivative in :math:`i` is estimated as:

    .. math::

        s_{i-1} \\frac{2}{d_i (d_i + d_{i+1})} + s_i \\frac{4}{d_i d_{i+1} (d_i + d_{i+1})} + s_{i+1} \\frac{2}{d_{i+1} (d_i + d_{i+1})}

    :param n_bins: number of elements in the vector for which the second derivative is to be computed (should be :math:`\geq 3`)
    :param bin_widths: 1D array with ``n_bins`` entries specifying the bin widths of the distribution

    :returns: matrix of second derivatives
    """
    assert(n_bins >= 3)
    if bin_widths is not None:
        assert(bin_widths.shape[0] == n_bins)
    L = np.zeros((n_bins-2, n_bins))
    for row in range(n_bins-2):
        left = 1.
        middle = -2.
        right = 1.
        if bin_widths is not None:
            d_low = 0.5 * (bin_widths[row] + bin_widths[row+1])
            d_high = 0.5 * (bin_widths[row+1] + bin_widths[row+2])
            avg = 0.5 * (d_low + d_high)
            left = 1./(d_low * avg)
            middle = 2./(d_low * d_high * avg)
            right = 1./(d_high * avg)
        L[row, row] = left
        L[row, row+1] = middle
        L[row, row+2] = right
    return to_jnp(L)

[docs]class Dim6EFTMorphing:
    """A class for morphing signals in a dimension-6 EFT

    Instances of this class are callable and are meant to be passed to
    :py:meth:`smoofit.model.Process.scale_by_fn`.

    In a dim.-6 EFT with :math:`n` operators, contributions (cross sections, yields...) scale like:

    .. math::

        H(\\vec{c}) = H_0 + \\sum_{i=1}^n c_i H_i + \\sum_{i=1,j=1,i \\leq j \\leq n} c_i c_j H_{ij}

    where :math:`H_0` is the SM prediction, :math:`\\vec{c}`` are the Wilson coefficients,
    :math:`H_i` are the interferences between the SM and operator :math:`i`, and :math:`H_{ij}`
    are pure-EFT contributions (operator-squared and operator-operator interfence).

    If you have at your disposal a set of templates :math:`B_k = H(\\vec{c}_k)` (for instance,
    you have event weights for each :math:`\\vec{c}_k`, and have filled separate histograms
    using each set of weights), with :math:`k = 1 \\dots M = 1+n+n(n+1)/2`, it is possible to morph
    these templates into the prediction for any value of :math:`\\vec{c}`:

    .. math::

        H(\\vec{c}) = \\sum_{k=1}^M f_k(\\vec{c}) B_k

    where:

    .. math::

        f_k(\\vec{c}) &= \\sum_{l=1}^M w_l(\\vec{c}) W_{lk}^{-1}, \\\\
        \\vec{w}(\\vec{c}) &= (1, \\, c_1, \\dots, c_n, \\, c_1 c_1, c_1 c_2, \\dots , c_1 c_n, c_2 c_2, c_2 c_3, \\dots , c_n c_n ), \\\\
        W_{lk} &= w_l(\\vec{c}_k)

    The process this scaling function is attached to should therefore have :math:`M` sub-processes,
    matching the :math:`B_k` above. Note that the sum over :math:`k` is not performed by this morphing function,
    but by the model when summing all processes and sub-process together. Hence, systematic uncertainties are still applied
    separately to each sub-contribution :math:`B_k`.
    """
[docs]    def __init__(self, var, basis_points: np.ndarray):
        """Constructor

        :param var: the :py:class:`smoofit.model.Variable` representing the ``n`` Wilson coefficients
        :param basis_points: the values of the Wilson coefficients used for filling each basis template, as a 2D array with shape ``(M,n)``
        """
        self.var = var
        self.n_ops = var.dim
        self.n_points = 1 + var.dim + var.dim * (var.dim + 1) // 2
        assert(basis_points.shape[1] == var.dim and basis_points.shape[0] == self.n_points)
        self.basis_points = to_np(basis_points) # kl, k=point, l=operator

        self.weights = np.ones((self.n_points, self.n_points))
        for i in range(self.n_points):
            self.weights[i,:] = self._build_morphing_vector(self.basis_points[i,:])
        self.morphing_matrix = jnp.linalg.inv(self.weights)

    def _build_morphing_vector(self, point):
        A = point[:,jnp.newaxis] * point
        cross = A[jnp.triu_indices(self.n_ops)]
        return jnp.hstack(([1.], point, cross))

[docs]    def __call__(self, values: jnp.DeviceArray) -> jnp.DeviceArray:
        """Morphing functions

        :param values: array of shape ``(n,)`` with the values of the Wilson coefficients :math:`\\vec{c}`

        :returns: array of shape ``(M,1)`` where entry ``[k,0]`` is equal to :math:`f_k(\\vec{c})`
        """
        factors = jnp.dot(self._build_morphing_vector(values), self.morphing_matrix)
        return jnp.expand_dims(factors, -1) # for broadcasting over bins