"""
This is a demonstration file that explains how to
use object analysis statistics.

#. Import a dataset;
#. Create a ROI from this dataset, having multiple disconnected 3D objects;
#. Right-click on the ROI and select *New Connectivity Multi-ROI Analysis...*;
#. Open the *Statistical Properties* panel (button in the top-left section of the *Object Analysis* window);
#. In the left column (*Available datasets:*), select the item with the title of the dataset;
#. In the right column (*Statistical properties:*), select the items *Outlier count* and *Outlier fraction*;
#. Press the *OK* button to compute these statistics.

:author: ORS Team
:contact: http://theobjects.com
:email: info@theobjects.com
:organization: Object Research Systems (ORS), Inc.
:address: 760 St-Paul West, suite 101, Montréal, Québec, Canada, H3C 1M4
:copyright: Object Research Systems (ORS), Inc. All rights reserved 2020.
:date: Oct 16 2017 13:46
:dragonflyVersion: 3.1.0.307 (D)
:UUID: eadba00cb29911e79651448a5b5d70c0
"""

__version__ = '1.0.0'

import numpy as np

from ORSModel import orsObj, Channel, ROI
from OrsHelpers.arrayhelper import ArrayHelper
from OrsPythonPlugins.OrsObjectAnalysis.PythonScriptsStatisticsGenerators.StatisticsGeneratorAbstract import StatisticsGeneratorAbstract


class DemoObjectAnalysisStatisticsOutliers_eadba00cb29911e79651448a5b5d70c0(StatisticsGeneratorAbstract):
    """Statistics generator computing per-object outlier statistics.

    For each labeled object of a multi-ROI, the dataset values covered by the
    object are analyzed with the modified Thompson Tau test, producing two
    statistical properties: 'Outlier count' and 'Outlier fraction'.
    """

    # Critical tau values of the modified Thompson Tau test, keyed by sample
    # size n.  Sizes above 38 that are not listed are handled by linear
    # interpolation between the entries of _tauNInterpolation (see
    # _getTauValue); sizes above 5000 saturate at 1.96.
    _tauTable = {
        3: 1.1511,
        4: 1.4250,
        5: 1.5712,
        6: 1.6563,
        7: 1.7110,
        8: 1.7491,
        9: 1.7770,
        10: 1.7984,
        11: 1.8153,
        12: 1.8290,
        13: 1.8403,
        14: 1.8498,
        15: 1.8579,
        16: 1.8649,
        17: 1.8710,
        18: 1.8764,
        19: 1.8811,
        20: 1.8853,
        21: 1.8891,
        22: 1.8926,
        23: 1.8957,
        24: 1.8985,
        25: 1.9011,
        26: 1.9035,
        27: 1.9057,
        28: 1.9078,
        29: 1.9096,
        30: 1.9114,
        31: 1.9130,
        32: 1.9146,
        33: 1.9160,
        34: 1.9174,
        35: 1.9186,
        36: 1.9198,
        37: 1.9209,
        38: 1.9220,
        40: 1.9240,
        42: 1.9257,
        44: 1.9273,
        46: 1.9288,
        48: 1.9301,
        50: 1.9314,
        55: 1.9340,
        60: 1.9362,
        65: 1.9381,
        70: 1.9397,
        80: 1.9423,
        90: 1.9443,
        100: 1.9459,
        200: 1.9530,
        500: 1.9572,
        1000: 1.9586,
        5000: 1.9597}

    # Sorted sample sizes from _tauTable used as interpolation nodes for
    # sample sizes in (38, 5000) that are not tabulated exactly.
    _tauNInterpolation = [38, 40, 42, 44, 46, 48, 50, 55, 60, 65, 70, 80, 90, 100, 200, 500, 1000, 5000]

    def __init__(self):
        super().__init__()

        # Tags of the statistical properties this generator can compute.
        self.outputTagsWithDataset = ['Outlier count',
                                      'Outlier fraction']

    @classmethod
    def getUUID(cls):
        """Return the unique identifier (UUID) of this statistics generator."""
        return "eadba00cb29911e79651448a5b5d70c0"

    def getOutputTagsWithDataset(self):
        """Return the list of statistical property tags this generator computes."""
        return self.outputTagsWithDataset

    def generateOutputsWithDataset(self, datasetGUID, multiROIGUID, listTagsRequested=None, IProgress=None):
        """Compute the requested statistical properties for every non-empty label.

        :param datasetGUID: GUID of the dataset providing the scalar values
        :param multiROIGUID: GUID of the multi-ROI whose labeled objects are analyzed
        :param listTagsRequested: tags to compute; None means all supported tags
        :param IProgress: optional progress/cancellation interface (may be None)
        :return: dict mapping each tag to a tuple (numpy array with one value per
                 non-empty label, flag).  The flag stays True while values are not
                 fully computed (e.g. after a cancellation) and is set to False
                 once the computation completed.
        """
        aMultiROI = orsObj(multiROIGUID)  # Get the multi ROI object from its GUID
        aDataset = orsObj(datasetGUID)
        npArrayDataset = aDataset.getNDArray()

        # Compute a numpy array of all multi ROI non empty labels
        arrayNonEmptyLabels = aMultiROI.getNonEmptyLabels(None)
        npArrayNonEmptyLabels = ArrayHelper.ConvertOrsToNumpyArray(arrayNonEmptyLabels)
        arrayNonEmptyLabels.setCallbacksEnabled(False)

        # Integrated cache mechanic: readScalarValues checks whether statistical
        # properties were already computed.  If so, it fills npArrayDictToReturn
        # with the cached values and removes the corresponding tags from
        # listTagsRemaining IN PLACE.
        # BUGFIX: work on a copy.  The original code aliased the caller's list
        # -- or, when listTagsRequested was None, self.outputTagsWithDataset
        # itself -- so a cache hit permanently shrank the generator's
        # advertised output tags.
        if listTagsRequested is None:
            listTagsRemaining = list(self.getOutputTagsWithDataset())
        else:
            listTagsRemaining = list(listTagsRequested)
        npArrayDictToReturn = self.readScalarValues(aMultiROI, npArrayNonEmptyLabels, listTagsRemaining)

        if len(listTagsRemaining) == 0:
            # All outputs were found in scalar values. Immediate return.
            arrayNonEmptyLabels.deleteObject()
            return npArrayDictToReturn

        # At last, we have to compute statistical properties that hadn't been previously computed
        nonEmptyLabelCount = len(npArrayNonEmptyLabels)

        # Filling the dictionary with the remaining tags to compute; the True
        # flag marks values as not yet fully computed.
        for tag in listTagsRemaining:
            npArrayDictToReturn[tag] = (np.zeros((nonEmptyLabelCount,), dtype=float), True)

        # Temporary ROI used to isolate each labeled object in turn
        ROICurrentObject = ROI()
        ROICurrentObject.copyShapeFromStructuredGrid(aMultiROI)
        ROICurrentObject.setCallbacksEnabled(False)

        # Temporary Channel used as a binary (0/1) mask over the dataset
        ChannelMaskCurrentObject = Channel()
        ChannelMaskCurrentObject.copyShapeFromStructuredGrid(aMultiROI)
        ChannelMaskCurrentObject.initializeDataForUCHAR()
        ChannelMaskCurrentObject.setCallbacksEnabled(False)
        npArrayMaskCurrentObject = ChannelMaskCurrentObject.getNDArray()

        # Progress is refreshed at most ~100 times over the whole loop
        if nonEmptyLabelCount <= 100:
            labelStepRefreshingProgress = 1
        else:
            labelStepRefreshingProgress = int(nonEmptyLabelCount / 100)
        nextLabelUpdateProgress = labelStepRefreshingProgress

        # Getting the arrays to fill
        arrayOutlierCount = None  # Initialization
        if 'Outlier count' in listTagsRemaining:
            arrayOutlierCount = npArrayDictToReturn['Outlier count'][0]

        arrayOutlierFraction = None  # Initialization
        if 'Outlier fraction' in listTagsRemaining:
            arrayOutlierFraction = npArrayDictToReturn['Outlier fraction'][0]

        # Computing the values
        tStepTempROI = 0  # time step of the temporary ROI
        for iLabel in range(nonEmptyLabelCount):
            currentLabel = arrayNonEmptyLabels.at(iLabel)

            # Getting the current object as a ROI
            aMultiROI.addToVolumeROI(ROICurrentObject, currentLabel)

            # Getting the volume in voxels
            volumeInVoxels = ROICurrentObject.getVoxelCount(tStepTempROI)

            # Creating the mask
            ChannelMaskCurrentObject.overwriteValueWithROI(ROICurrentObject, 1)

            # Extracting the array of data of this object
            npArrayDataObject = npArrayDataset[npArrayMaskCurrentObject == 1]

            # Getting the count of outliers
            outlierCount = self._getOutlierCount(npArrayDataObject)

            if arrayOutlierCount is not None:
                # Value of outlier count is a pure number
                arrayOutlierCount[iLabel] = outlierCount

            if arrayOutlierFraction is not None:
                # Value of outlier fraction is a pure number (outliers per voxel)
                arrayOutlierFraction[iLabel] = outlierCount / volumeInVoxels

            # Clearing the ROI and Channel mask for the next iteration
            ROICurrentObject.clear()
            ChannelMaskCurrentObject.setAllData(0)

            if IProgress is not None:
                if IProgress.getIsCancelled():
                    break
                elif iLabel == nextLabelUpdateProgress:
                    IProgress.updateProgress(int((iLabel + 1) / nonEmptyLabelCount * 100))
                    nextLabelUpdateProgress += labelStepRefreshingProgress

        # BUGFIX: IProgress may be None (see the guard inside the loop); the
        # original code dereferenced it unconditionally here and raised
        # AttributeError whenever no progress interface was supplied.
        if IProgress is None or not IProgress.getIsCancelled():
            # Computation is complete for all remaining statistics: clear the
            # "not fully computed" flag.
            for tag in listTagsRemaining:
                npArrayDictToReturn[tag] = (npArrayDictToReturn[tag][0], False)

        ROICurrentObject.deleteObject()
        ChannelMaskCurrentObject.deleteObject()
        arrayNonEmptyLabels.deleteObject()

        return npArrayDictToReturn

    def updateDefaultDimensionUnit(self, outputTag, view):
        """Nothing to update: both outputs are pure (dimensionless) numbers."""
        pass

    def getDefaultDimensionUnitDict(self):
        """Return the default dimension/unit dict (empty: outputs are pure numbers)."""
        if self.defaultDimensionUnitDict is None:
            self.defaultDimensionUnitDict = {}  # Initialization
        return self.defaultDimensionUnitDict

    def getDataDimensionUnitDict(self):
        """Return the data dimension/unit dict (empty: outputs are pure numbers)."""
        if self.dataDimensionUnitDict is None:
            self.dataDimensionUnitDict = {}  # Initialization
        return self.dataDimensionUnitDict

    def _getTauValue(self, n):
        """Return the critical tau value of the modified Thompson Tau test.

        Exact table entries are used when available; sizes between tabulated
        entries (n > 38) are linearly interpolated; sizes above 5000 saturate
        at 1.96; sizes below 3 return 0 (the test is undefined there).

        :param n: sample size
        :return: critical tau value (float)
        """
        if n < 3:
            # Unexpected: the test requires at least 3 samples
            return 0

        # Value over the maximum value of the table
        if n > 5000:
            return 1.9600

        # Value exactly found in table
        if n in self._tauTable:
            return self._tauTable[n]

        # Approximation found by linear interpolation between the two
        # consecutive tabulated sample sizes bracketing n
        indexFound = False
        lowerIndexInTauNInterpolation = 0  # Counter initialization
        while not indexFound:
            testTauNHigher = self._tauNInterpolation[lowerIndexInTauNInterpolation + 1]
            if n < testTauNHigher:
                indexFound = True
            else:
                lowerIndexInTauNInterpolation += 1

        tauNLower = self._tauNInterpolation[lowerIndexInTauNInterpolation]
        tauNHigher = self._tauNInterpolation[lowerIndexInTauNInterpolation + 1]
        tauValueLower = self._tauTable[tauNLower]
        tauValueHigher = self._tauTable[tauNHigher]

        tauValue = tauValueLower + (tauValueHigher-tauValueLower) * (n-tauNLower)/(tauNHigher-tauNLower)
        return tauValue

    def _getOutlierCount(self, npArrayDataUnsorted):
        """Count the outliers of a data sample with the modified Thompson Tau test.

        The sample is sorted; the most extreme remaining value (whichever end
        is farthest from the mean) is tested against tau * std and, while the
        test rejects it, it is removed and counted as an outlier.

        :param npArrayDataUnsorted: numpy array of sample values
        :return: number of outliers found (int)
        """
        # Sorting the data so the outlier candidates are always the first and
        # last elements of the remaining array
        npArrayData = np.sort(npArrayDataUnsorted)

        outlierCount = 0  # Initialization
        continueIdentificationOutliers = True
        while continueIdentificationOutliers:
            n = npArrayData.size
            if n <= 2:
                # Too few samples remain for the test to be meaningful
                continueIdentificationOutliers = False
            else:
                # Sample mean and standard deviation of the remaining data.
                # NOTE(review): np.std defaults to ddof=0 (population std); the
                # classical modified Thompson Tau test uses the sample std
                # (ddof=1) -- confirm which is intended before changing.
                mean = np.mean(npArrayData)
                std = np.std(npArrayData)

                # The candidate is whichever end value is farthest from the mean
                deltaFirstSample = mean - npArrayData[0]  # This result is >= 0
                deltaLastSample = npArrayData[-1] - mean  # This result is >= 0

                usingLastSample = deltaLastSample >= deltaFirstSample
                if usingLastSample:
                    delta = deltaLastSample
                else:
                    delta = deltaFirstSample

                # Getting the tau value
                tau = self._getTauValue(n)

                # Rejection threshold of the modified Thompson Tau test
                outlierDeltaLimit = tau * std

                isAnOutlier = delta > outlierDeltaLimit
                if not isAnOutlier:
                    # The most extreme value passed; no further value can fail
                    continueIdentificationOutliers = False
                else:
                    # Incrementing the outlier count
                    outlierCount += 1

                    # Removing the rejected sample from the data array
                    if usingLastSample:
                        npArrayData = npArrayData[:-1]
                    else:
                        npArrayData = npArrayData[1:]

        return outlierCount
