summaryrefslogtreecommitdiffstats
path: root/sc/source/core/inc/arraysumfunctor.hxx
blob: fb328584281218a0dc9a1b88b06e5ca6c6d4e5c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 */

#ifndef INCLUDED_SC_SOURCE_CORE_INC_ARRAYSUMFUNCTOR_HXX
#define INCLUDED_SC_SOURCE_CORE_INC_ARRAYSUMFUNCTOR_HXX

#include <cstdint>
#include <rtl/math.hxx>

#include <tools/simd.hxx>
#include <tools/cpuid.hxx>

namespace sc
{

struct ArraySumFunctor
{
private:
    const double* mpArray;
    size_t mnSize;

public:
    ArraySumFunctor(const double* pArray, size_t nSize)
        : mpArray(pArray)
        , mnSize(nSize)
    {
    }

    double operator() ()
    {
        const static bool hasSSE2 = cpuid::hasSSE2();

        double fSum = 0.0;
        size_t i = 0;
        const double* pCurrent = mpArray;

        if (hasSSE2)
        {
            while ( i < mnSize && !simd::isAligned<double, 16>(pCurrent))
            {
                fSum += *pCurrent++;
                i++;
            }
            if( i < mnSize )
            {
                fSum += executeSSE2(i, pCurrent);
            }
        }
        else
            fSum += executeUnrolled(i, pCurrent);

        // sum rest of the array

        for (; i < mnSize; ++i)
            fSum += mpArray[i];

        // If the sum is a NaN, some of the terms were empty cells, probably.
        // Re-calculate, carefully
        if (!std::isfinite(fSum))
        {
            sal_uInt32 nErr = reinterpret_cast< sal_math_Double * >(&fSum)->nan_parts.fraction_lo;
            if (nErr & 0xffff0000)
            {
                fSum = 0;
                for (i = 0; i < mnSize; i++)
                {
                    if (!std::isfinite(mpArray[i]))
                    {
                        nErr = reinterpret_cast< const sal_math_Double * >(&mpArray[i])->nan_parts.fraction_lo;
                        if (!(nErr & 0xffff0000))
                            fSum += mpArray[i]; // Let errors encoded as NaNs propagate ???
                    }
                    else
                        fSum += mpArray[i];
                }
            }
        }
        return fSum;
    }

private:

    double executeSSE2(size_t& i, const double* pCurrent) const;
    double executeUnrolled(size_t& i, const double* pCurrent) const
    {
        size_t nRealSize = mnSize - i;
        size_t nUnrolledSize = nRealSize - (nRealSize % 4);

        if (nUnrolledSize > 0)
        {
            double sum0 = 0.0;
            double sum1 = 0.0;
            double sum2 = 0.0;
            double sum3 = 0.0;

            for (; i < nUnrolledSize; i += 4)
            {
                sum0 += *pCurrent++;
                sum1 += *pCurrent++;
                sum2 += *pCurrent++;
                sum3 += *pCurrent++;
            }
            return sum0 + sum1 + sum2 + sum3;
        }
        return 0.0;
    }
};

} // end namespace sc

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */