#!python3
from __future__ import absolute_import, division, print_function,
unicode_literals
import apsw
db = apsw.Connection()
db.executescript("""
create table x
(
value integer not null
);
insert into x values (120), (130), (140), (110);
""")
# Method 1: Using partial Running calc
sumx = 0.0
sumx2 = 0.0
count = 0
for row in db.execute('select value from x'):
sumx += row.value
sumx2 += row.value ** 2
count += 1
sd1 = (sumx2 / count - (sumx / count) ** 2) ** 0.5
# Method 2: Using double retrieval brute force calc
avgx = 0.0
avgm = 0.0
count = 0
for row in db.execute('select avg(value) as avg from x'):
avgx = row.avg
for row in db.execute('select value from x'):
avgm += (row.value - avgx) ** 2
count += 1
sd2 = (avgm / count) ** 0.5
# Method 3: Using my extension which does full running calc
for row in db.execute('select stdevp(value) as stdev from x'):
sd3 = row.stdev
# Method 4: Using mostly pure SQL
for row in db.execute('select count(value) as count, sum(value) as sumx,
sum(value*value) as sumx2 from x'):
sd4 = (row.sumx2 / row.count - (row.sumx / row.count) ** 2) ** 0.5
# Method 5: Using even more calcs in SQL
for row in db.execute('''select sumx2 / count - (sumx / count) * (sumx / count)
as var
from (select count(value) as count, sum(value) as
sumx, sum(value*value) as sumx2 from x)'''):
sd5 = row.var ** 0.5
print(sd1, sd2, sd3, sd4, sd5)
=>
11.180339887498949 11.180339887498949 11.180339887498949 11.180339887498949
11.180339887498949
You could even calculate the square root in a recursive CTE if you wanted and
not have your application do anything at all other than submit the query.
However, Method 1 is likely the most efficient (and easiest) to do assuming
that you do not have an extension which calculates the aggregate, and is also
likely to execute the fastest.
--
The fact that there's a Highway to Hell but only a Stairway to Heaven says a
lot about anticipated traffic volume.
>-----Original Message-----
>From: sqlite-users <[email protected]> On
>Behalf Of Richard Damon
>Sent: Saturday, 12 October, 2019 09:33
>To: [email protected]
>Subject: Re: [sqlite] Standard deviation last x entries
>
>On 10/12/19 11:23 AM, Richard Damon wrote:
>> On 10/12/19 10:08 AM, Bart Smissaert wrote:
>>> How do I get the standard deviation of the last 4 entries (there could
>be
>>> less than 4) of an integer column grouped by an integer ID entry in
>another
>>> column in the same table.
>>>
>>> So data could be like this:
>>>
>>> ID xValue
>>> ----------------
>>> 1 130
>>> 1 120
>>> 1 140
>>> 1 100
>>> 1 110
>>> 2 140
>>> 2 130
>>> 2 150
>>>
>>> I know that SQLite doesn't have the SQRT function (and I can't make a
>UDF
>>> as I am doing this in an Android phone app) but I can do this last
>step in
>>> code.
>>>
>>> RBS
>> I may not be the best at forming the SQL to do this, but here is the
>> general idea I would use. First use a query with ORDER BY DECR and
>LIMIT
>> to get the last 4 items, and select the value, and a computer column of
>> value2 = value * value.
>>
>> Then over this result, do a GROUP BY to compute N = count(value), S =
>> sum(value), S2 =sum(value2)
>>
>> You can then compute the variance (standard deviation squared) from the
>> equation
>>
>> Variance = S2 / N - (S / N) * (S / N)
>>
>> This equation assumes that you are working with the full population and
>> not just a sample, if you data is to be considered a sample you need to
>> make a small adjustment in the formula to
>>
>> Estimated Variance of Popultion = S2 / (N-1) - S * S / (N * (N-1))
>>
>>
>(Sorry about the messed up subject, reposting to fix it)
>
>--
>Richard Damon
>
>_______________________________________________
>sqlite-users mailing list
>[email protected]
>http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users
_______________________________________________
sqlite-users mailing list
[email protected]
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users