000001 hash-threshold 8 000002 000003 statement ok 000004 CREATE TABLE t1( x INTEGER, y VARCHAR(8) ) 000005 000006 statement ok 000007 INSERT INTO t1 VALUES(1,'true') 000008 000009 statement ok 000010 INSERT INTO t1 VALUES(0,'false') 000011 000012 statement ok 000013 INSERT INTO t1 VALUES(NULL,'NULL') 000014 000015 statement ok 000016 CREATE INDEX t1i1 ON t1(x) 000017 000018 skipif sqlite 000019 halt 000020 000021 # count(x), avg(x), sum(x), total(x), min(x), max(x) 000022 # group_concat(x), group_concat(x,y) 000023 000024 # EVIDENCE-OF: R-00466-56349 In any aggregate function that takes a 000025 # single argument, that argument can be preceded by the keyword 000026 # DISTINCT. 000027 000028 query I nosort 000029 SELECT count(DISTINCT x) FROM t1 000030 ---- 000031 2 000032 000033 query I nosort 000034 SELECT avg(DISTINCT x) FROM t1 000035 ---- 000036 0 000037 000038 query I nosort 000039 SELECT sum(DISTINCT x) FROM t1 000040 ---- 000041 1 000042 000043 query I nosort 000044 SELECT total(DISTINCT x) FROM t1 000045 ---- 000046 1 000047 000048 query I nosort 000049 SELECT min(DISTINCT x) FROM t1 000050 ---- 000051 0 000052 000053 query I nosort 000054 SELECT max(DISTINCT x) FROM t1 000055 ---- 000056 1 000057 000058 query T nosort 000059 SELECT group_concat(DISTINCT x) FROM t1 NOT INDEXED 000060 ---- 000061 1,0 000062 000063 000064 # EVIDENCE-OF: R-00171-59428 In such cases, duplicate elements are 000065 # filtered before being passed into the aggregate function. 000066 000067 # EVIDENCE-OF: R-31453-41389 For example, the function "count(distinct 000068 # X)" will return the number of distinct values of column X instead of 000069 # the total number of non-null values in column X. 000070 000071 statement ok 000072 INSERT INTO t1 VALUES(2,'true') 000073 000074 statement ok 000075 INSERT INTO t1 VALUES(2,'true') 000076 000077 query I nosort 000078 SELECT count(DISTINCT x) FROM t1 000079 ---- 000080 3 000081 000082 000083 # EVIDENCE-OF: R-20409-33051 The avg() function returns the average 000084 # value of all non-NULL X within a group. 000085 000086 query I nosort 000087 SELECT avg(x) FROM t1 000088 ---- 000089 1 000090 000091 000092 # EVIDENCE-OF: R-29052-00975 String and BLOB values that do not look 000093 # like numbers are interpreted as 0. 000094 000095 query I nosort 000096 SELECT count(y) FROM t1 000097 ---- 000098 5 000099 000100 query I nosort 000101 SELECT avg(y) FROM t1 000102 ---- 000103 0 000104 000105 query I nosort 000106 SELECT sum(y) FROM t1 000107 ---- 000108 0 000109 000110 query I nosort 000111 SELECT total(y) FROM t1 000112 ---- 000113 0 000114 000115 query I nosort 000116 SELECT min(y) FROM t1 000117 ---- 000118 0 000119 000120 query I nosort 000121 SELECT max(y) FROM t1 000122 ---- 000123 0 000124 000125 query T nosort 000126 SELECT group_concat(y) FROM t1 000127 ---- 000128 true,false,NULL,true,true 000129 000130 # repeat with DISTINCT 000131 000132 query I nosort 000133 SELECT count(DISTINCT y) FROM t1 000134 ---- 000135 3 000136 000137 query I nosort 000138 SELECT avg(DISTINCT y) FROM t1 000139 ---- 000140 0 000141 000142 query I nosort 000143 SELECT sum(DISTINCT y) FROM t1 000144 ---- 000145 0 000146 000147 query I nosort 000148 SELECT total(DISTINCT y) FROM t1 000149 ---- 000150 0 000151 000152 query I nosort 000153 SELECT min(DISTINCT y) FROM t1 000154 ---- 000155 0 000156 000157 query I nosort 000158 SELECT max(DISTINCT y) FROM t1 000159 ---- 000160 0 000161 000162 query T nosort 000163 SELECT group_concat(DISTINCT y) FROM t1 000164 ---- 000165 true,false,NULL 000166 000167 000168 # EVIDENCE-OF: R-17177-10067 The result of avg() is always a floating 000169 # point value whenever there is at least one non-NULL input even if all 000170 # inputs are integers. 000171 000172 000173 query R nosort 000174 SELECT avg(x) FROM t1 000175 ---- 000176 1.250 000177 000178 query R nosort 000179 SELECT avg(DISTINCT x) FROM t1 000180 ---- 000181 1.000 000182 000183 000184 # EVIDENCE-OF: R-40597-22164 The result of avg() is NULL if and only if 000185 # there are no non-NULL inputs. 000186 000187 query I nosort label-NULL 000188 SELECT avg(x) FROM t1 WHERE y='null' 000189 ---- 000190 NULL 000191 000192 query I nosort label-NULL 000193 SELECT avg(DISTINCT x) FROM t1 WHERE y='null' 000194 ---- 000195 NULL 000196 000197 000198 # EVIDENCE-OF: R-34280-42283 The count(X) function returns a count of 000199 # the number of times that X is not NULL in a group. 000200 000201 query I nosort 000202 SELECT count(x) FROM t1 WHERE y='null' 000203 ---- 000204 0 000205 000206 query I nosort 000207 SELECT count(DISTINCT x) FROM t1 WHERE y='null' 000208 ---- 000209 0 000210 000211 query I nosort 000212 SELECT count(x) FROM t1 WHERE y='false' 000213 ---- 000214 1 000215 000216 query I nosort 000217 SELECT count(DISTINCT x) FROM t1 WHERE y='false' 000218 ---- 000219 1 000220 000221 000222 # EVIDENCE-OF: R-13776-21310 The count(*) function (with no arguments) 000223 # returns the total number of rows in the group. 000224 000225 query I nosort 000226 SELECT count(*) FROM t1 WHERE y='false' 000227 ---- 000228 1 000229 000230 # TBD: can DISTINCT be used with *? 000231 000232 statement error 000233 SELECT count(DISTINCT *) FROM t1 WHERE y='false' 000234 000235 000236 # EVIDENCE-OF: R-56088-25150 The group_concat() function returns a 000237 # string which is the concatenation of all non-NULL values of X. 000238 000239 query T nosort 000240 SELECT group_concat(x) FROM t1 NOT INDEXED 000241 ---- 000242 1,0,2,2 000243 000244 query T nosort 000245 SELECT group_concat(DISTINCT x) FROM t1 NOT INDEXED 000246 ---- 000247 1,0,2 000248 000249 000250 # EVIDENCE-OF: R-08600-21007 If parameter Y is present then it is used 000251 # as the separator between instances of X. 000252 000253 query T nosort 000254 SELECT group_concat(x,':') FROM t1 NOT INDEXED 000255 ---- 000256 1:0:2:2 000257 000258 # TBD: DISTINCT can only be used with single parameters 000259 statement error 000260 SELECT group_concat(DISTINCT x,':') FROM t1 000261 000262 000263 # EVIDENCE-OF: R-39910-14723 A comma (",") is used as the separator if Y 000264 # is omitted. 000265 000266 query T nosort 000267 SELECT group_concat(x) FROM t1 NOT INDEXED 000268 ---- 000269 1,0,2,2 000270 000271 query T nosort 000272 SELECT group_concat(DISTINCT x) FROM t1 NOT INDEXED 000273 ---- 000274 1,0,2 000275 000276 000277 # EVIDENCE-OF: R-52585-35928 The max() aggregate function returns the 000278 # maximum value of all values in the group. 000279 000280 query I nosort 000281 SELECT max(x) FROM t1 000282 ---- 000283 2 000284 000285 query I nosort 000286 SELECT max(DISTINCT x) FROM t1 000287 ---- 000288 2 000289 000290 000291 # TBD: last non-NULL value 000292 # EVIDENCE-OF: R-13053-11096 The maximum value is the value that would 000293 # be returned last in an ORDER BY on the same column. 000294 000295 query I nosort 000296 SELECT x FROM t1 WHERE x NOT NULL ORDER BY x 000297 ---- 000298 0 000299 1 000300 2 000301 2 000302 000303 query I nosort 000304 SELECT DISTINCT x FROM t1 WHERE x NOT NULL ORDER BY x 000305 ---- 000306 0 000307 1 000308 2 000309 000310 000311 # EVIDENCE-OF: R-50775-16353 Aggregate max() returns NULL if and only if 000312 # there are no non-NULL values in the group. 000313 000314 query I nosort label-NULL 000315 SELECT max(x) FROM t1 WHERE y='null' 000316 ---- 000317 NULL 000318 000319 query I nosort label-NULL 000320 SELECT max(DISTINCT x) FROM t1 WHERE y='null' 000321 ---- 000322 NULL 000323 000324 000325 # EVIDENCE-OF: R-16028-39081 The min() aggregate function returns the 000326 # minimum non-NULL value of all values in the group. 000327 000328 query I nosort 000329 SELECT min(x) FROM t1 000330 ---- 000331 0 000332 000333 query I nosort 000334 SELECT min(DISTINCT x) FROM t1 000335 ---- 000336 0 000337 000338 000339 # EVIDENCE-OF: R-30311-39793 The minimum value is the first non-NULL 000340 # value that would appear in an ORDER BY of the column. 000341 000342 query I nosort 000343 SELECT x FROM t1 WHERE x NOT NULL ORDER BY x 000344 ---- 000345 0 000346 1 000347 2 000348 2 000349 000350 query I nosort 000351 SELECT DISTINCT x FROM t1 WHERE x NOT NULL ORDER BY x 000352 ---- 000353 0 000354 1 000355 2 000356 000357 000358 # EVIDENCE-OF: R-10396-30188 Aggregate min() returns NULL if and only if 000359 # there are no non-NULL values in the group. 000360 000361 query I nosort label-NULL 000362 SELECT min(x) FROM t1 WHERE y='null' 000363 ---- 000364 NULL 000365 000366 query I nosort label-NULL 000367 SELECT min(DISTINCT x) FROM t1 WHERE y='null' 000368 ---- 000369 NULL 000370 000371 000372 # EVIDENCE-OF: R-24943-34514 The sum() and total() aggregate functions 000373 # return the sum of all non-NULL values in the group. 000374 000375 query I nosort label-sum 000376 SELECT sum(x) FROM t1 000377 ---- 000378 5 000379 000380 query I nosort label-sum 000381 SELECT total(x) FROM t1 000382 ---- 000383 5 000384 000385 query I nosort label-sum-distinct 000386 SELECT sum(DISTINCT x) FROM t1 000387 ---- 000388 3 000389 000390 query I nosort label-sum-distinct 000391 SELECT total(DISTINCT x) FROM t1 000392 ---- 000393 3 000394 000395 000396 # EVIDENCE-OF: R-44223-43966 If there are no non-NULL input rows then 000397 # sum() returns NULL but total() returns 0. 000398 000399 query I nosort label-NULL 000400 SELECT sum(x) FROM t1 WHERE y='null' 000401 ---- 000402 NULL 000403 000404 query I nosort label-NULL 000405 SELECT sum(DISTINCT x) FROM t1 WHERE y='null' 000406 ---- 000407 NULL 000408 000409 query I nosort label-zero 000410 SELECT total(x) FROM t1 WHERE y='null' 000411 ---- 000412 0 000413 000414 query I nosort label-zero 000415 SELECT total(DISTINCT x) FROM t1 WHERE y='null' 000416 ---- 000417 0 000418 000419 000420 # EVIDENCE-OF: R-07734-01023 The result of total() is always a floating 000421 # point value. 000422 000423 query R nosort 000424 SELECT total(x) FROM t1 000425 ---- 000426 5.000 000427 000428 query R nosort 000429 SELECT total(DISTINCT x) FROM t1 000430 ---- 000431 3.000 000432 000433 000434 # EVIDENCE-OF: R-19660-56479 The result of sum() is an integer value if 000435 # all non-NULL inputs are integers. 000436 000437 query I nosort label-sum 000438 SELECT sum(x) FROM t1 000439 ---- 000440 5 000441 000442 query I nosort label-sum-distinct 000443 SELECT sum(DISTINCT x) FROM t1 000444 ---- 000445 3 000446 000447 000448 # EVIDENCE-OF: R-33611-59266 If any input to sum() is neither an integer 000449 # nor a NULL, then sum() returns a floating point value which is an 000450 # approximation of the mathematical sum. 000451 000452 statement ok 000453 INSERT INTO t1 VALUES(4.0,'true') 000454 000455 query R nosort 000456 SELECT sum(x) FROM t1 000457 ---- 000458 9.000 000459 000460 query R nosort 000461 SELECT sum(DISTINCT x) FROM t1 000462 ---- 000463 7.000 000464 000465 000466 # TBD-EVIDENCE-OF: R-08904-24719 Sum() will throw an "integer overflow" 000467 # exception if all inputs are integers or NULL and an integer overflow 000468 # occurs at any point during the computation. 000469 000470 statement ok 000471 INSERT INTO t1 VALUES(1<<63,'true'); 000472 000473 statement ok 000474 INSERT INTO t1 VALUES(1<<63,'true'); 000475 000476 statement ok 000477 INSERT INTO t1 VALUES(-1,'true'); 000478 000479 query R nosort 000480 SELECT sum(x) FROM t1 000481 ---- 000482 000483 query R nosort 000484 SELECT sum(DISTINCT x) FROM t1 000485 ---- 000486 000487 000488 # TBD-EVIDENCE-OF: R-19553-64528 Total() never throws an integer overflow. 000489 000490 query R nosort 000491 SELECT total(x) FROM t1 000492 ---- 000493 000494 query R nosort 000495 SELECT total(DISTINCT x) FROM t1 000496 ----