Hi,
I recently once more noticed that timestamptz_out is really, really
slow. To quantify that, I created a bunch of similar sized tables:
CREATE TABLE tbl_timestamp AS SELECT NOW() FROM generate_series(1, 100000) a,
generate_series(1, 100) b;
CREATE TABLE tbl_int8 AS SELECT 1::bigint FROM generate_series(1, 100000) a,
generate_series(1, 100) b;
CREATE TABLE tbl_bytea AS SELECT ' '::bytea FROM generate_series(1, 100000)
a, generate_series(1, 100) b;
These all end up being 346MB large.
COPY tbl_bytea TO '/dev/null';
Time: 1173.484 ms
COPY tbl_int8 TO '/dev/null';
Time: 1030.756 ms
COPY tbl_timestamp TO '/dev/null';
Time: 6598.030
(all best of three)
Yes, timestamp outputs more data as a whole, but being 5 times as slow
is still pretty darn insane. To make sure that's not the cause, here's
another table:
CREATE TABLE tbl_timestamptext AS SELECT NOW()::text FROM generate_series(1,
100000) a, generate_series(1, 100) b;
COPY tbl_timestamptext TO '/dev/null';
Time: 1449.554 ms
So it's really just the timestamp code.
Profiling it shows:
Overhead Command Shared Object Symbol
- 38.33% postgres_stock libc-2.19.so [.] vfprintf
- 97.92% vfprintf
_IO_vsprintf
- sprintf
+ 70.25% EncodeDateTime
+ 29.75% AppendSeconds.constprop.10
+ 1.11% _IO_default_xsputn
- 8.22% postgres_stock libc-2.19.so [.] _IO_default_xsputn
- 99.43% _IO_default_xsputn
- 90.09% vfprintf
_IO_vsprintf
- sprintf
+ 74.15% EncodeDateTime
+ 25.85% AppendSeconds.constprop.10
+ 9.72% _IO_padn
+ 0.57% vfprintf
+ 7.76% postgres_stock postgres_stock [.] CopyOneRowTo
So nearly all the time is spent somewhere inside the sprintf calls. Not
nice.
The only thing I could come up to make the sprintfs cheaper was to
combine them into one and remove some of the width specifiers that
aren't needed. That doesn't buy us very much.
I then proceeded to replace the sprintf call with hand-rolled
conversions. And wow, the benefit is far bigger than I'd assumed:
postgres[7236][1]=# COPY tbl_timestamp TO '/dev/null';
Time: 2430.521 ms
So, by hand-rolling the ISO conversion in EncodeDateTime() we got a
~250% performance improvement. I'd say that's worthwhile.
The attached patch shows what I did. While there's some polishing
possible, as a whole, it's pretty ugly. But I think timestamp data is so
common that it's worth the effort.
Does anybody have a fundamentally nicer idea than the attached to
improvide this?
Greetings,
Andres Freund
>From 1d7b6110f8864ee00c1fe4f54d8937347ade7d80 Mon Sep 17 00:00:00 2001
From: Andres Freund <[email protected]>
Date: Mon, 27 Jul 2015 23:09:33 +0200
Subject: [PATCH] WIP: faster timestamp[tz]_out
---
src/backend/utils/adt/datetime.c | 108 ++++++++++++++++++++++++++++++++
src/test/regress/expected/horology.out | 24 ++++---
src/test/regress/expected/timestamp.out | 62 +++++++++++-------
src/test/regress/sql/timestamp.sql | 1 +
4 files changed, 164 insertions(+), 31 deletions(-)
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 2a44b6e..4c13f01 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -3968,7 +3968,115 @@ EncodeDateTime(struct pg_tm * tm, fsec_t fsec, bool print_tz, int tz, const char
switch (style)
{
+#ifdef HAVE_INT64_TIMESTAMP
+ case USE_ISO_DATES:
+ /*
+ * Fastpath for most common format and range. Not using sprintf
+ * provides significant performance benefits, and timestamp data
+ * is pretty common. All sane use cases dealing with large amounts
+ * of data use iso timestamps, so we can focus on optimizing
+ * those, and keep the rest of the code maintainable.
+ */
+ if (tm->tm_year > 0 && tm->tm_year < 10000)
+ {
+ int year = (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1);
+
+ *str++ = (year / 1000) + '0';
+ *str++ = (year / 100) % 10 + '0';
+ *str++ = (year / 10) % 10 + '0';
+ *str++ = year % 10 + '0';
+ *str++ = '-';
+ *str++ = (tm->tm_mon / 10) + '0';
+ *str++ = tm->tm_mon % 10 + '0';
+ *str++ = '-';
+ *str++ = (tm->tm_mday / 10) + '0';
+ *str++ = tm->tm_mday % 10 + '0';
+ *str++ = ' ';
+ *str++ = (tm->tm_hour / 10) + '0';
+ *str++ = tm->tm_hour % 10 + '0';
+ *str++ = ':';
+ *str++ = (tm->tm_min / 10) + '0';
+ *str++ = tm->tm_min % 10 + '0';
+ *str++ = ':';
+ *str++ = (tm->tm_sec / 10) + '0';
+ *str++ = tm->tm_sec % 10 + '0';
+
+ /*
+ * Yes, this is darned ugly and would look nicer in a loop,
+ * but some versions of gcc can't convert the divisions into
+ * more efficient instructions unless manually unrolled.
+ */
+ if (fsec != 0)
+ {
+ int fseca = abs(fsec);
+
+ *str++ = '.';
+
+ if (fseca % 1000000 != 0)
+ {
+ *str++ = (fseca / 100000) + '0';
+
+ if (fseca % 100000 != 0)
+ {
+ *str++ = ((fseca / 10000) % 10) + '0';
+
+ if (fseca % 10000 != 0)
+ {
+ *str++ = ((fseca / 1000) % 10) + '0';
+
+ if (fseca % 1000 != 0)
+ {
+ *str++ = ((fseca / 100) % 10) + '0';
+
+ if (fseca % 100 != 0)
+ {
+ *str++ = ((fseca / 10) % 10) + '0';
+
+ if (fseca % 10 != 0)
+ {
+ *str++ = (fseca % 10) + '0';
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (print_tz)
+ {
+ int hour, min, sec;
+
+ sec = abs(tz);
+ min = sec / SECS_PER_MINUTE;
+ sec -= min * SECS_PER_MINUTE;
+ hour = min / MINS_PER_HOUR;
+ min -= hour * MINS_PER_HOUR;
+
+ *str++ = (tz <= 0 ? '+' : '-');
+
+ *str++ = (hour / 10) + '0';
+ *str++ = hour % 10 + '0';
+
+ if (min != 0)
+ {
+ *str++ = (min / 10) + '0';
+ *str++ = min % 10 + '0';
+ }
+
+ if (sec != 0)
+ {
+ *str++ = (sec / 10) + '0';
+ *str++ = sec % 10 + '0';
+ }
+ }
+ *str++ = 0;
+ break;
+ }
+ /* fall through for stuff not handled in the fast path */
+#else
case USE_ISO_DATES:
+#endif
case USE_XSD_DATES:
/* Compatible with ISO-8601 date formats */
diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out
index 1fe02be..67b469b 100644
--- a/src/test/regress/expected/horology.out
+++ b/src/test/regress/expected/horology.out
@@ -531,7 +531,8 @@ SELECT '' AS "64", d1 + interval '1 year' AS one_year FROM TIMESTAMP_TBL;
| Mon Jan 01 17:32:01 2001
| Mon Dec 31 17:32:01 2001
| Tue Jan 01 17:32:01 2002
-(65 rows)
+ | Tue Jan 01 17:32:01 12002
+(66 rows)
SELECT '' AS "64", d1 - interval '1 year' AS one_year FROM TIMESTAMP_TBL;
64 | one_year
@@ -601,7 +602,8 @@ SELECT '' AS "64", d1 - interval '1 year' AS one_year FROM TIMESTAMP_TBL;
| Fri Jan 01 17:32:01 1999
| Fri Dec 31 17:32:01 1999
| Sat Jan 01 17:32:01 2000
-(65 rows)
+ | Sat Jan 01 17:32:01 12000
+(66 rows)
SELECT timestamp with time zone '1996-03-01' - interval '1 second' AS "Feb 29";
Feb 29
@@ -2290,7 +2292,8 @@ SELECT '' AS "64", d1 AS us_postgres FROM TIMESTAMP_TBL;
| Sat Jan 01 17:32:01 2000
| Sun Dec 31 17:32:01 2000
| Mon Jan 01 17:32:01 2001
-(65 rows)
+ | Mon Jan 01 17:32:01 12001
+(66 rows)
SELECT '' AS seven, f1 AS us_postgres FROM ABSTIME_TBL;
seven | us_postgres
@@ -2373,7 +2376,8 @@ SELECT '' AS "64", d1 AS us_iso FROM TIMESTAMP_TBL;
| 2000-01-01 17:32:01
| 2000-12-31 17:32:01
| 2001-01-01 17:32:01
-(65 rows)
+ | 12001-01-01 17:32:01
+(66 rows)
SELECT '' AS seven, f1 AS us_iso FROM ABSTIME_TBL;
seven | us_iso
@@ -2462,7 +2466,8 @@ SELECT '' AS "64", d1 AS us_sql FROM TIMESTAMP_TBL;
| 01/01/2000 17:32:01
| 12/31/2000 17:32:01
| 01/01/2001 17:32:01
-(65 rows)
+ | 01/01/12001 17:32:01
+(66 rows)
SELECT '' AS seven, f1 AS us_sql FROM ABSTIME_TBL;
seven | us_sql
@@ -2558,8 +2563,9 @@ SELECT '' AS "65", d1 AS european_postgres FROM TIMESTAMP_TBL;
| Sat 01 Jan 17:32:01 2000
| Sun 31 Dec 17:32:01 2000
| Mon 01 Jan 17:32:01 2001
+ | Mon 01 Jan 17:32:01 12001
| Thu 13 Jun 00:00:00 1957
-(66 rows)
+(67 rows)
SELECT '' AS seven, f1 AS european_postgres FROM ABSTIME_TBL;
seven | european_postgres
@@ -2648,8 +2654,9 @@ SELECT '' AS "65", d1 AS european_iso FROM TIMESTAMP_TBL;
| 2000-01-01 17:32:01
| 2000-12-31 17:32:01
| 2001-01-01 17:32:01
+ | 12001-01-01 17:32:01
| 1957-06-13 00:00:00
-(66 rows)
+(67 rows)
SELECT '' AS seven, f1 AS european_iso FROM ABSTIME_TBL;
seven | european_iso
@@ -2738,8 +2745,9 @@ SELECT '' AS "65", d1 AS european_sql FROM TIMESTAMP_TBL;
| 01/01/2000 17:32:01
| 31/12/2000 17:32:01
| 01/01/2001 17:32:01
+ | 01/01/12001 17:32:01
| 13/06/1957 00:00:00
-(66 rows)
+(67 rows)
SELECT '' AS seven, f1 AS european_sql FROM ABSTIME_TBL;
seven | european_sql
diff --git a/src/test/regress/expected/timestamp.out b/src/test/regress/expected/timestamp.out
index a092fc2..463484f 100644
--- a/src/test/regress/expected/timestamp.out
+++ b/src/test/regress/expected/timestamp.out
@@ -171,6 +171,7 @@ INSERT INTO TIMESTAMP_TBL VALUES ('Dec 31 17:32:01 1999');
INSERT INTO TIMESTAMP_TBL VALUES ('Jan 01 17:32:01 2000');
INSERT INTO TIMESTAMP_TBL VALUES ('Dec 31 17:32:01 2000');
INSERT INTO TIMESTAMP_TBL VALUES ('Jan 01 17:32:01 2001');
+INSERT INTO TIMESTAMP_TBL VALUES ('Jan 01 17:32:01 12001');
-- Currently unsupported syntax and ranges
INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 -0097');
ERROR: time zone displacement out of range: "Feb 16 17:32:01 -0097"
@@ -248,7 +249,8 @@ SELECT '' AS "64", d1 FROM TIMESTAMP_TBL;
| Sat Jan 01 17:32:01 2000
| Sun Dec 31 17:32:01 2000
| Mon Jan 01 17:32:01 2001
-(65 rows)
+ | Mon Jan 01 17:32:01 12001
+(66 rows)
-- Demonstrate functions and operators
SELECT '' AS "48", d1 FROM TIMESTAMP_TBL
@@ -304,7 +306,8 @@ SELECT '' AS "48", d1 FROM TIMESTAMP_TBL
| Sat Jan 01 17:32:01 2000
| Sun Dec 31 17:32:01 2000
| Mon Jan 01 17:32:01 2001
-(49 rows)
+ | Mon Jan 01 17:32:01 12001
+(50 rows)
SELECT '' AS "15", d1 FROM TIMESTAMP_TBL
WHERE d1 < timestamp without time zone '1997-01-02';
@@ -402,7 +405,8 @@ SELECT '' AS "63", d1 FROM TIMESTAMP_TBL
| Sat Jan 01 17:32:01 2000
| Sun Dec 31 17:32:01 2000
| Mon Jan 01 17:32:01 2001
-(64 rows)
+ | Mon Jan 01 17:32:01 12001
+(65 rows)
SELECT '' AS "16", d1 FROM TIMESTAMP_TBL
WHERE d1 <= timestamp without time zone '1997-01-02';
@@ -480,7 +484,8 @@ SELECT '' AS "49", d1 FROM TIMESTAMP_TBL
| Sat Jan 01 17:32:01 2000
| Sun Dec 31 17:32:01 2000
| Mon Jan 01 17:32:01 2001
-(50 rows)
+ | Mon Jan 01 17:32:01 12001
+(51 rows)
SELECT '' AS "54", d1 - timestamp without time zone '1997-01-02' AS diff
FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01';
@@ -873,7 +878,8 @@ SELECT '' AS to_char_1, to_char(d1, 'DAY Day day DY Dy dy MONTH Month month RM M
| SATURDAY Saturday saturday SAT Sat sat JANUARY January january I JAN Jan jan
| SUNDAY Sunday sunday SUN Sun sun DECEMBER December december XII DEC Dec dec
| MONDAY Monday monday MON Mon mon JANUARY January january I JAN Jan jan
-(65 rows)
+ | MONDAY Monday monday MON Mon mon JANUARY January january I JAN Jan jan
+(66 rows)
SELECT '' AS to_char_2, to_char(d1, 'FMDAY FMDay FMday FMMONTH FMMonth FMmonth FMRM')
FROM TIMESTAMP_TBL;
@@ -944,12 +950,13 @@ SELECT '' AS to_char_2, to_char(d1, 'FMDAY FMDay FMday FMMONTH FMMonth FMmonth F
| SATURDAY Saturday saturday JANUARY January january I
| SUNDAY Sunday sunday DECEMBER December december XII
| MONDAY Monday monday JANUARY January january I
-(65 rows)
+ | MONDAY Monday monday JANUARY January january I
+(66 rows)
SELECT '' AS to_char_3, to_char(d1, 'Y,YYY YYYY YYY YY Y CC Q MM WW DDD DD D J')
FROM TIMESTAMP_TBL;
- to_char_3 | to_char
------------+-------------------------------------------------
+ to_char_3 | to_char
+-----------+----------------------------------------------------
|
|
| 1,970 1970 970 70 0 20 1 01 01 001 01 5 2440588
@@ -1015,7 +1022,8 @@ SELECT '' AS to_char_3, to_char(d1, 'Y,YYY YYYY YYY YY Y CC Q MM WW DDD DD D J')
| 2,000 2000 000 00 0 20 1 01 01 001 01 7 2451545
| 2,000 2000 000 00 0 20 4 12 53 366 31 1 2451910
| 2,001 2001 001 01 1 21 1 01 01 001 01 2 2451911
-(65 rows)
+ | 12,001 12001 001 01 1 121 1 01 01 001 01 2 6104336
+(66 rows)
SELECT '' AS to_char_4, to_char(d1, 'FMY,YYY FMYYYY FMYYY FMYY FMY FMCC FMQ FMMM FMWW FMDDD FMDD FMD FMJ')
FROM TIMESTAMP_TBL;
@@ -1086,7 +1094,8 @@ SELECT '' AS to_char_4, to_char(d1, 'FMY,YYY FMYYYY FMYYY FMYY FMY FMCC FMQ FMMM
| 2,000 2000 0 0 0 20 1 1 1 1 1 7 2451545
| 2,000 2000 0 0 0 20 4 12 53 366 31 1 2451910
| 2,001 2001 1 1 1 21 1 1 1 1 1 2 2451911
-(65 rows)
+ | 12,001 12001 1 1 1 121 1 1 1 1 1 2 6104336
+(66 rows)
SELECT '' AS to_char_5, to_char(d1, 'HH HH12 HH24 MI SS SSSS')
FROM TIMESTAMP_TBL;
@@ -1157,7 +1166,8 @@ SELECT '' AS to_char_5, to_char(d1, 'HH HH12 HH24 MI SS SSSS')
| 05 05 17 32 01 63121
| 05 05 17 32 01 63121
| 05 05 17 32 01 63121
-(65 rows)
+ | 05 05 17 32 01 63121
+(66 rows)
SELECT '' AS to_char_6, to_char(d1, E'"HH:MI:SS is" HH:MI:SS "\\"text between quote marks\\""')
FROM TIMESTAMP_TBL;
@@ -1228,7 +1238,8 @@ SELECT '' AS to_char_6, to_char(d1, E'"HH:MI:SS is" HH:MI:SS "\\"text between qu
| HH:MI:SS is 05:32:01 "text between quote marks"
| HH:MI:SS is 05:32:01 "text between quote marks"
| HH:MI:SS is 05:32:01 "text between quote marks"
-(65 rows)
+ | HH:MI:SS is 05:32:01 "text between quote marks"
+(66 rows)
SELECT '' AS to_char_7, to_char(d1, 'HH24--text--MI--text--SS')
FROM TIMESTAMP_TBL;
@@ -1299,12 +1310,13 @@ SELECT '' AS to_char_7, to_char(d1, 'HH24--text--MI--text--SS')
| 17--text--32--text--01
| 17--text--32--text--01
| 17--text--32--text--01
-(65 rows)
+ | 17--text--32--text--01
+(66 rows)
SELECT '' AS to_char_8, to_char(d1, 'YYYYTH YYYYth Jth')
FROM TIMESTAMP_TBL;
- to_char_8 | to_char
------------+-------------------------
+ to_char_8 | to_char
+-----------+---------------------------
|
|
| 1970TH 1970th 2440588th
@@ -1370,12 +1382,13 @@ SELECT '' AS to_char_8, to_char(d1, 'YYYYTH YYYYth Jth')
| 2000TH 2000th 2451545th
| 2000TH 2000th 2451910th
| 2001ST 2001st 2451911th
-(65 rows)
+ | 12001ST 12001st 6104336th
+(66 rows)
SELECT '' AS to_char_9, to_char(d1, 'YYYY A.D. YYYY a.d. YYYY bc HH:MI:SS P.M. HH:MI:SS p.m. HH:MI:SS pm')
FROM TIMESTAMP_TBL;
- to_char_9 | to_char
------------+---------------------------------------------------------------------
+ to_char_9 | to_char
+-----------+------------------------------------------------------------------------
|
|
| 1970 A.D. 1970 a.d. 1970 ad 12:00:00 A.M. 12:00:00 a.m. 12:00:00 am
@@ -1441,12 +1454,13 @@ SELECT '' AS to_char_9, to_char(d1, 'YYYY A.D. YYYY a.d. YYYY bc HH:MI:SS P.M. H
| 2000 A.D. 2000 a.d. 2000 ad 05:32:01 P.M. 05:32:01 p.m. 05:32:01 pm
| 2000 A.D. 2000 a.d. 2000 ad 05:32:01 P.M. 05:32:01 p.m. 05:32:01 pm
| 2001 A.D. 2001 a.d. 2001 ad 05:32:01 P.M. 05:32:01 p.m. 05:32:01 pm
-(65 rows)
+ | 12001 A.D. 12001 a.d. 12001 ad 05:32:01 P.M. 05:32:01 p.m. 05:32:01 pm
+(66 rows)
SELECT '' AS to_char_10, to_char(d1, 'IYYY IYY IY I IW IDDD ID')
FROM TIMESTAMP_TBL;
- to_char_10 | to_char
-------------+------------------------
+ to_char_10 | to_char
+------------+-------------------------
|
|
| 1970 970 70 0 01 004 4
@@ -1512,7 +1526,8 @@ SELECT '' AS to_char_10, to_char(d1, 'IYYY IYY IY I IW IDDD ID')
| 1999 999 99 9 52 363 6
| 2000 000 00 0 52 364 7
| 2001 001 01 1 01 001 1
-(65 rows)
+ | 12001 001 01 1 01 001 1
+(66 rows)
SELECT '' AS to_char_11, to_char(d1, 'FMIYYY FMIYY FMIY FMI FMIW FMIDDD FMID')
FROM TIMESTAMP_TBL;
@@ -1583,7 +1598,8 @@ SELECT '' AS to_char_11, to_char(d1, 'FMIYYY FMIYY FMIY FMI FMIW FMIDDD FMID')
| 1999 999 99 9 52 363 6
| 2000 0 0 0 52 364 7
| 2001 1 1 1 1 1 1
-(65 rows)
+ | 12001 1 1 1 1 1 1
+(66 rows)
-- timestamp numeric fields constructor
SELECT make_timestamp(2014,12,28,6,30,45.887);
diff --git a/src/test/regress/sql/timestamp.sql b/src/test/regress/sql/timestamp.sql
index b22cd48..b548c70 100644
--- a/src/test/regress/sql/timestamp.sql
+++ b/src/test/regress/sql/timestamp.sql
@@ -136,6 +136,7 @@ INSERT INTO TIMESTAMP_TBL VALUES ('Dec 31 17:32:01 1999');
INSERT INTO TIMESTAMP_TBL VALUES ('Jan 01 17:32:01 2000');
INSERT INTO TIMESTAMP_TBL VALUES ('Dec 31 17:32:01 2000');
INSERT INTO TIMESTAMP_TBL VALUES ('Jan 01 17:32:01 2001');
+INSERT INTO TIMESTAMP_TBL VALUES ('Jan 01 17:32:01 12001');
-- Currently unsupported syntax and ranges
INSERT INTO TIMESTAMP_TBL VALUES ('Feb 16 17:32:01 -0097');
--
2.4.0.rc2.1.g3d6bc9a
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers