Re: [PATCH v2 6/7] perf script python: add Python3 support to sql scripts
Em Fri, Mar 08, 2019 at 11:47:55AM +0200, Adrian Hunter escreveu: > On 7/03/19 8:51 PM, Arnaldo Carvalho de Melo wrote: > > Em Wed, Mar 06, 2019 at 08:32:42AM -0800, Tony Jones escreveu: > >> On 3/6/19 1:26 AM, Adrian Hunter wrote: > >>> On 2/03/19 3:19 AM, Tony Jones wrote: > Support both Python2 and Python3 in the exported-sql-viewer.py, > export-to-postgresql.py and export-to-sqlite.py scripts > > There may be differences in the ordering of output lines due to > differences in dictionary ordering etc. However the format within lines > should be unchanged. > > The use of 'from __future__' implies the minimum supported Python2 > version > is now v2.6 > > Signed-off-by: Tony Jones > Signed-off-by: Seeteena Thoufeek > Cc: Adrian Hunter > >>> > >>> Apart from one issue (see below), it looks good, thank you! > >>> > --- > tools/perf/scripts/python/export-to-postgresql.py | 65 > +++ > tools/perf/scripts/python/export-to-sqlite.py | 23 > tools/perf/scripts/python/exported-sql-viewer.py | 42 ++- > 3 files changed, 84 insertions(+), 46 deletions(-) > > diff --git a/tools/perf/scripts/python/export-to-postgresql.py > b/tools/perf/scripts/python/export-to-postgresql.py > index 390a351d15ea..439bbbf1e036 100644 > --- a/tools/perf/scripts/python/export-to-postgresql.py > +++ b/tools/perf/scripts/python/export-to-postgresql.py > @@ -10,6 +10,8 @@ > # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > for > # more details. 
> > +from __future__ import print_function > + > import os > import sys > import struct > @@ -199,6 +201,16 @@ import datetime > > from PySide.QtSql import * > > +if sys.version_info < (3, 0): > +def tobytes(str): > +return str > +else: > +def tobytes(str): > +# Use latin-1 (ISO-8859-1) so all code-points 0-255 > will result > +# in one byte (note utf-8 is 2 bytes for values > 128 > and > +# ascii is limited to values <= 128) > +return bytes(str, "ISO-8859-1") > >>> > >>> Probably this should be the server_encoding, but python2 allowed UTF-8 > >>> so let's just use UTF-8 for now. That will also mean doing the conversion > >>> before getting the len(), otherwise len() can be wrong. > >> > >> I'm not totally understanding what you're saying here. The rationale for > >> using latin-1 and not UTF-8 was clearly expressed in the comment. Else > >> you > >> do indeed run into length issues. > >> > >> Would it be easier, since you have a) more familiarity with the code b) > >> some > >> specific issues I'm not fully understanding if you just took this patch and > >> made the changes you want yourself. I doubt I'll ever use these scripta, > >> my > >> interest is purely in eliminating Python2 as a fixed requirement. > > > > Adrian, can you please reply here? I'm not familiar with this tobytes() > > python2/python3 difference, what do you mean about using > > 'server_encoding'? Where is that defined? > > Under python 2 the character set was not changed, so UTF-8, for example, > would pass through unmodified. > > Under python 3, the perf strings are converted to unicode because that > is what python 3 uses for strings. > > So under python 3, the correct character set must be used when converting > back to a character encoding that postgrsql expects. > > client_encoding is a postgresql connection parameter. > > server_encoding is a postgresql database parameter. 
> > To keep things simple for now, I would prefer to hard code UTF-8 rather > than ISO-8859-1 because I think it is more future-proof. UTF-8 is a > superset of ISO-8859-1 but can have multi-byte characters, so the > conversion must be performed before calculating the output string length. > > Ideally, the script would ask/tell the client or server what character > encoding to use, but hard coding will do for now. > > > This is what I would like: Tony, can you check this one so that I may process it? Would be nice to fold Adrian's comments above into the end result, ok? Thanks, - Arnaldo > > diff --git a/tools/perf/scripts/python/export-to-postgresql.py > b/tools/perf/scripts/python/export-to-postgresql.py > index 390a351d15ea..00ab972a2eba 100644 > --- a/tools/perf/scripts/python/export-to-postgresql.py > +++ b/tools/perf/scripts/python/export-to-postgresql.py > @@ -10,6 +10,8 @@ > # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > # more details. > > +from __future__ import print_function > + > import os > import sys > import struct > @@ -199,6 +201,18 @@ import datetime > > from PySide.QtSql import * > > +if sys.version_info < (3, 0): > + def
Re: [PATCH v2 6/7] perf script python: add Python3 support to sql scripts
On 7/03/19 8:51 PM, Arnaldo Carvalho de Melo wrote: > Em Wed, Mar 06, 2019 at 08:32:42AM -0800, Tony Jones escreveu: >> On 3/6/19 1:26 AM, Adrian Hunter wrote: >>> On 2/03/19 3:19 AM, Tony Jones wrote: Support both Python2 and Python3 in the exported-sql-viewer.py, export-to-postgresql.py and export-to-sqlite.py scripts There may be differences in the ordering of output lines due to differences in dictionary ordering etc. However the format within lines should be unchanged. The use of 'from __future__' implies the minimum supported Python2 version is now v2.6 Signed-off-by: Tony Jones Signed-off-by: Seeteena Thoufeek Cc: Adrian Hunter >>> >>> Apart from one issue (see below), it looks good, thank you! >>> --- tools/perf/scripts/python/export-to-postgresql.py | 65 +++ tools/perf/scripts/python/export-to-sqlite.py | 23 tools/perf/scripts/python/exported-sql-viewer.py | 42 ++- 3 files changed, 84 insertions(+), 46 deletions(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 390a351d15ea..439bbbf1e036 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -10,6 +10,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. +from __future__ import print_function + import os import sys import struct @@ -199,6 +201,16 @@ import datetime from PySide.QtSql import * +if sys.version_info < (3, 0): + def tobytes(str): + return str +else: + def tobytes(str): + # Use latin-1 (ISO-8859-1) so all code-points 0-255 will result + # in one byte (note utf-8 is 2 bytes for values > 128 and + # ascii is limited to values <= 128) + return bytes(str, "ISO-8859-1") >>> >>> Probably this should be the server_encoding, but python2 allowed UTF-8 >>> so let's just use UTF-8 for now. That will also mean doing the conversion >>> before getting the len(), otherwise len() can be wrong. 
>> >> I'm not totally understanding what you're saying here. The rationale for >> using latin-1 and not UTF-8 was clearly expressed in the comment. Else you >> do indeed run into length issues. >> >> Would it be easier, since you have a) more familiarity with the code b) some >> specific issues I'm not fully understanding if you just took this patch and >> made the changes you want yourself. I doubt I'll ever use these scripts, my >> interest is purely in eliminating Python2 as a fixed requirement. > > Adrian, can you please reply here? I'm not familiar with this tobytes() > python2/python3 difference, what do you mean about using > 'server_encoding'? Where is that defined? Under python 2 the character set was not changed, so UTF-8, for example, would pass through unmodified. Under python 3, the perf strings are converted to unicode because that is what python 3 uses for strings. So under python 3, the correct character set must be used when converting back to a character encoding that postgresql expects. client_encoding is a postgresql connection parameter. server_encoding is a postgresql database parameter. To keep things simple for now, I would prefer to hard code UTF-8 rather than ISO-8859-1 because I think it is more future-proof. UTF-8 is a superset of ISO-8859-1 but can have multi-byte characters, so the conversion must be performed before calculating the output string length. Ideally, the script would ask/tell the client or server what character encoding to use, but hard coding will do for now. This is what I would like: diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 390a351d15ea..00ab972a2eba 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -10,6 +10,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. 
+from __future__ import print_function + import os import sys import struct @@ -199,6 +201,18 @@ import datetime from PySide.QtSql import * +if sys.version_info < (3, 0): + def toserverstr(str): + return str + def toclientstr(str): + return str +else: + # Assume UTF-8 server_encoding and client_encoding + def toserverstr(str): + return bytes(str, "UTF_8") + def toclientstr(str): + return bytes(str, "UTF_8") + # Need to access PostgreSQL C library directly to use COPY FROM STDIN from ctypes import * libpq = CDLL("libpq.so.5") @@ -234,12 +248,14 @@ perf_db_export_mode = True perf_db_export_calls = False
Re: [PATCH v2 6/7] perf script python: add Python3 support to sql scripts
Em Wed, Mar 06, 2019 at 08:32:42AM -0800, Tony Jones escreveu: > On 3/6/19 1:26 AM, Adrian Hunter wrote: > > On 2/03/19 3:19 AM, Tony Jones wrote: > >> Support both Python2 and Python3 in the exported-sql-viewer.py, > >> export-to-postgresql.py and export-to-sqlite.py scripts > >> > >> There may be differences in the ordering of output lines due to > >> differences in dictionary ordering etc. However the format within lines > >> should be unchanged. > >> > >> The use of 'from __future__' implies the minimum supported Python2 version > >> is now v2.6 > >> > >> Signed-off-by: Tony Jones > >> Signed-off-by: Seeteena Thoufeek > >> Cc: Adrian Hunter > > > > Apart from one issue (see below), it looks good, thank you! > > > >> --- > >> tools/perf/scripts/python/export-to-postgresql.py | 65 > >> +++ > >> tools/perf/scripts/python/export-to-sqlite.py | 23 > >> tools/perf/scripts/python/exported-sql-viewer.py | 42 ++- > >> 3 files changed, 84 insertions(+), 46 deletions(-) > >> > >> diff --git a/tools/perf/scripts/python/export-to-postgresql.py > >> b/tools/perf/scripts/python/export-to-postgresql.py > >> index 390a351d15ea..439bbbf1e036 100644 > >> --- a/tools/perf/scripts/python/export-to-postgresql.py > >> +++ b/tools/perf/scripts/python/export-to-postgresql.py > >> @@ -10,6 +10,8 @@ > >> # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > >> for > >> # more details. 
> >> > >> +from __future__ import print_function > >> + > >> import os > >> import sys > >> import struct > >> @@ -199,6 +201,16 @@ import datetime > >> > >> from PySide.QtSql import * > >> > >> +if sys.version_info < (3, 0): > >> + def tobytes(str): > >> + return str > >> +else: > >> + def tobytes(str): > >> + # Use latin-1 (ISO-8859-1) so all code-points 0-255 will result > >> + # in one byte (note utf-8 is 2 bytes for values > 128 and > >> + # ascii is limited to values <= 128) > >> + return bytes(str, "ISO-8859-1") > > > > Probably this should be the server_encoding, but python2 allowed UTF-8 > > so let's just use UTF-8 for now. That will also mean doing the conversion > > before getting the len(), otherwise len() can be wrong. > > I'm not totally understanding what you're saying here. The rationale for > using latin-1 and not UTF-8 was clearly expressed in the comment. Else you > do indeed run into length issues. > > Would it be easier, since you have a) more familiarity with the code b) some > specific issues I'm not fully understanding if you just took this patch and > made the changes you want yourself. I doubt I'll ever use these scripta, my > interest is purely in eliminating Python2 as a fixed requirement. Adrian, can you please reply here? I'm not familiar with this tobytes() python2/python3 difference, what do you mean about using 'server_encoding'? Where is that defined? - Arnaldo
Re: [PATCH v2 6/7] perf script python: add Python3 support to sql scripts
On 3/6/19 1:26 AM, Adrian Hunter wrote: > On 2/03/19 3:19 AM, Tony Jones wrote: >> Support both Python2 and Python3 in the exported-sql-viewer.py, >> export-to-postgresql.py and export-to-sqlite.py scripts >> >> There may be differences in the ordering of output lines due to >> differences in dictionary ordering etc. However the format within lines >> should be unchanged. >> >> The use of 'from __future__' implies the minimum supported Python2 version >> is now v2.6 >> >> Signed-off-by: Tony Jones >> Signed-off-by: Seeteena Thoufeek >> Cc: Adrian Hunter > > Apart from one issue (see below), it looks good, thank you! > >> --- >> tools/perf/scripts/python/export-to-postgresql.py | 65 >> +++ >> tools/perf/scripts/python/export-to-sqlite.py | 23 >> tools/perf/scripts/python/exported-sql-viewer.py | 42 ++- >> 3 files changed, 84 insertions(+), 46 deletions(-) >> >> diff --git a/tools/perf/scripts/python/export-to-postgresql.py >> b/tools/perf/scripts/python/export-to-postgresql.py >> index 390a351d15ea..439bbbf1e036 100644 >> --- a/tools/perf/scripts/python/export-to-postgresql.py >> +++ b/tools/perf/scripts/python/export-to-postgresql.py >> @@ -10,6 +10,8 @@ >> # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for >> # more details. >> >> +from __future__ import print_function >> + >> import os >> import sys >> import struct >> @@ -199,6 +201,16 @@ import datetime >> >> from PySide.QtSql import * >> >> +if sys.version_info < (3, 0): >> +def tobytes(str): >> +return str >> +else: >> +def tobytes(str): >> +# Use latin-1 (ISO-8859-1) so all code-points 0-255 will result >> +# in one byte (note utf-8 is 2 bytes for values > 128 and >> +# ascii is limited to values <= 128) >> +return bytes(str, "ISO-8859-1") > > Probably this should be the server_encoding, but python2 allowed UTF-8 > so let's just use UTF-8 for now. That will also mean doing the conversion > before getting the len(), otherwise len() can be wrong. 
I'm not totally understanding what you're saying here. The rationale for using latin-1 and not UTF-8 was clearly expressed in the comment. Else you do indeed run into length issues. Would it be easier, since you have a) more familiarity with the code b) some specific issues I'm not fully understanding if you just took this patch and made the changes you want yourself? I doubt I'll ever use these scripts; my interest is purely in eliminating Python2 as a fixed requirement. Tony
Re: [PATCH v2 6/7] perf script python: add Python3 support to sql scripts
On 2/03/19 3:19 AM, Tony Jones wrote: > Support both Python2 and Python3 in the exported-sql-viewer.py, > export-to-postgresql.py and export-to-sqlite.py scripts > > There may be differences in the ordering of output lines due to > differences in dictionary ordering etc. However the format within lines > should be unchanged. > > The use of 'from __future__' implies the minimum supported Python2 version > is now v2.6 > > Signed-off-by: Tony Jones > Signed-off-by: Seeteena Thoufeek > Cc: Adrian Hunter Apart from one issue (see below), it looks good, thank you! > --- > tools/perf/scripts/python/export-to-postgresql.py | 65 > +++ > tools/perf/scripts/python/export-to-sqlite.py | 23 > tools/perf/scripts/python/exported-sql-viewer.py | 42 ++- > 3 files changed, 84 insertions(+), 46 deletions(-) > > diff --git a/tools/perf/scripts/python/export-to-postgresql.py > b/tools/perf/scripts/python/export-to-postgresql.py > index 390a351d15ea..439bbbf1e036 100644 > --- a/tools/perf/scripts/python/export-to-postgresql.py > +++ b/tools/perf/scripts/python/export-to-postgresql.py > @@ -10,6 +10,8 @@ > # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > # more details. > > +from __future__ import print_function > + > import os > import sys > import struct > @@ -199,6 +201,16 @@ import datetime > > from PySide.QtSql import * > > +if sys.version_info < (3, 0): > + def tobytes(str): > + return str > +else: > + def tobytes(str): > + # Use latin-1 (ISO-8859-1) so all code-points 0-255 will result > + # in one byte (note utf-8 is 2 bytes for values > 128 and > + # ascii is limited to values <= 128) > + return bytes(str, "ISO-8859-1") Probably this should be the server_encoding, but python2 allowed UTF-8 so let's just use UTF-8 for now. That will also mean doing the conversion before getting the len(), otherwise len() can be wrong. 
Example of unicode symbol (works with python2 but not python3): $ cat unicode-var.c void myfunc\U0520(void) { } int main() { myfunc\U0520(); return 0; } $ gcc -O0 -ggdb3 -o unicode-var -finput-charset=UTF-8 -fextended-identifiers -fexec-charset=UTF-8 unicode-var.c $ perf record -e intel_pt//u ./unicode-var $ ldd `which perf` | grep python libpython2.7.so.1.0 => /usr/lib/x86_64-linux-gnu/libpython2.7.so.1.0 (0x7f2ca45bc000) $ perf script --itrace=be -s tools/perf/scripts/python/export-to-postgresql.py uvar_1 branches calls 2019-03-06 02:29:22.603095 Creating database... The server version of this PostgreSQL is unknown, falling back to the client version. The server version of this PostgreSQL is unknown, falling back to the client version. 2019-03-06 02:29:22.945439 Writing to intermediate files... 2019-03-06 02:29:22.991863 Copying to database... 2019-03-06 02:29:23.017039 Removing intermediate files... 2019-03-06 02:29:23.017542 Adding primary keys 2019-03-06 02:29:23.097973 Adding foreign keys 2019-03-06 02:29:23.161803 Done $ make PYTHON=python3 -C tools/perf install >/dev/null $ ldd `which perf` | grep python libpython3.6m.so.1.0 => /usr/lib/x86_64-linux-gnu/libpython3.6m.so.1.0 (0x7f4ec161f000) $ perf script --itrace=be -s tools/perf/scripts/python/export-to-postgresql.py uvar_2 branches calls 2019-03-06 02:36:19.837460 Creating database... The server version of this PostgreSQL is unknown, falling back to the client version. The server version of this PostgreSQL is unknown, falling back to the client version. 2019-03-06 02:36:20.168318 Writing to intermediate files... 
Traceback (most recent call last): File "tools/perf/scripts/python/export-to-postgresql.py", line 733, in symbol_table tobytes(symbol_name)) File "tools/perf/scripts/python/export-to-postgresql.py", line 212, in tobytes return bytes(str, "ISO-8859-1") UnicodeEncodeError: 'latin-1' codec can't encode character '\u0520' in position 6: ordinal not in range(256) Fatal Python error: problem in Python trace event handler Current thread 0x7f1706eb5740 (most recent call first): Aborted (core dumped) > + > # Need to access PostgreSQL C library directly to use COPY FROM STDIN > from ctypes import * > libpq = CDLL("libpq.so.5") > @@ -234,12 +246,14 @@ perf_db_export_mode = True > perf_db_export_calls = False > perf_db_export_callchains = False > > +def printerr(*args, **kw_args): > + print(*args, file=sys.stderr, **kw_args) > > def usage(): > - print >> sys.stderr, "Usage is: export-to-postgresql.py > [] [] []" > - print >> sys.stderr, "where:columns 'all' or 'branches'" > - print >> sys.stderr, " calls 'calls' => create calls > and call_paths table" > - print >> sys.stderr, " callchains 'callchains' => create > call_paths table" > + printerr("Usage is: export-to-postgresql.py [] > [] []") > + printerr("where:
[PATCH v2 6/7] perf script python: add Python3 support to sql scripts
Support both Python2 and Python3 in the exported-sql-viewer.py, export-to-postgresql.py and export-to-sqlite.py scripts There may be differences in the ordering of output lines due to differences in dictionary ordering etc. However the format within lines should be unchanged. The use of 'from __future__' implies the minimum supported Python2 version is now v2.6 Signed-off-by: Tony Jones Signed-off-by: Seeteena Thoufeek Cc: Adrian Hunter --- tools/perf/scripts/python/export-to-postgresql.py | 65 +++ tools/perf/scripts/python/export-to-sqlite.py | 23 tools/perf/scripts/python/exported-sql-viewer.py | 42 ++- 3 files changed, 84 insertions(+), 46 deletions(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 390a351d15ea..439bbbf1e036 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -10,6 +10,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. 
+from __future__ import print_function + import os import sys import struct @@ -199,6 +201,16 @@ import datetime from PySide.QtSql import * +if sys.version_info < (3, 0): + def tobytes(str): + return str +else: + def tobytes(str): + # Use latin-1 (ISO-8859-1) so all code-points 0-255 will result + # in one byte (note utf-8 is 2 bytes for values > 128 and + # ascii is limited to values <= 128) + return bytes(str, "ISO-8859-1") + # Need to access PostgreSQL C library directly to use COPY FROM STDIN from ctypes import * libpq = CDLL("libpq.so.5") @@ -234,12 +246,14 @@ perf_db_export_mode = True perf_db_export_calls = False perf_db_export_callchains = False +def printerr(*args, **kw_args): + print(*args, file=sys.stderr, **kw_args) def usage(): - print >> sys.stderr, "Usage is: export-to-postgresql.py [] [] []" - print >> sys.stderr, "where:columns 'all' or 'branches'" - print >> sys.stderr, " calls 'calls' => create calls and call_paths table" - print >> sys.stderr, " callchains 'callchains' => create call_paths table" + printerr("Usage is: export-to-postgresql.py [] [] []") + printerr("where:columns 'all' or 'branches'") + printerr(" calls 'calls' => create calls and call_paths table") + printerr(" callchains 'callchains' => create call_paths table") raise Exception("Too few arguments") if (len(sys.argv) < 2): @@ -273,7 +287,7 @@ def do_query(q, s): return raise Exception("Query failed: " + q.lastError().text()) -print datetime.datetime.today(), "Creating database..." 
+print(datetime.datetime.today(), "Creating database...") db = QSqlDatabase.addDatabase('QPSQL') query = QSqlQuery(db) @@ -506,12 +520,12 @@ do_query(query, 'CREATE VIEW samples_view AS ' ' FROM samples') -file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0) -file_trailer = "\377\377" +file_header = struct.pack("!11sii", tobytes("PGCOPY\n\377\r\n\0"), 0, 0) +file_trailer = tobytes("\377\377") def open_output_file(file_name): path_name = output_dir_name + "/" + file_name - file = open(path_name, "w+") + file = open(path_name, "wb+") file.write(file_header) return file @@ -526,13 +540,13 @@ def copy_output_file_direct(file, table_name): # Use COPY FROM STDIN because security may prevent postgres from accessing the files directly def copy_output_file(file, table_name): - conn = PQconnectdb("dbname = " + dbname) + conn = PQconnectdb(tobytes("dbname = " + dbname)) if (PQstatus(conn)): raise Exception("COPY FROM STDIN PQconnectdb failed") file.write(file_trailer) file.seek(0) sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')" - res = PQexec(conn, sql) + res = PQexec(conn, tobytes(sql)) if (PQresultStatus(res) != 4): raise Exception("COPY FROM STDIN PQexec failed") data = file.read(65536) @@ -566,7 +580,7 @@ if perf_db_export_calls: call_file = open_output_file("call_table.bin") def trace_begin(): - print datetime.datetime.today(), "Writing to intermediate files..." + print(datetime.datetime.today(), "Writing to intermediate files...") # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs evsel_table(0, "unknown") machine_table(0, 0, "unknown") @@ -582,7 +596,7 @@ def trace_begin(): unhandled_count = 0 def trace_end(): - print datetime.datetime.today(), "Copying to database..." + print(datetime.datetime.today(), "Copying to database...")