On 4.12.2012 20:12, Alexander Korotkov wrote:
> Hi!
>
> On Sun, Dec 2, 2012 at 5:02 AM, Tomas Vondra <t...@fuzzy.cz
> <mailto:t...@fuzzy.cz>> wrote:
>
> I've tried to apply the patch with the current HEAD, but I'm getting
> segfaults whenever VACUUM runs (either called directly or from autovac
> workers).
>
> The patch applied cleanly against 9b3ac49e and needed a minor fix when
> applied on HEAD (because of an assert added to ginRedoCreatePTree), but
> that shouldn't be a problem.
>
>
> Thanks for testing! Patch is rebased with HEAD. The bug you reported was
> fixed.
Applies fine, but I get a segfault in dataPlaceToPage at gindatapage.c.
The whole backtrace is here: http://pastebin.com/YEPuWeuV
The messages written into PostgreSQL log are quite variable - usually it
looks like this:
2012-12-04 22:31:08 CET 31839 LOG: database system was not properly
shut down; automatic recovery in progress
2012-12-04 22:31:08 CET 31839 LOG: redo starts at 0/68A76E48
2012-12-04 22:31:08 CET 31839 LOG: unexpected pageaddr 0/1BE64000 in
log segment 000000010000000000000069, offset 15089664
2012-12-04 22:31:08 CET 31839 LOG: redo done at 0/69E63638
but I've seen this message too
2012-12-04 22:20:29 CET 31709 LOG: database system was not properly
shut down; automatic recovery in progress
2012-12-04 22:20:29 CET 31709 LOG: redo starts at 0/AEAFAF8
2012-12-04 22:20:29 CET 31709 LOG: record with zero length at 0/C7D5698
2012-12-04 22:20:29 CET 31709 LOG: redo done at 0/C7D55E
I wasn't able to prepare a simple testcase to reproduce this, so I've
attached two files from my "fun project" where I noticed it. It's a
simple DB + a bit of Python for indexing mbox archives inside Pg.
- create.sql - a database structure with a bunch of GIN indexes on
tsvector columns on "messages" table
- load.py - script for parsing mbox archives / loading them into the
"messages" table (warning: it's a bit messy)
Usage:
1) create the DB structure
$ createdb archives
$ psql archives < create.sql
2) fetch some archives (I consistently get SIGSEGV after first three)
$ wget
http://archives.postgresql.org/pgsql-hackers/mbox/pgsql-hackers.1997-01.gz
$ wget
http://archives.postgresql.org/pgsql-hackers/mbox/pgsql-hackers.1997-02.gz
$ wget
http://archives.postgresql.org/pgsql-hackers/mbox/pgsql-hackers.1997-03.gz
3) gunzip and load them using the python script
$ gunzip pgsql-hackers.*.gz
$ ./load.py --db archives pgsql-hackers.*
4) et voila - a SIGSEGV :-(
I suspect this might be related to the fact that the load.py script uses
savepoints quite heavily to handle UNIQUE_VIOLATION (duplicate messages).
Tomas
#!/bin/env python
import argparse
import datetime
import getpass
import multiprocessing
import os
import psycopg2
import psycopg2.extras
import psycopg2.errorcodes
import quopri
import random
import re
import sys
import traceback
import UserDict
from multiprocessing import Process, JoinableQueue
class Message(dict):
def __init__(self, message):
self.message = message
self.body = self.body(message)
self.headers = self.headers(message)
self.parts = self.parts(message)
def __getitem__(self, key):
if self.headers.has_key(key.lower()):
return self.headers[key.lower()]
else:
return None
def __setitem__(self, key, value):
self.headers.update({key.lower() : value})
def __delitem__(self, key):
if self.headers.has_key(key.lower()):
del self.headers[key.lower()]
def keys(self):
return self.headers.keys()
def get_body(self):
return self.body
def get_raw(self):
return self.message
def get_parts(self):
return self.parts
def get_headers(self):
return self.headers
def get_content_type(self):
if self.headers.has_key('content-type'):
return self.headers['content-type'].split(';')[0]
else:
return None
def __repr__(self):
return '%s %s' % (type(self).__name__, self.headers)
def is_multipart(self):
ctype = self.get_content_type()
if ctype != None and re.match('multipart/.*', ctype):
return True
else:
return False
def part_boundary(self):
if not self.is_multipart():
return None
else:
r = re.match('.*boundary="?([^"]*)"?', self.headers['content-type'], re.IGNORECASE)
if r:
return '--' + r.group(1)
# FIXME this keeps only the last value - needs to keep a list
def headers(self, message):
lines = message.split("\n")
key = ''
value = ''
headers = {};
for l in lines:
if l == '':
if key != '':
headers.update({key.lower() : value})
break
r = re.match('([a-zA-Z0-9-]*):\s*(.*)', l)
if r:
if key != '':
headers.update({key.lower() : value})
key = r.group(1)
value = r.group(2)
else:
value += ' ' + l.strip()
r = re.match('^From .*@.*\s+([a-zA-Z]*\s+[a-zA-Z]*\s+[0-9]+ [0-9]+:[0-9]+:[0-9]+\s+[0-9]{4})$', lines[0])
if r:
headers.update({'message-date' : r.group(1)})
r = re.match('^From bouncefilter\s+([a-zA-Z]*\s+[a-zA-Z]*\s+[0-9]+ [0-9]+:[0-9]+:[0-9]+\s+[0-9]{4})$', lines[0])
if r:
headers.update({'message-date' : r.group(1)})
r = re.match('^From scrappy$', lines[0])
if r:
r = re.search('^([^+-]*) [+-].*$', headers['date'])
if r:
headers.update({'message-date' : r.group(1)})
else:
r = re.search('^([^\(]*) \(.*', headers['date'])
if r:
headers.search({'message-date' : r.group(1)})
else:
headers.update({'message-date' : headers['date']})
return headers
def body(self, message):
lines = message.split("\n")
body = ''
in_body = False
for l in lines:
if in_body:
body += l + "\n"
if l == '':
in_body = True
return body.strip()
def is_boundary(self, boundary, line):
if (boundary == line) or ((boundary + '--') == line):
return True
else:
return False
def parts(self, message):
# not a multipart message - the whole message is a part
if not self.is_multipart():
return [message]
# split the message into parts
else:
boundary = self.part_boundary()
lines = self.body.split("\n")
part = ''
parts = []
for l in lines:
if self.is_boundary(boundary, l) and part != '':
parts.append(Message(part))
part = ''
elif self.is_boundary(boundary, l):
pass
else:
part += l + "\n"
return parts
def get_payload(self):
payload = ''
# if it's not a multi-part message, then just get the body
if not self.is_multipart():
# we do accpet only some content types (basically text/* and empty, which is supposed to be text/plain)
if self.get_content_type() == None or self.get_content_type().lower() == 'text/plain' or self.get_content_type().lower() == 'text/html':
# decode if needed
# FIXME handle quoted-printable too
if self['content-transfer-encoding'] == 'base64':
body = self.body.decode('base64')
elif self['content-transfer-encoding'] == 'quoted-printable':
body = self.qpdecodebody(self.body)
else:
body = self.body
# if it's text/html, strip the html tags
if self.get_content_type() == 'text/html':
body = self.clean_html(body)
return body.strip()
else:
# if it's a multi-part message, then try to get the text/plain part first
for p in self.parts:
if p.get_content_type() != None and p.get_content_type().lower() == 'text/plain':
return p.get_payload()
# no text/plain part, try to get any text/* part
for p in self.parts:
if p.get_content_type() != None and re.match('text/.*', p.get_content_type().lower()):
return p.get_payload()
# meh, nothing useful :-(
return None
def get_subject(self):
if not self['subject']:
return None
subject = ''
lines = self['subject'].split(' ')
for l in lines:
if (self.is_qpencoded(l)):
subject += ' ' + self.qpdecode(l)
elif (self.is_b64encoded(l)):
subject += ' ' + self.b64decode(l)
else:
subject += ' ' + l
return subject
def is_b64encoded(self, line):
r = re.match('=\?(.*)\?B\?(.*)\?=', line)
if r:
return True
else:
return False
def is_qpencoded(self, line):
r = re.match('=\?(.*)\?Q\?(.*)\?=', line)
if r:
return True
else:
return False
def b64decode(self, line):
lines = line.split(' ')
output = ''
for line in lines:
r = re.match('=\?(.*)\?B\?(.*)\?=', line)
if r:
output += ' ' + r.group(2).decode('base64')
return output
def qpdecodebody(self, body):
return quopri.decodestring(body)
def qpdecode(self, line):
lines = line.split(' ')
output = ''
for line in lines:
r = re.match('=\?(.*)\?Q\?(.*)\?=', line)
if r:
output += ' ' + quopri.decodestring(r.group(2))
return output
def clean_html(self, raw_html):
return re.sub(re.compile('<.*?>'),'', raw_html)
def load_files(id, connstr, queue):
conn = psycopg2.connect(connstr)
conn.set_client_encoding('LATIN1')
psycopg2.extras.register_hstore(conn)
cur = conn.cursor()
c = 0
n = 0
start_time = datetime.datetime.now()
try:
while True:
task = queue.get()
if not task:
print "worker %s : no more tasks, terminating" % (id,)
queue.task_done()
break;
for f in task['files']:
(msgs,errs,dups) = load_mbox(task['list'], f, cur)
conn.commit()
n += msgs
d = (datetime.datetime.now() - start_time).total_seconds()
print "worker %s : %s - imported %s messages (%s errors, %s duplicate): %s msgs/sec" % (id, f, msgs, errs, dups, round((c+n)/d,1))
# analyze only if we reach enough new messages (at least 250 and more that inserted so far)
if (n >= c) and (n >= 250):
c += n
n = 0
cur.execute('ANALYZE messages')
queue.task_done()
except KeyboardInterrupt:
print "worker %s : process stopped by Ctrl-C" % (id,)
finally:
cur.close()
conn.close()
def load_mbox(lname, filename, cur):
r = re.match('(.*)\.[0-9]+-[0-9]+', filename)
if r:
lst = r.group(1)
if lst != lname:
print "ERROR: mbox '%s' does not belong to list '%s'" % (filename, lname)
return
else:
print "unknown list (expected %s): %s" % (filename, lname)
return
msgs = read_messages(filename)
cur.execute("SAVEPOINT bulk_load")
n_msgs = 0
n_errs = 0
n_dups = 0
for m in msgs:
# print m['message-date'],"\t",len(m.get_body()),'B',"\t",m.get_content_type(),"\t",len(m.get_parts()),"\t",m['subject'],"\t",m['from']
try:
mid = None
if m['message-id']:
r = re.search('<(.*)>.*', m['message-id'])
if r:
mid = r.group(1)
date = m['message-date']
subject = m.get_subject()
if subject:
subject = subject.strip()
refs = None
if m['references']:
refs = re.findall('<([^>]*)>', m['references'])
in_reply_to = None
if m['in-reply-to']:
in_reply_to = re.findall('<([^>]*)>', m['in-reply-to'])
body = m.get_payload();
if m['message-date'] == '':
continue
cur.execute("INSERT INTO messages (list, message_id, in_reply_to, refs, sent, subject, author, body_plain, headers, raw_message) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
(lst, mid, in_reply_to, refs, date, subject, m['from'], m.get_payload(), m.get_headers(), m.get_raw()))
cur.execute("SAVEPOINT bulk_load")
n_msgs += 1
except psycopg2.DatabaseError as ex:
# well, no matter what happened, rollback to the savepoint
cur.execute("ROLLBACK TO bulk_load")
if ex.pgcode == psycopg2.errorcodes.UNIQUE_VIOLATION:
n_dups += 1
else:
print ex.pgcode,':',ex.pgerror
n_errs += 1
return (n_msgs,n_errs,n_dups)
def read_messages(filename):
messages = []
message = ''
f = open(filename, 'r')
for line in f:
if re.match('^From .*@.*\s+[a-zA-Z]*\s+[a-zA-Z]*\s+[0-9]+ [0-9]+:[0-9]+:[0-9]+\s+[0-9]{4}$', line):
if message != '':
messages.append(Message(message))
message = line
elif re.match('^From bouncefilter\s+[a-zA-Z]*\s+[a-zA-Z]*\s+[0-9]+ [0-9]+:[0-9]+:[0-9]+\s+[0-9]{4}$', line):
if message != '':
messages.append(Message(message))
message = line
elif re.match('^From scrappy$', line):
if message != '':
messages.append(Message(message))
message = line
else:
message += line
messages.append(Message(message))
return messages
def get_space(files):
'returns total size occupied by the files (from a list)'
s = 0
for f in files:
s += os.path.getsize(f)
return s
def build_parser():
parser = argparse.ArgumentParser(description='Archie loader')
parser.add_argument('files', metavar='FILE', type=str, nargs='+', help='mbox files to load')
parser.add_argument('--workers', dest='nworkers', action='store', type=int,
default=0, help='number of worker processes (default is number of cores)')
parser.add_argument('--host', dest='hostname', action='store', type=str,
default='localhost', help='database server hostname (localhost by default)')
parser.add_argument('--db', dest='dbname', action='store', type=str,
required=True, help='name of the database to test (required)')
parser.add_argument('--port', dest='port', action='store', type=int,
default=5432, help='database server port (5432 by default)')
parser.add_argument('--user', dest='user', action='store', type=str,
default=getpass.getuser(), help='db username (current OS user by default)')
parser.add_argument('--password', dest='password', action='store', type=str,
default=None, help='db password (empty by default)')
return parser
if __name__ == '__main__':
# build parser and parse arguments
parser = build_parser()
args = parser.parse_args()
if args.password:
connstr = 'host=%s dbname=%s user=%s port=%s password=%s' % (args.hostname,
args.dbname, args.user, args.port, arg.password)
else:
connstr = 'host=%s dbname=%s user=%s port=%s' % (args.hostname,
args.dbname, args.user, args.port)
# prepare list of files
lists = {}
for f in args.files:
r = re.match('([a-z-]*)\.[0-9]+-[0-9]+', f)
if r:
mlist = r.group(1)
if not lists.has_key(mlist):
lists.update({mlist : [f]})
else:
lists[mlist].append(f)
# get occupied space for each list (assumption space ~ number of messages)
keys = [{'key' : k, 'length' : get_space(lists[k])} for k in lists.keys()]
keys = sorted(keys, key=lambda x: -x['length'])
queue = JoinableQueue()
for k in keys:
queue.put({'list' : k['key'], 'files' : lists[k['key']]})
# get number of workers
nworkers = multiprocessing.cpu_count()
if args.nworkers > 0:
nworkers = args.nworkers
if nworkers > len(lists):
nworkers = len(lists)
try:
started = datetime.datetime.now()
# start workers
workers = []
for i in xrange(nworkers):
p = Process(target=load_files, args=(i, connstr, queue))
workers.append(p)
p.start()
# for each worker, put there one empty message, meaning "stop"
queue.put(False)
# wait for the workers to finish
queue.join()
runtime = (datetime.datetime.now() - started).total_seconds()
print "total runtime: %s seconds" % (round(runtime,1),)
except KeyboardInterrupt:
print "loading interrupted"
CREATE EXTENSION hstore;
CREATE TABLE lists (
id VARCHAR(64) PRIMARY KEY,
language VARCHAR(32) NOT NULL,
msg_first TIMESTAMP,
msg_last TIMESTAMP,
msg_count INT NOT NULL DEFAULT 0,
CONSTRAINT date_check CHECK (msg_last >= msg_first),
CONSTRAINT count_check CHECK (msg_count >= 0)
);
CREATE TABLE messages (
id SERIAL PRIMARY KEY,
parent_id INT REFERENCES messages(id),
thread_id INT,
level INT,
hash_id VARCHAR(32) NOT NULL UNIQUE,
list VARCHAR(32) NOT NULL REFERENCES lists(id),
message_id VARCHAR(200),
in_reply_to TEXT[],
refs TEXT[],
sent TIMESTAMP,
subject TEXT,
author TEXT,
body_plain TEXT,
body_tsvector tsvector,
subject_tsvector tsvector,
headers HSTORE,
raw_message TEXT
);
CREATE TABLE message_replies (
id INT REFERENCES messages(id),
parent_id INT REFERENCES messages(id),
PRIMARY KEY (id, parent_id)
);
CREATE OR REPLACE FUNCTION message_update_tsvector() RETURNS trigger AS $$
BEGIN
IF TG_OP = 'INSERT' THEN
NEW.body_tsvector := to_tsvector('english',
strip_replies(NEW.body_plain));
NEW.subject_tsvector := to_tsvector('english', NEW.subject);
ELSIF TG_OP = 'UPDATE' THEN
IF NEW.body_plain IS DISTINCT FROM OLD.body_plain THEN
NEW.body_tsvector := to_tsvector('english',
strip_replies(NEW.body_plain));
END IF;
IF NEW.subject IS DISTINCT FROM OLD.subject THEN
NEW.subject_tsvector := to_tsvector('english', NEW.subject);
END IF;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- TODO detect sudden changes in subject, which usually mean someone started
-- a new thread by 'reply-to' an existing message
CREATE OR REPLACE FUNCTION message_update_thread() RETURNS trigger AS $$
DECLARE
v_min_tid INT;
v_max_tid INT;
v_level INT;
v_id INT;
v_subject TEXT;
v_len INT;
BEGIN
NEW.hash_id := md5(NEW.raw_message);
IF (NEW.in_reply_to IS NOT NULL) THEN
SELECT MIN(id), MIN(thread_id), MAX(thread_id), MIN(level) INTO v_id,
v_min_tid, v_max_tid, v_level
FROM messages WHERE list = NEW.list AND message_id =
ANY(NEW.in_reply_to);
IF v_min_tid IS NOT NULL THEN
NEW.parent_id := v_id;
NEW.thread_id := v_min_tid;
NEW.level := v_level+1;
END IF;
IF v_min_tid IS DISTINCT FROM v_max_tid THEN
RAISE WARNING 'min = %, max = %', v_min_tid, v_max_tid;
END IF;
END IF;
IF (NEW.thread_id IS NULL) AND (NEW.refs IS NOT NULL) THEN
SELECT MIN(id), MIN(thread_id), MAX(thread_id), MIN(level) INTO v_id,
v_min_tid, v_max_tid, v_level
FROM messages WHERE list = NEW.list AND message_id = ANY(NEW.refs);
IF v_min_tid IS NOT NULL THEN
NEW.parent_id := v_id;
NEW.thread_id := v_min_tid;
NEW.level := v_level+1;
END IF;
IF v_min_tid IS DISTINCT FROM v_max_tid THEN
RAISE WARNING 'min = %, max = %', v_min_tid, v_max_tid;
END IF;
END IF;
IF (NEW.thread_id IS NULL) AND (NEW.subject IS NOT NULL) AND (NEW.subject
~* '[Rr][Ee]:.*') THEN
v_subject := NEW.subject;
v_len := length(v_subject);
LOOP
v_subject := trim(regexp_replace(v_subject, '^[Rr][Ee]:\s?', ''));
EXIT WHEN length(v_subject) = v_len;
v_len := length(v_subject);
END LOOP;
SELECT MAX(id) INTO v_max_tid FROM messages
WHERE subject = v_subject AND list = NEW.list AND NOT (subject ~*
'[Rr][Ee]:.*');
IF v_max_tid IS NOT NULL THEN
SELECT level INTO v_level FROM messages WHERE id = v_max_tid;
NEW.thread_id := v_max_tid;
NEW.level := v_level + 1;
END IF;
END IF;
IF (NEW.thread_id IS NULL) THEN
NEW.thread_id := NEW.id;
NEW.level := 0;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION message_fill_replies() RETURNS trigger AS $$
DECLARE
v_min_id INT;
v_max_id INT;
BEGIN
INSERT INTO message_replies SELECT NEW.id, id
FROM messages WHERE list = NEW.list AND message_id = ANY(NEW.in_reply_to);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_vectors BEFORE INSERT OR UPDATE ON messages
FOR EACH ROW EXECUTE PROCEDURE message_update_tsvector();
CREATE TRIGGER update_thread BEFORE INSERT ON messages
FOR EACH ROW EXECUTE PROCEDURE message_update_thread();
CREATE TRIGGER fill_replies AFTER INSERT ON messages
FOR EACH ROW EXECUTE PROCEDURE message_fill_replies();
CREATE INDEX message_body_idx on messages using gin(body_tsvector);
CREATE INDEX message_subject_idx on messages using gin(subject_tsvector);
CREATE INDEX message_send_idx on messages(sent);
CREATE INDEX message_id_idx on messages(message_id);
CREATE INDEX message_subj_idx on messages(subject) WHERE NOT (subject ~*
'[Rr][Ee]:.*');
CREATE INDEX message_thread_idx on messages(thread_id);
-- rewrap e-mail body to given line length
CREATE OR REPLACE FUNCTION rewrap(p_text TEXT, p_len INT) RETURNS text AS $$
DECLARE
v_lines TEXT[];
v_words TEXT[];
v_line TEXT;
v_word TEXT;
v_result TEXT;
line_idx INT;
word_idx INT;
v_len INT;
BEGIN
-- handle tabs (replace with 4 spaces)
p_text := regexp_replace(p_text, '\t', ' ', 'g');
v_lines := regexp_split_to_array(p_text, '\n');
v_result := '';
IF v_lines IS NULL THEN
RETURN NULL;
END IF;
FOR line_idx IN array_lower(v_lines, 1) .. array_upper(v_lines, 1) LOOP
v_line := v_lines[line_idx];
IF length(v_line) < p_len THEN
v_result := v_result || trim(trailing ' ' from v_line) || E'\n';
v_line := '';
ELSE
v_words := regexp_split_to_array(v_line, ' ');
v_line := '';
FOR word_idx IN array_lower(v_words, 1) .. array_upper(v_words, 1)
LOOP
v_word := v_words[word_idx];
IF length(v_line) + length(v_word) + 1 <= p_len THEN
v_line := v_line || v_word || ' ';
ELSIF length(v_word) > p_len THEN
v_len := p_len - length(v_line);
WHILE length(v_word) > 0 LOOP
v_line := v_line || substr(v_word, 0, v_len);
v_result := v_result || trim(trailing ' ' from v_line)
|| E'\n';
v_line := '';
v_word := substr(v_word, v_len);
END LOOP;
ELSE
v_result := v_result || trim(trailing ' ' from v_line) ||
E'\n';
v_line := v_words[word_idx] || ' ';
-- FIXME handle case when the line is a quotation (prepend
>)
END IF;
END LOOP;
END IF;
IF length(v_line) > 0 THEN
v_result := v_result || trim(trailing ' ' from v_line) || E'\n';
END IF;
END LOOP;
RETURN v_result;
END;
$$ LANGUAGE plpgsql;
-- extracts e-mail address from the "From:" header
CREATE OR REPLACE FUNCTION getmail(p_text TEXT) RETURNS text AS $$
DECLARE
v_email TEXT;
BEGIN
IF p_text LIKE '%<%>%' THEN
v_email := SUBSTRING(p_text,'<(.*)>');
ELSE
v_email := p_text;
END IF;
RETURN lower(trim(v_email));
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION strip_replies(p_text TEXT) RETURNS text AS $$
DECLARE
v_result TEXT;
v_lines TEXT[];
v_line TEXT;
line_idx INT;
BEGIN
v_lines := regexp_split_to_array(p_text, '\n');
v_result := '';
IF v_lines IS NULL THEN
RETURN NULL;
END IF;
FOR line_idx IN array_lower(v_lines, 1) .. array_upper(v_lines, 1) LOOP
v_line := v_lines[line_idx];
IF NOT v_line ~* '^>[\s>]*' THEN
v_result := v_result || v_line || E'\n';
END IF;
END LOOP;
RETURN v_result;
END;
$$ LANGUAGE plpgsql;
INSERT INTO lists (id, language) VALUES ('pgsql-general', 'english');
INSERT INTO lists (id, language) VALUES ('pgsql-hackers', 'english');
INSERT INTO lists (id, language) VALUES ('pgsql-novice', 'english');
INSERT INTO lists (id, language) VALUES ('pgsql-sql', 'english');
INSERT INTO lists (id, language) VALUES ('pgsql-bugs', 'english');
INSERT INTO lists (id, language) VALUES ('pgadmin-hackers', 'english');
INSERT INTO lists (id, language) VALUES ('pgsql-jdbc', 'english');
INSERT INTO lists (id, language) VALUES ('pgsql-odbc', 'english');
INSERT INTO lists (id, language) VALUES ('pgsql-patches', 'english');
INSERT INTO lists (id, language) VALUES ('pgsql-advocacy', 'english');
INSERT INTO lists (id, language) VALUES ('pgsql-announce', 'english');
Program received signal SIGSEGV, Segmentation fault.
0x0000000000475cdf in dataPlaceToPage (btree=0x23bbb50, buf=2902, off=39,
prdata=0x7fff7f942408) at gindatapage.c:780
780 addInfo = datumCopy(addInfo, attr->attbyval,
attr->attlen);
(gdb) bt
#0 0x0000000000475cdf in dataPlaceToPage (btree=0x23bbb50, buf=2902, off=39,
prdata=0x7fff7f942408) at gindatapage.c:780
#1 0x0000000000478b5b in ginInsertValue (btree=0x23bbb50, stack=0x23b4268,
buildStats=0x0) at ginbtree.c:301
#2 0x0000000000478144 in ginInsertItemPointers (ginstate=0x7fff7f942a30,
attnum=1, gdi=0x23bbb50, items=0x23b48d0, addInfo=0x23b50e0,
addInfoIsNull=0x23b60f0 '\001' <repeats 200 times>..., nitem=273,
buildStats=0x0) at gindatapage.c:1411
#3 0x000000000046dd47 in addItemPointersToLeafTuple (ginstate=0x7fff7f942a30,
old=0x7f10e0c82f18, items=0x23b48d0, addInfo=0x23b50e0, addInfoIsNull=0x23b60f0
'\001' <repeats 200 times>..., nitem=273,
buildStats=0x0) at gininsert.c:336
#4 0x000000000046e362 in ginEntryInsert (ginstate=0x7fff7f942a30, attnum=1,
key=34027080, category=0 '\000', items=0x23b48d0, addInfo=0x23b50e0,
addInfoIsNull=0x23b60f0 '\001' <repeats 200 times>..., nitem=273,
buildStats=0x0) at gininsert.c:505
#5 0x0000000000482b2b in ginInsertCleanup (ginstate=0x7fff7f942a30,
vac_delay=0 '\000', stats=0x0) at ginfast.c:965
#6 0x00000000004812b0 in ginHeapTupleFastInsert (ginstate=0x7fff7f942a30,
collector=0x7fff7f942a10) at ginfast.c:443
#7 0x000000000046f158 in gininsert (fcinfo=0x7fff7f9451f0) at gininsert.c:854
#8 0x0000000000827c42 in FunctionCall6Coll (flinfo=0x1e297a0, collation=0,
arg1=139711552759192, arg2=140735333815936, arg3=140735333815904,
arg4=31779268, arg5=139711552772624, arg6=0) at fmgr.c:1480
#9 0x00000000004aa400 in index_insert (indexRelation=0x7f1121792598,
values=0x7fff7f945680, isnull=0x7fff7f945660 "", heap_t_ctid=0x1e4e9c4,
heapRelation=0x7f1121795a10, checkUnique=UNIQUE_CHECK_NO)
at indexam.c:216
#10 0x00000000005ffde4 in ExecInsertIndexTuples (slot=0x1d27450,
tupleid=0x1e4e9c4, estate=0x1ed6ba0) at execUtils.c:1087
#11 0x0000000000612590 in ExecInsert (slot=0x1d27450, planSlot=0x1ed8790,
estate=0x1ed6ba0, canSetTag=1 '\001') at nodeModifyTable.c:249
#12 0x0000000000613583 in ExecModifyTable (node=0x1ed7890) at
nodeModifyTable.c:906
#13 0x00000000005f2c71 in ExecProcNode (node=0x1ed7890) at execProcnode.c:376
#14 0x00000000005f0f1b in ExecutePlan (estate=0x1ed6ba0, planstate=0x1ed7890,
operation=CMD_INSERT, sendTuples=0 '\000', numberTuples=0,
direction=ForwardScanDirection, dest=0x1dbc430) at execMain.c:1394
#15 0x00000000005ef692 in standard_ExecutorRun (queryDesc=0x1d39e80,
direction=ForwardScanDirection, count=0) at execMain.c:302
#16 0x00000000005ef583 in ExecutorRun (queryDesc=0x1d39e80,
direction=ForwardScanDirection, count=0) at execMain.c:250
#17 0x0000000000719742 in ProcessQuery (plan=0x1dbc350,
sourceText=0x1e5a850 "INSERT INTO messages (list, message_id, in_reply_to,
refs, sent, subject, author, body_plain, headers, raw_message) VALUES
('pgsql-hackers', NULL, NULL, NULL, 'Jan 6 14:52:29 1998', 'Re: [HACKERS]
Do"..., params=0x0, dest=0x1dbc430, completionTag=0x7fff7f945cd0 "") at
pquery.c:185
#18 0x000000000071aeb9 in PortalRunMulti (portal=0x1c570e0, isTopLevel=1
'\001', dest=0x1dbc430, altdest=0x1dbc430, completionTag=0x7fff7f945cd0 "") at
pquery.c:1278
#19 0x000000000071a4f2 in PortalRun (portal=0x1c570e0,
count=9223372036854775807, isTopLevel=1 '\001', dest=0x1dbc430,
altdest=0x1dbc430, completionTag=0x7fff7f945cd0 "") at pquery.c:814
#20 0x00000000007147e7 in exec_simple_query (
query_string=0x1e5a850 "INSERT INTO messages (list, message_id,
in_reply_to, refs, sent, subject, author, body_plain, headers, raw_message)
VALUES ('pgsql-hackers', NULL, NULL, NULL, 'Jan 6 14:52:29 1998', 'Re:
[HACKERS] Do"...) at postgres.c:1048
#21 0x0000000000718810 in PostgresMain (argc=2, argv=0x1c35b50,
username=0x1c35b20 "tomas") at postgres.c:3966
#22 0x00000000006c6236 in BackendRun (port=0x1c52020) at postmaster.c:3665
#23 0x00000000006c59e9 in BackendStartup (port=0x1c52020) at postmaster.c:3349
#24 0x00000000006c291b in ServerLoop () at postmaster.c:1431
#25 0x00000000006c22f3 in PostmasterMain (argc=3, argv=0x1c33a10) at
postmaster.c:1180
#26 0x0000000000633acb in main (argc=3, argv=0x1c33a10) at main.c:197
(gdb) print addInfo
$1 = 0
(gdb) print attr
$2 = (Form_pg_attribute) 0x0
(gdb) print attr->attbyval
Cannot access memory at address 0x5c
(gdb) print attr->attlen
Cannot access memory at address 0x4c
(gdb) print *attr
Cannot access memory at address 0x0
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers