Thanks for the reply, Mike. I should've offered the SQL equivalent of what 
I was going for to begin with. This is for a side project and I'm at work, 
so I didn't put as much thought into the question as I should've.

I think the link to the polymorphic valued vertical table example is what 
I'm after. I'll have to dig into it to understand it fully, but it looks 
like what I want to do.

Let me see if I can clear up what I was originally after. I only have SQL 
Server at work, but the first part is basically the same in postgres (which 
is what I'll be using at some point, once I get outside the bounds of what 
SQLite can handle).

The way I had the tables defined before, with the Value class and its 
subclasses, leads me to this raw SQL for what I'm wanting. Note, I've 
renamed values to vals because shadowing is bad. Here's the whole 
creation/populating and the queries I'm looking at.

CREATE TABLE datasets (
    id INTEGER NOT NULL, 
    uid varchar(50) NOT NULL, 
    name varchar(50) NOT NULL, 
    description varchar(50), 
    CONSTRAINT pk_datasets PRIMARY KEY (id)
)

CREATE TABLE variables (
    id INTEGER NOT NULL, 
    uid varchar(50) NOT NULL, 
    dataset_id INT NOT NULL, 
    name varchar(50) NOT NULL, 
    description varchar(50), 
    group_var_col_id INTEGER, 
    group_var_row_id INTEGER, 
    value_map varchar(50), 
    CONSTRAINT pk_variables PRIMARY KEY (id), 
    CONSTRAINT fk_variables_dataset_id_datasets FOREIGN KEY(dataset_id) REFERENCES datasets (id), 
    CONSTRAINT fk_variables_group_var_col_id_variables FOREIGN KEY(group_var_col_id) REFERENCES variables (id), 
    CONSTRAINT fk_variables_group_var_row_id_variables FOREIGN KEY(group_var_row_id) REFERENCES variables (id)
)

INSERT INTO datasets (id, uid, name, description)
values
(1, 'a1', 'ds1', 'test1')

INSERT INTO variables (id, uid, dataset_id, name, description)
values
(2, 'av2', 1, 'v2', 'testvar2'),
(1, 'av1', 1, 'v1', 'testvar1'),
(3, 'av3', 1, 'v3', 'testvar3')


CREATE TABLE vals (
    id INTEGER NOT NULL, 
    uid varchar(50) NOT NULL, 
    variable_id INT NOT NULL, 
    observation_id varchar(50) NOT NULL, 
    val_type varchar(50) NOT NULL, 
    CONSTRAINT pk_values PRIMARY KEY (id), 
    CONSTRAINT fk_values_variable_id_variables FOREIGN KEY(variable_id) REFERENCES variables (id)
)

INSERT INTO vals (id, uid, variable_id, observation_id, val_type)
values
(1, 'intval1', 1, 'resp1', 'int'),
(2, 'intval2', 1, 'resp2', 'int'),
(3, 'intval3', 1, 'resp3', 'int'),
(4, 'intval4', 1, 'resp4', 'int'),
(5, 'intval5', 1, 'resp5', 'int'),
(6, 'strval1', 2, 'resp1', 'str'),
(7, 'strval2', 2, 'resp2', 'str'),
(8, 'strval3', 2, 'resp3', 'str'),
(9, 'strval4', 2, 'resp4', 'str'),
(10, 'strval5', 2, 'resp5', 'str')

CREATE TABLE int_vals (
    id INTEGER NOT NULL, 
    val INTEGER, 
    CONSTRAINT pk_int_val PRIMARY KEY (id), 
    CONSTRAINT fk_int_vals_id_values FOREIGN KEY(id) REFERENCES vals (id)
)

INSERT INTO int_vals (id, val)
VALUES
(1, 1),
(2, 2),
(3, 3),
(4, 4),
(5, 5)

CREATE TABLE string_vals (
    id INTEGER NOT NULL, 
    val varchar(50), 
    CONSTRAINT pk_string_values PRIMARY KEY (id), 
    CONSTRAINT fk_string_values_id_values FOREIGN KEY(id) REFERENCES vals (id)
)

INSERT INTO string_vals (id, val)
VALUES
(6, 'one'),
(7, 'two'),
(8, 'three'),
(9, 'four'),
(10, 'five')


SELECT COALESCE(iv.val, sv.val) AS val
FROM vals v
LEFT JOIN int_vals iv ON iv.id = v.id
LEFT JOIN string_vals sv ON sv.id = v.id
WHERE v.variable_id = 1

This is only sort of close, because COALESCE returns the type of the 
highest-precedence argument (int beats varchar in SQL Server), so it tries 
to implicitly convert the strings to int and blows up on values like 'one'. 
But if that didn't happen, it would be the behavior I want.
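
For what it's worth, casting the int side to varchar makes the query run. 
A sketch of the workaround (though it stringifies everything, which defeats 
the type-keeping I'm after):

SELECT COALESCE(CONVERT(varchar(50), iv.val), sv.val) AS val
FROM vals v
LEFT JOIN int_vals iv ON iv.id = v.id
LEFT JOIN string_vals sv ON sv.id = v.id
WHERE v.variable_id = 1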


For dynamic PIVOT queries, it works *really* differently between SQL Server 
and postgres. Both of them are ugly, and SQL Server is also gross. Postgres 
is more complex to set up because the output of a pivot has to have its 
column types declared up front, while SQL Server doesn't care at all, which 
I'm assuming means everything gets cast to varchar and defeats my purpose 
of trying to maintain types.

But I'll show it in SQL Server because that's what I have in front of me. 
For this case, I'm setting the coalescing part aside and using a single 
table that stores all the values as strings.

CREATE TABLE single_values (
    id INTEGER NOT NULL, 
    uid varchar(50) NOT NULL, 
    variable_id INT NOT NULL, 
    observation_id varchar(50) NOT NULL, 
    value_type varchar(50) NOT NULL, 
    value varchar(50), 
    CONSTRAINT pk_single_values PRIMARY KEY (id), 
    CONSTRAINT fk_single_values_variable_id_variables FOREIGN KEY(variable_id) REFERENCES variables (id)
)

INSERT INTO single_values (id, uid, variable_id, observation_id, value_type, value)
values
(6, 'val6', 2, 'respid1', 'string', '6'),
(7, 'val7', 2, 'respid2', 'string', '7'),
(8, 'val8', 2, 'respid3', 'string', '8'),
(9, 'val9', 2, 'respid4', 'string', '9'),
(10, 'val10', 2, 'respid5', 'string', '10'),
(1, 'val1', 1, 'respid1', 'int', '1'),
(2, 'val2', 1, 'respid2', 'int', '2'),
(3, 'val3', 1, 'respid3', 'int', '3'),
(4, 'val4', 1, 'respid4', 'int', '4'),
(5, 'val5', 1, 'respid5', 'int', '5'),
(11, 'val11', 3, 'respid1', 'float', '1.0'),
(12, 'val12', 3, 'respid2', 'float', '2.0'),
(13, 'val13', 3, 'respid3', 'float', '3.0'),
(14, 'val14', 3, 'respid4', 'float', '4.0'),
(15, 'val15', 3, 'respid5', 'float', '5.0')


DECLARE @DynamicPivotQuery AS NVARCHAR(MAX)

-- Build a comma-separated, bracket-quoted list of the variable names,
-- e.g. [v1],[v2],[v3], via the STUFF/FOR XML PATH trick
;WITH ColumnNames (names) AS (
SELECT TOP 1 STUFF((SELECT ',' + QUOTENAME(name)
FROM variables
WHERE dataset_id = 1
FOR XML PATH('')), 1, 1, '') [cols]
FROM (SELECT DISTINCT name FROM variables WHERE dataset_id = 1) n
)
-- Splice the column list into a PIVOT statement and execute it
SELECT @DynamicPivotQuery = 
  N'SELECT observation_id, ' +  (SELECT names FROM ColumnNames) + '
    FROM 
        (SELECT sv.observation_id, v.name, sv.value
        FROM single_values sv
        INNER JOIN variables v ON v.id = sv.variable_id
        WHERE v.dataset_id = 1) s
        PIVOT(MAX(value) 
          FOR name IN (' + (SELECT names FROM ColumnNames) + ')) p'
--print @DynamicPivotQuery
EXEC sp_executesql @DynamicPivotQuery

Like I said, it's not only ugly, it's also gross. When I get home from work 
today, I can write up a fuller postgres version if that's at all useful to 
anyone.
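
In the meantime, it's roughly this shape (a sketch from memory, untested 
here; crosstab comes from the tablefunc extension, and the AS clause with 
declared column types is the extra setup I was talking about):

-- needs: CREATE EXTENSION tablefunc;
SELECT *
FROM crosstab(
    -- source rows: (row name, category, value)
    $$SELECT sv.observation_id, v.name, sv.value
      FROM single_values sv
      JOIN variables v ON v.id = sv.variable_id
      WHERE v.dataset_id = 1
      ORDER BY 1, 2$$,
    -- categories, in the same order as the output columns below
    $$SELECT DISTINCT name FROM variables WHERE dataset_id = 1 ORDER BY 1$$
) AS pivoted(observation_id varchar(50), v1 varchar(50), v2 varchar(50), v3 varchar(50));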

Anyway, like I said above, I think your polymorphic example is where I want 
to go with the data model. I just need to understand it, because I hate not 
understanding code I'm using. I suspect that for some of these cases I need 
to either use pandas to do the pivot, which is fine as long as I'm keeping 
types, or call a stored procedure to do the dynamic pivot if I just need to 
grab the whole dataset in that format.
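
For the pandas route, this is roughly what I have in mind (a rough, 
untested sketch; it assumes the polymorphic Value/Variable models from the 
quoted message and a configured session):

import pandas as pd
from sqlalchemy.orm import with_polymorphic

# Query the Value base class with all subclass tables joined in,
# so .value comes back as the right Python type per row
poly = with_polymorphic(Value, '*')
rows = (
    session.query(poly)
    .filter(poly.variable.has(dataset_id=1))
    .all()
)

# Long format: one row per (observation, variable, typed value)
# (r.variable lazy-loads per row here; good enough for a sketch)
df = pd.DataFrame(
    [(r.observation_id, r.variable.name, r.value) for r in rows],
    columns=['observation_id', 'variable', 'value'],
)

# Wide, dataframe-shaped: observations as rows, variables as columns
wide = df.pivot(index='observation_id', columns='variable', values='value')

The pivot leaves each column as object dtype, so I'd probably follow it 
with wide.infer_objects() to get real int/float columns back.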

Thanks!



On Thursday, February 28, 2019 at 12:34:16 PM UTC-6, Andrew Martin wrote:
>
> Hi All,
>
> Trying something out for an analytics app here, and I'm not sure any 
> of this is a good idea. I'm trying to create a data model that's flexible 
> enough to handle a lot of different types of analysis, from plain CSVs to 
> time series to survey questions with complex hierarchies. My approach is to 
> have a pretty plain model and query as needed to construct dataframes that 
> will end up in pandas and scikit-learn.
>
> The basic idea is that there's a dataset that has variables associated 
> with it. Variables have values associated. I want to retain type 
> information on the values. If I'm reading a file and loading it, I want to 
> store an int as an int or a datetime as a datetime and not stuff everything 
> into a string and have to guess what things are based on variable metadata. 
> That's where I'm kind of stuck. I think I have the multi-table inheritance 
> set up correctly to store different types of value. I'm totally unsure how 
> to query without having to know the subclass. I'd like to be able to 
> extract values for pandas processing with something like this:
>
> vals = session.query(Value).filter_by(variable_id=123456).all()
>
> without having to know per variable which subclass to choose. I.e., I 
> don't want to have to do
> vals = session.query(IntValue).filter_by(variable_id=123456).all()
>
>
> The second part of my question is only relevant if this whole design isn't 
> hosed, which I'm fine if you tell me that it is and I need to go back to 
> the drawing board.
>
> The second part is how to extract an entire dataset by ID in a way that 
> would be comfortable in pandas. If I were doing this in raw SQL, I'd use a 
> CTE to get the variables by dataset id and use a pivot to create the 
> data-frame-shaped table. I'm confounded by how to do this in alchemy, 
> especially with the polymorphic setup.
>
>
> Here are my current model definitions:
>
>
> from sqlalchemy import (Column, Index, Integer, Text, DateTime, Float, 
>     ForeignKey)
> from sqlalchemy.orm import relationship
>
> from .meta import Base
>
>
> class DataSet(Base):
>     __tablename__ = 'datasets'
>     id = Column(Integer, primary_key=True)
>     name = Column(Text, nullable=False)
>     description = Column(Text, nullable=True)
>
>
>
> class Variable(Base):
>     __tablename__ = 'variables'
>     id = Column(Integer, primary_key=True)
>     dataset_id = Column(Integer, ForeignKey('datasets.id'), nullable=False)
>     name = Column(Text, nullable=False)
>     description = Column(Text, nullable=True)
>     group_var_col_id = Column(Integer, ForeignKey('variables.id'), 
> nullable=True)
>     group_var_row_id = Column(Integer, ForeignKey('variables.id'), 
> nullable=True)
>
>     value_map = Column(Text, nullable=True)  # change to JSONB when we move from SQLite to Postgres
>
>     dataset = relationship('DataSet', backref='variables')
>
>
> class Value(Base):
>     __tablename__ = 'values'
>     id = Column(Integer, primary_key=True)
>     variable_id = Column(Integer, ForeignKey('variables.id'), nullable=False)
>     observation_id = Column(Text, nullable=False)
>     value_type = Column(Text, nullable=False)
>
>     variable = relationship('Variable', backref='values')
>
>     __mapper_args__ = {'polymorphic_on': value_type}
>
>
> class IntValue(Value):
>     __tablename__ = 'int_values'
>     id = Column(None, ForeignKey('values.id'), primary_key=True)
>     value = Column(Integer, nullable=True)
>
>     __mapper_args__ = {'polymorphic_identity': 'int'}
>
>
> class StringValue(Value):
>     __tablename__ = 'string_values'
>     id = Column(None, ForeignKey('values.id'), primary_key=True)
>     value = Column(Text, nullable=True)
>
>     __mapper_args__ = {'polymorphic_identity': 'string'}
>
>
> class FloatValue(Value):
>     __tablename__ = 'float_values'
>     id = Column(None, ForeignKey('values.id'), primary_key=True)
>     value = Column(Float, nullable=True)
>
>     __mapper_args__ = {'polymorphic_identity': 'float'}
>
>
> class DateTimeValue(Value):
>     __tablename__ = 'datetime_values'
>     id = Column(None, ForeignKey('values.id'), primary_key=True)
>     value = Column(DateTime, nullable=True)
>
>     __mapper_args__ = {'polymorphic_identity': 'datetime'}
>
> Thanks in advance for any suggestions you might have!
>
