iangow commented on issue #1755:
URL: https://github.com/apache/arrow-adbc/issues/1755#issuecomment-4141529498

   In case it's helpful for any replication analysis you might do, I provide 
some details on the empirical distribution of the underlying types for the 3 
million rows for that query here (this is R code, but hopefully its pretty 
transparent what SQL it is running):
   
   ``` r
   library(DBI)
   library(dplyr, warn.conflicts = FALSE)
   library(dbplyr, warn.conflicts = FALSE)
   
   db <- dbConnect(RPostgres::Postgres(), 
                   host = "wrds-pgdata.wharton.upenn.edu", 
                   port = 9737L, 
                   user = Sys.getenv("WRDS_ID"),
                   dbname ="wrds",
                   bigint = "integer")
   
   pg_attribute_tbl <- 
       tbl(db, in_schema("pg_catalog", "pg_attribute")) |>
       select(attrelid, attnum, attname, atttypid)
   
   pg_type_tbl <- 
       tbl(db, in_schema("pg_catalog", "pg_type")) |>
       select(
           oid,
           typname,
           typtype,
           typreceive,
           typsend,
           typbasetype,
           typrelid,
           typarray
       )
   
   attribute_types <- 
       pg_attribute_tbl |>
       left_join(pg_type_tbl, by = c("atttypid" = "oid")) |>
       mutate(
           driver_bucket = case_when(
               sql("typreceive::text = 'array_recv'") ~ "array (resolved from 
pg_type/typarray)",
               typrelid != 0L ~ "composite/record (needs pg_attribute via 
typrelid)",
               typbasetype != 0L ~ "domain (resolved from pg_type via 
typbasetype)",
               TRUE ~ "direct pg_type lookup"
           )
       )
   
   attribute_types |>
       count(driver_bucket, sort = TRUE) |>
       collect()
   #> # A tibble: 3 × 2
   #>   driver_bucket                                        n
   #>   <chr>                                            <int>
   #> 1 direct pg_type lookup                          3563772
   #> 2 domain (resolved from pg_type via typbasetype)     683
   #> 3 array (resolved from pg_type/typarray)              80
   
   attribute_types |>
       count(driver_bucket, typname, sort = TRUE) |>
       collect() |>
       print(n = Inf)
   #> # A tibble: 54 × 3
   #>    driver_bucket                                  typname               n
   #>    <chr>                                          <chr>             <int>
   #>  1 direct pg_type lookup                          varchar         1062019
   #>  2 direct pg_type lookup                          float8           707320
   #>  3 direct pg_type lookup                          numeric          319186
   #>  4 direct pg_type lookup                          int4             274798
   #>  5 direct pg_type lookup                          text             258006
   #>  6 direct pg_type lookup                          xid              159289
   #>  7 direct pg_type lookup                          cid              159278
   #>  8 direct pg_type lookup                          date             137778
   #>  9 direct pg_type lookup                          int2             125724
   #> 10 direct pg_type lookup                          time             117338
   #> 11 direct pg_type lookup                          oid              106590
   #> 12 direct pg_type lookup                          tid               79639
   #> 13 direct pg_type lookup                          int8              37160
   #> 14 direct pg_type lookup                          bytea             13265
   #> 15 direct pg_type lookup                          timestamp          5437
   #> 16 domain (resolved from pg_type via typbasetype) sql_identifier      448
   #> 17 direct pg_type lookup                          bool                428
   #> 18 direct pg_type lookup                          name                153
   #> 19 domain (resolved from pg_type via typbasetype) character_data      106
   #> 20 domain (resolved from pg_type via typbasetype) cardinal_number      76
   #> 21 direct pg_type lookup                          tsvector             68
   #> 22 direct pg_type lookup                          timestamptz          65
   #> 23 domain (resolved from pg_type via typbasetype) yes_or_no            50
   #> 24 direct pg_type lookup                          char                 45
   #> 25 direct pg_type lookup                          regproc              36
   #> 26 direct pg_type lookup                          cstring              34
   #> 27 array (resolved from pg_type/typarray)         _text                30
   #> 28 direct pg_type lookup                          jsonb                17
   #> 29 direct pg_type lookup                          pg_lsn               16
   #> 30 direct pg_type lookup                          pg_node_tree         16
   #> 31 direct pg_type lookup                          float4               14
   #> 32 direct pg_type lookup                          anyarray             14
   #> 33 array (resolved from pg_type/typarray)         _aclitem             14
   #> 34 array (resolved from pg_type/typarray)         _float4              11
   #> 35 array (resolved from pg_type/typarray)         _oid                  9
   #> 36 direct pg_type lookup                          regclass              8
   #> 37 direct pg_type lookup                          interval              8
   #> 38 direct pg_type lookup                          oidvector             6
   #> 39 direct pg_type lookup                          int2vector            6
   #> 40 array (resolved from pg_type/typarray)         _name                 4
   #> 41 array (resolved from pg_type/typarray)         _int2                 3
   #> 42 domain (resolved from pg_type via typbasetype) time_stamp            3
   #> 43 array (resolved from pg_type/typarray)         _char                 3
   #> 44 direct pg_type lookup                          <NA>                  2
   #> 45 direct pg_type lookup                          pg_dependencies       2
   #> 46 direct pg_type lookup                          inet                  2
   #> 47 direct pg_type lookup                          pg_ndistinct          2
   #> 48 array (resolved from pg_type/typarray)         _regtype              2
   #> 49 array (resolved from pg_type/typarray)         _float8               2
   #> 50 direct pg_type lookup                          tsquery               1
   #> 51 direct pg_type lookup                          pg_mcv_list           1
   #> 52 direct pg_type lookup                          regtype               1
   #> 53 array (resolved from pg_type/typarray)         _bool                 1
   #> 54 array (resolved from pg_type/typarray)         _pg_statistic         1
   ```
   
   <sup>Created on 2026-03-27 with [reprex 
v2.1.1](https://reprex.tidyverse.org)</sup>


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to