Hi,
I'm having some difficulty with the usage of a local database
installation. The code I'm trying to use with the local database
follows, using R's biomaRt package. A transcript of the local database
installation is attached.
The local database is mysql, using data from ensembl.org, specifically
ftp://ftp.ensembl.org/pub/release-50/mysql/ensembl_mart_50/ and
ftp://ftp.ensembl.org/pub/release-50/mysql/snp_mart_50/.
If anyone can tell me what I'm doing wrong, I would appreciate it. If this
is off-topic for this ml, please ignore.
Also, I got a hang in the mysql interpreter when I tried to import
#mysql> LOAD DATA INFILE
'/home/faheem/ensembl/hsapiens_snp__variation__main.txt'
INTO TABLE hsapiens_snp__variation__main;
so I'm commenting that one out for now in the transcript. Not much
point trying again, since at the moment I don't even know if it is
relevant.
I include the output of the script below, with the verbose flag for getBM
turned on. Modified script follows.
Thanks, Faheem.
****************************************************************************
source("biomart.R")
Loading required package: RMySQL
Loading required package: DBI
Reading database configuration of: hsapiens_snp
Checking attributes and filters ... ok
Checking main tables ... ok
Reading database configuration of: hsapiens_gene_ensembl
Checking attributes and filters ... ok
Checking main tables ... ok
[1] "SELECT DISTINCT hsapiens_snp__variation__main.name_2025,
hsapiens_snp__variation_feature__main.name_1059,
hsapiens_snp__variation_feature__main.seq_region_start_2026,
hsapiens_snp__variation_feature__main.seq_region_strand_2026,
hsapiens_snp__transcript_variation__dm.stable_id_1023,
hsapiens_snp__variation_feature__main.allele_string_2026,
hsapiens_snp__transcript_variation__dm.biotype_1064 FROM
hsapiens_snp__variation__main INNER JOIN (
hsapiens_snp__variation_feature__main,hsapiens_snp__transcript_variation__dm )
ON ( hsapiens_snp__variation_feature__main.variation_id_2025_key =
hsapiens_snp__variation__main.variation_id_2025_key AND
hsapiens_snp__transcript_variation__dm.variation_id_2025_key =
hsapiens_snp__variation__main.variation_id_2025_key) WHERE
hsapiens_snp__variation__main.name_2025 IN ('rs12726453')"
Error in mysqlExecStatement(conn, statement, ...) :
RS-DBI driver: (could not run statement: Unknown column
'hsapiens_snp__transcript_variation__dm.variation_id_2025_key' in 'on clause')
*****************************************************************************
library("biomaRt")
SNPanno=function(rsid,dbmart,enmart, verbose=FALSE)
{
attrib=c("refsnp_id","chr_name","snp_chrom_start","snp_chrom_strand","ensembl_gene_stable_id","snp_allele","ensembl_type")
info=getBM(attributes=attrib,filters="refsnp",values=rsid, mart=dbmart,
verbose=verbose)
if(!is.null(info))
{
genename=getBM(attributes="external_gene_id",filters="ensembl_gene_id",values=as.character(info[5]),mart=enmart,
verbose=verbose)
if(!is.null(genename))
{
info=data.frame(info,genename)
}
else
{
info=data.frame(info,genename=NA)
}
}
else
{
info=paste("no annotation info found for", rsid)
}
return(info)
}
rsid = "rs12726453"
#mart=useMart("snp",dataset="hsapiens_snp")
#en=useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")
mart=useMart(biomart="snp_mart_50", mysql=TRUE, host="localhost",
user="faheem", password="e=mc^2", local=TRUE, dataset="hsapiens_snp")
en=useMart(biomart="ensembl_mart_50", mysql=TRUE, host="localhost",
user="faheem", password="e=mc^2", local=TRUE, dataset="hsapiens_gene_ensembl"
)
snp = SNPanno(rsid,mart,en, verbose=TRUE)
# Instructions for installing ensembl database on Debian. Comments start with
#. Current version of * is 50.
STEP 0:
$ sudo apt-get install mysql-server
# Add faheem as user.
$ mysql --user=root mysql
mysql> GRANT ALL PRIVILEGES ON *.* TO 'faheem'@'localhost' IDENTIFIED BY
'passwd' WITH GRANT OPTION;
mysql> GRANT ALL PRIVILEGES ON *.* TO 'faheem'@'%' IDENTIFIED BY 'passwd' WITH
GRANT OPTION;
# Log in as faheem.
STEP 1: Create ensembl_mart and snp_mart databases.
$ mysql -A --user=faheem --password='passwd'
mysql> CREATE DATABASE ensembl_mart_*;
mysql> CREATE DATABASE snp_mart_*;
mysql> SHOW DATABASES;
+--------------------+
| Database |
+--------------------+
| information_schema |
| ensembl_mart_50 |
| mysql |
| snp_mart_50 |
+--------------------+
STEP 2: Download and unzip sql files for creating and populating database
tables/
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/ensembl_mart_*/ensembl_mart_*.sql.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/ensembl_mart_*/meta_conf__dataset__main.txt.gz
--output-document=ensembl_meta_conf__dataset__main.txt.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/ensembl_mart_*/meta_conf__xml__dm.txt.gz
--output-document=ensembl_meta_conf__xml__dm.txt.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/ensembl_mart_*/hsapiens_gene_ensembl__gene__main.txt.gz
$ wget -c ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/snp_mart_*.sql.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/meta_conf__dataset__main.txt.gz
--output-document=snp_meta_conf__dataset__main.txt.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/meta_conf__xml__dm.txt.gz
--output-document=snp_meta_conf__xml__dm.txt.gz
#$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/hsapiens_snp__variation__main.txt.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/hsapiens_snp__variation_synonym_Affy6__dm.txt.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/hsapiens_snp__variation_synonym_dbSNP__dm.txt.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/hsapiens_snp__transcript_variation__dm.001.txt.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/hsapiens_snp__transcript_variation__dm.002.txt.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/hsapiens_snp__transcript_variation__dm.003.txt.gz
$ wget -c
ftp://ftp.ensembl.org/pub/release-*/mysql/snp_mart_*/hsapiens_snp__transcript_variation__dm.004.txt.gz
$ gunzip *.gz
STEP 3: Create and populate ensembl_mart tables.
# Connect to ensembl_mart_*.
mysql> \r ensembl_mart_*
# SQL script creates tables in ensembl_mart_*.
$ mysql -D ensembl_mart_* -u faheem -p < ensembl_mart_*.sql
mysql> SHOW TABLES;
[list of created tables...]
mysql> LOAD DATA INFILE
'/home/faheem/ensembl/ensembl_meta_conf__dataset__main.txt' INTO TABLE
meta_conf__dataset__main;
mysql> LOAD DATA INFILE '/home/faheem/ensembl/ensembl_meta_conf__xml__dm.txt'
INTO TABLE meta_conf__xml__dm;
mysql> LOAD DATA INFILE
'/home/faheem/ensembl/hsapiens_gene_ensembl__gene__main.txt' INTO TABLE
hsapiens_gene_ensembl__gene__main;
STEP 4: Create and populate snp_mart tables.
# Connect to snp_mart_*.
mysql> \r snp_mart_*
# SQL script creates tables in snp_mart_*.
$ mysql -D snp_mart_* -u faheem -p < snp_mart_*.sql
mysql> SHOW TABLES;
[list of created tables...]
mysql> LOAD DATA INFILE '/home/faheem/ensembl/snp_meta_conf__dataset__main.txt'
INTO TABLE meta_conf__dataset__main;
mysql> LOAD DATA INFILE '/home/faheem/ensembl/snp_meta_conf__xml__dm.txt' INTO
TABLE meta_conf__xml__dm;
#mysql> LOAD DATA INFILE
'/home/faheem/ensembl/hsapiens_snp__variation__main.txt' INTO TABLE
hsapiens_snp__variation__main;
mysql> LOAD DATA INFILE
'/home/faheem/ensembl/hsapiens_snp__variation_synonym_Affy6__dm.txt' INTO TABLE
hsapiens_snp__variation_synonym_Affy6__dm;
mysql> LOAD DATA INFILE
'/home/faheem/ensembl/hsapiens_snp__variation_synonym_dbSNP__dm.txt' INTO TABLE
hsapiens_snp__variation_synonym_dbSNP__dm;
mysql> LOAD DATA INFILE
'/home/faheem/ensembl/hsapiens_snp__transcript_variation__dm.001.txt' INTO
TABLE hsapiens_snp__transcript_variation__dm;
mysql> LOAD DATA INFILE
'/home/faheem/ensembl/hsapiens_snp__transcript_variation__dm.002.txt' INTO
TABLE hsapiens_snp__transcript_variation__dm;
mysql> LOAD DATA INFILE
'/home/faheem/ensembl/hsapiens_snp__transcript_variation__dm.003.txt' INTO
TABLE hsapiens_snp__transcript_variation__dm;
mysql> LOAD DATA INFILE
'/home/faheem/ensembl/hsapiens_snp__transcript_variation__dm.004.txt' INTO
TABLE hsapiens_snp__transcript_variation__dm;