Difference between revisions of "GLADE"

From Public PIC Wiki
Jump to navigation Jump to search
(Created page with "=== Version 2.4 === Previous version (2.3) was uploaded by Pau. I used the same schema used for 2.3 since checking by eye both catalogs have the same fields. I take the catal...")
 
Line 18: Line 18:
 
     hdfs dfs -mkdir /user/jcarrete/data/glade/2.4
 
     hdfs dfs -mkdir /user/jcarrete/data/glade/2.4
  
     hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/jcarrete/sandbox/GLADE/GLADE_2.4.txt /user/jcarrete/data/glade/2.4/GLADE_2.4.txt
+
     hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/jcarrete/sandbox/GLADE/GLADE_2.4.txt /user/jcarrete/data/GLADE/2.4/GLADE_2.4.txt
  
 
To create the external table:
 
To create the external table:

Revision as of 09:02, 21 October 2020

Version 2.4

Previous version (2.3) was uploaded by Pau. I used the same schema used for 2.3 since checking by eye both catalogs have the same fields.

I take the catalog from the following link:

http://glade.elte.hu/Download.html

In particular with this command from the following location:

   /cephfs/pic.es/astro/scratch/jcarrete/sandbox/GLADE
   wget http://aquarius.elte.hu/glade/GLADE_2.4.txt

Then I create the path in the hdfs and copy the file:

   hdfs dfs -mkdir /user/jcarrete/data/glade
   hdfs dfs -mkdir /user/jcarrete/data/glade/2.4
   hdfs dfs -copyFromLocal /cephfs/pic.es/astro/scratch/jcarrete/sandbox/GLADE/GLADE_2.4.txt /user/jcarrete/data/GLADE/2.4/GLADE_2.4.txt

To create the external table:

CREATE EXTERNAL TABLE jcarrete.glade_2_4_ssv(
 `pgc` int COMMENT 'PGC number', 
 `gwgc_name` string COMMENT 'Name in the GWGC catalog', 
 `hyperleda_name` string COMMENT 'Name in the HyperLEDA catalog', 
 `2mass_name` string COMMENT 'Name in the 2MASS XSC catalog', 
 `sdss_dr12_name` string COMMENT 'Name in the SDSS-DR12 QSO catalog', 
 `flag1` char(1) COMMENT 'Q: the source is from the SDSS-DR12 QSO catalog C: the source is a globular cluster G: the source is from another catalog and not identified as a globular cluster', 
 `ra` float COMMENT 'Right ascention [deg]', 
 `dec` float COMMENT 'Declination [deg]', 
 `dist` float COMMENT 'Luminosity distance [Mpc]', 
 `dist_err` float COMMENT 'Error of distance [Mpc]', 
 `z` float COMMENT 'Redshift', 
 `b` float COMMENT 'Apparent B magnitude', 
 `b_err` float COMMENT 'Error of apparent B magnitude', 
 `b_abs` float COMMENT 'Absolute B magnitude', 
 `j` float COMMENT 'Apparent J magnitude', 
 `j_err` float COMMENT 'Error of apparent J magnitude', 
 `h` float COMMENT 'Apparent H magnitude', 
 `h_err` float COMMENT 'Error of apparent H magnitude', 
 `k` float COMMENT 'Apparent K magnitude', 
 `k_err` float COMMENT 'Error of apparent K magnitude', 
 `flag2` tinyint COMMENT '0: the galaxy had neither measured distance nor measured redshift value\n1: the galaxy had measured redshift value, from which we have calculated distance using the following cosmological parameters: $H_0=70, Omega_{M}=0.27, Omega_{Lambda}=0.73$\n2: the galaxy had measured distance value from which we have calculated redshift using the following cosmological parameters: $H_0=70, Omega_{M}=0.27, Omega_{Lambda}=0.73$\n3: The measured photometric redshift of the galaxy has been changed to spectroscopic redshift, from which we have calculated distance using the following cosmological parameters: $H_0=70, Omega_{M}=0.27, Omega_{Lambda}=0.73$', 
 `flag3` tinyint COMMENT '0: velocity field correction has not been applied to the object 1: we have subtracted the radial velocity of the object')
ROW FORMAT SERDE 
 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
WITH SERDEPROPERTIES ( 
 'field.delim'=' ', 
 'serialization.format'=' ') 
STORED AS INPUTFORMAT 
 'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
 'hdfs://AntNest/user/jcarrete/data/GLADE/2.4/'
;


To create CosmoHub table:

CREATE TABLE cosmohub.glade_2_4_c(
 `pgc` int COMMENT 'PGC number', 
 `gwgc_name` string COMMENT 'Name in the GWGC catalog', 
 `hyperleda_name` string COMMENT 'Name in the HyperLEDA catalog', 
 `2mass_name` string COMMENT 'Name in the 2MASS XSC catalog', 
 `sdss_dr12_name` string COMMENT 'Name in the SDSS-DR12 QSO catalog', 
 `flag1` char(1) COMMENT 'Q: the source is from the SDSS-DR12 QSO catalog C: the source is a globular cluster G: the source is from another catalog and not identified as a globular cluster', 
 `ra` float COMMENT 'Right ascention [deg]', 
 `dec` float COMMENT 'Declination [deg]', 
 `dist` float COMMENT 'Luminosity distance [Mpc]', 
 `dist_err` float COMMENT 'Error of distance [Mpc]', 
 `z` float COMMENT 'Redshift', 
 `b` float COMMENT 'Apparent B magnitude', 
 `b_err` float COMMENT 'Error of apparent B magnitude', 
 `b_abs` float COMMENT 'Absolute B magnitude', 
 `j` float COMMENT 'Apparent J magnitude', 
 `j_err` float COMMENT 'Error of apparent J magnitude', 
 `h` float COMMENT 'Apparent H magnitude', 
 `h_err` float COMMENT 'Error of apparent H magnitude', 
 `k` float COMMENT 'Apparent K magnitude', 
 `k_err` float COMMENT 'Error of apparent K magnitude', 
 `flag2` tinyint COMMENT '0: the galaxy had neither measured distance nor measured redshift value\n1: the galaxy had measured redshift value, from which we have calculated distance using the following cosmological parameters: $H_0=70, Omega_{M}=0.27, Omega_{Lambda}=0.73$\n2: the galaxy had measured distance value from which we have calculated redshift using the following cosmological parameters: $H_0=70, Omega_{M}=0.27, Omega_{Lambda}=0.73$\n3: The measured photometric redshift of the galaxy has been changed to spectroscopic redshift, from which we have calculated distance using the following cosmological parameters: $H_0=70, Omega_{M}=0.27, Omega_{Lambda}=0.73$', 
 `flag3` tinyint COMMENT '0: velocity field correction has not been applied to the object 1: we have subtracted the radial velocity of the object')
CLUSTERED BY ( 
 pgc, 
 gwgc_name, 
 hyperleda_name, 
 2mass_name, 
 sdss_dr12_name) 
INTO 4 BUCKETS
ROW FORMAT SERDE 
 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
STORED AS INPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
OUTPUTFORMAT 
 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
;


   INSERT OVERWRITE TABLE cosmohub.glade_2_4_c
   SELECT pgc, gwgc_name, hyperleda_name, 2mass_name, sdss_dr12_name, flag1, ra, `dec`, dist, dist_err, z, b, b_err, b_abs, j, j_err, h, h_err, k, k_err, flag2, flag3
   FROM jcarrete.glade_2_4_ssv;

Statistics:

   ANALYZE TABLE cosmohub.glade_2_4_c COMPUTE STATISTICS FOR COLUMNS;