CIGALE-W3

From Public PIC Wiki
Revision as of 00:48, 3 December 2020 by Jcarrete (talk | contribs) (Created page with "I get a FITS file from Gosia: /cephfs/pic.es/astro/scratch/jcarrete/sandbox/PAU/CIGALE/catalogue_cosmohub.fits' I have to make several things to convert it into a parque...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

I get a FITS file from Gosia:

   /cephfs/pic.es/astro/scratch/jcarrete/sandbox/PAU/CIGALE/catalogue_cosmohub.fits'

I have to make several things to convert it into a parquet file.

Finally I use the FITS driver from pyspark made by Pau. I use the following notebook run using spark notebooks (mocks environment):

   /nfs/pic.es/user/j/jcarrete/python_notebooks/PAU/PAU_CIGALE_catalogs.ipynb

Catalogs in parquet format are stored here:

   /user/jcarrete/data/paudm/paus_cigale/

I create the external table:

   /software/astro/scripts/create_table_from_parquet.sh -p /hdfs/user/jcarrete/data/paudm/paus_cigale/part-00001-4cd469f3-49f3-4e46-b839-50599cbc7879-c000.snappy.parquet -t jcarrete.paus_cigale_v1_pq -e
   CREATE EXTERNAL TABLE jcarrete.paus_cigale_v1_pq (
   `ref_id` BIGINT,
   `class_2` INT,
   `P2_red` DOUBLE,
   `P2_blue` DOUBLE,
   `class_3` INT,
   `P3_red` DOUBLE,
   `P3_blue` DOUBLE,
   `P3_green` DOUBLE,
   `bcnz_zb` FLOAT,
   `z_b` DOUBLE,
   `lp_log10_sm_med` FLOAT,
   `lp_mu` DOUBLE,
   `lp_mg` DOUBLE,
   `lp_mi` DOUBLE,
   `lp_mr` DOUBLE,
   `lp_mz` DOUBLE,
   `star_flag` INT,
   `g_i` DOUBLE,
   `g_i_err` DOUBLE,
   `u_i_megaprime` DOUBLE,
   `u_i_megaprime_err` DOUBLE,
   `r_z` DOUBLE,
   `g_r` DOUBLE,
   `u_g` DOUBLE,
   `r_z_err` DOUBLE,
   `u_i` DOUBLE,
   `u_i_err` DOUBLE,
   `u_r` DOUBLE,
   `u-r_err` DOUBLE,
   `Lnu_g_prime` DOUBLE,
   `Lnu_g_prime_err` DOUBLE,
   `Lnu_i_prime` DOUBLE,
   `Lnu_i_prime_err` DOUBLE,
   `Lnu_megaprime_i` DOUBLE,
   `Lnu_megaprime_i_err` DOUBLE,
   `Lnu_r_prime` DOUBLE,
   `Lnu_r_prime_err` DOUBLE,
   `Lnu_u_prime` DOUBLE,
   `Lnu_u_prime_err` DOUBLE,
   `Lnu_z_prime` DOUBLE,
   `Lnu_z_prime_err` DOUBLE,
   `u_abs` DOUBLE,
   `g_abs` DOUBLE,
   `r_abs` DOUBLE,
   `i_abs` DOUBLE,
   `i_abs_megaprime` DOUBLE,
   `z_abs` DOUBLE,
   `sfr10Myrs_log` DOUBLE,
   `sfr10Myrs_log_err` DOUBLE,
   `sfr_log` DOUBLE,
   `sfr_log_err` DOUBLE,
   `m_star_log` DOUBLE,
   `m_star_log_err` DOUBLE,
   `reduced_chi_square` DOUBLE
   )
   STORED AS PARQUET
   LOCATION '/user/jcarrete/data/paudm/paus_cigale'
   ; 


   CREATE TABLE cosmohub.paus_cigale_v1_c (
   `ref_id` BIGINT COMMENT "PAU id",
   `class_2` INT COMMENT "class number in 2-cluster classification (classes 1 and 2 refer to red and blue galaxies, respectively)",
   `P2_red` DOUBLE COMMENT "class membership probability of belonging to red class in 2-cluster classification",
   `P2_blue` DOUBLE COMMENT "class membership probability of belonging to blue class in 2-cluster classification",
   `class_3` INT COMMENT "class number in 3-cluster classification (classes 1, 2, 3 refers to red, blue, green galaxies, respectively)",
   `P3_red` DOUBLE COMMENT "class membership probability of belonging to red class in 3-cluster classification",
   `P3_blue` DOUBLE COMMENT "class membership probability of belonging to blue class in 3-cluster classification",
   `P3_green` DOUBLE COMMENT "class membership probability of belonging to green class in 3-cluster classification",
   `bcnz_zb` FLOAT COMMENT "PAU redshift", 
   `z_b` DOUBLE COMMENT "CFHTLenS redshift",
   `lp_log10_sm_med` FLOAT COMMENT "CFHTLenS stellar mass and abs magnitude",
   `lp_mu` DOUBLE COMMENT "CFHTLenS stellar mass and abs magnitude",
   `lp_mg` DOUBLE COMMENT "CFHTLenS stellar mass and abs magnitude",
   `lp_mi` DOUBLE COMMENT "CFHTLenS stellar mass and abs magnitude",
   `lp_mr` DOUBLE COMMENT "CFHTLenS stellar mass and abs magnitude",
   `lp_mz` DOUBLE COMMENT "CFHTLenS stellar mass and abs magnitude",
   `star_flag` INT COMMENT "CFHTLenS star flag (1 for stars)",
   `g_i` DOUBLE COMMENT "color",
   `g_i_err` DOUBLE COMMENT "color error",
   `u_i_megaprime` DOUBLE COMMENT "color",
   `u_i_megaprime_err` DOUBLE COMMENT "color error",
   `r_z` DOUBLE COMMENT "color",
   `g_r` DOUBLE COMMENT "color",
   `u_g` DOUBLE COMMENT "color",
   `r_z_err` DOUBLE COMMENT "color error",
   `u_i` DOUBLE COMMENT "color",
   `u_i_err` DOUBLE COMMENT "color error",
   `u_r` DOUBLE COMMENT "color",
   `u-r_err` DOUBLE COMMENT "color error",
   `Lnu_g_prime` DOUBLE,
   `Lnu_g_prime_err` DOUBLE,
   `Lnu_i_prime` DOUBLE,
   `Lnu_i_prime_err` DOUBLE,
   `Lnu_megaprime_i` DOUBLE,
   `Lnu_megaprime_i_err` DOUBLE,
   `Lnu_r_prime` DOUBLE COMMENT "rest-framed luminosity",
   `Lnu_r_prime_err` DOUBLE COMMENT "rest-framed luminosity error",
   `Lnu_u_prime` DOUBLE COMMENT "rest-framed luminosity",
   `Lnu_u_prime_err` DOUBLE COMMENT "rest-framed luminosity error",
   `Lnu_z_prime` DOUBLE COMMENT "rest-framed luminosity",
   `Lnu_z_prime_err` DOUBLE COMMENT "rest-framed luminosity error",
   `u_abs` DOUBLE COMMENT "absolute magnitudes calculated from luminosity using eq. -2.5log(Lnu)+34.1",
   `g_abs` DOUBLE COMMENT "absolute magnitudes calculated from luminosity using eq. -2.5log(Lnu)+34.1",
   `r_abs` DOUBLE COMMENT "absolute magnitudes calculated from luminosity using eq. -2.5log(Lnu)+34.1",
   `i_abs` DOUBLE COMMENT "absolute magnitudes calculated from luminosity using eq. -2.5log(Lnu)+34.1",
   `i_abs_megaprime` DOUBLE COMMENT "absolute magnitudes calculated from luminosity using eq. -2.5log(Lnu)+34.1",
   `z_abs` DOUBLE COMMENT "absolute magnitudes calculated from luminosity using eq. -2.5log(Lnu)+34.1",
   `sfr10Myrs_log` DOUBLE COMMENT "(log) star formation rates",
   `sfr10Myrs_log_err` DOUBLE COMMENT "(log) star formation rates error",
   `sfr_log` DOUBLE COMMENT "(log) star formation rates",
   `sfr_log_err` DOUBLE COMMENT "(log) star formation rates error",
   `m_star_log` DOUBLE COMMENT "(log) stellar mass",
   `m_star_log_err` DOUBLE COMMENT "(log) stellar mass error",
   `reduced_chi_square` DOUBLE COMMENT "reduced_chi_square"
   )
   CLUSTERED BY ( 
 ref_id) 
   SORTED BY ( 
 ref_id ASC) 
   INTO 4 BUCKETS
   ROW FORMAT SERDE 
 "org.apache.hadoop.hive.ql.io.orc.OrcSerde" 
   STORED AS INPUTFORMAT 
 "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat" 
   OUTPUTFORMAT 
 "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"
  ;


INSERT OVERWRITE TABLE cosmohub.paus_cigale_v1_c SELECT * FROM jcarrete.paus_cigale_v1_pq

ANALYZE TABLE cosmohub.paus_cigale_v1_c COMPUTE STATISTICS FOR COLUMNS;