QSOs
Jump to navigation
Jump to search
The Primeval Universe working group, lead by Jean-Gabriel Cubi sent us an email with information (email's subject: SC8 PUWG simulations: quasars) They wrote an entry in the Euclid Redmine: https://euclid.roe.ac.uk/projects/puswg/wiki/Quasars/
There is a txt file that we "convert" and ingest into CosmoHub.
(I think) we convert it into parquet:
CREATE EXTERNAL TABLE `full_h25cut_hcol`( `type` string, `z` float, `m1450` float, `template` string, `h` float, `id` bigint) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' LOCATION 'hdfs://AntNest/user/tallada/data/euclid/full_H25cut_Hcol' ;
Then the clustered table in cosmohub:
CREATE TABLE full_H25cut_Hcol_v1_0_c( `id` bigint COMMENT 'ID', `type` string COMMENT 'type (QSO for all objects)', `z` float COMMENT 'redshift (z). Redshifts are in the range [6-15]', `m1450` float COMMENT 'absolute magnitude at 1450 angstroms (M1450)', `template` string COMMENT 'spectral template name (among 21)', `h` float COMMENT 'approximate H-band magnitude for information only. This was computed for applying the magnitude cut' ) CLUSTERED BY ( id ) SORTED BY ( id ASC ) INTO 4 BUCKETS ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' ;
INSERT OVERWRITE TABLE cosmohub.full_H25cut_Hcol_v1_0_c SELECT `id`, `type`, `z`,`m1450`, `template`, `h` FROM tallada.full_H25cut_Hcol;
ANALYZE TABLE cosmohub.full_H25cut_Hcol_v1_0_c COMPUTE STATISTICS FOR COLUMNS;
I decided to create an integer field for the sed template rather than a string field, in order to use it in CosmoHub and maybe for the mock algorithm:
CREATE TABLE full_H25cut_Hcol_v1_1_c( `id` bigint COMMENT 'ID', `type` string COMMENT 'type (QSO for all objects)', `z` float COMMENT 'redshift (z). Redshifts are in the range [6-15]', `m1450` float COMMENT 'absolute magnitude at 1450 angstroms (M1450)', `template` string COMMENT 'spectral template name (among 21)', `template_int` int COMMENT 'spectral template as integer (from 0 to 20)', `h` float COMMENT 'approximate H-band magnitude for information only. This was computed for applying the magnitude cut' ) CLUSTERED BY ( id ) SORTED BY ( id ASC ) INTO 4 BUCKETS ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' ;
INSERT OVERWRITE TABLE cosmohub.full_H25cut_Hcol_v1_1_c SELECT `id`, `type`, `z`,`m1450`, `template`, CASE WHEN template = 'Banados_avg_Harris' THEN 0 WHEN template = 'Banados_avg_Selsing' THEN 1 WHEN template = 'Banados_avg_Vandenberk' THEN 2 WHEN template = 'Banados_hlya_Harris' THEN 3 WHEN template = 'Banados_hlya_Selsing' THEN 4 WHEN template = 'Banados_hlya_Vandenberk' THEN 5 WHEN template = 'Banados_llya_Harris' THEN 6 WHEN template = 'Banados_llya_Selsing' THEN 7 WHEN template = 'Banados_llya_Vandenberk' THEN 8 WHEN template = 'Hewett_l0_cb' THEN 9 WHEN template = 'Hewett_l0_cr' THEN 10 WHEN template = 'Hewett_l0_cs' THEN 11 WHEN template = 'Hewett_ld_cb' THEN 12 WHEN template = 'Hewett_ld_cr' THEN 13 WHEN template = 'Hewett_ld_cs' THEN 14 WHEN template = 'Hewett_lh_cb' THEN 15 WHEN template = 'Hewett_lh_cr' THEN 16 WHEN template = 'Hewett_lh_cs' THEN 17 WHEN template = 'Hewett_ls_cb' THEN 18 WHEN template = 'Hewett_ls_cr' THEN 19 WHEN template = 'Hewett_ls_cs' THEN 20 END as template_int, `h` FROM tallada.full_H25cut_Hcol;
ANALYZE TABLE cosmohub.full_H25cut_Hcol_v1_1_c COMPUTE STATISTICS FOR COLUMNS;