From 9fb94ba90e47a61bef6a6aca7101a1c9eb6d3688 Mon Sep 17 00:00:00 2001 From: Mathieu de Bony Date: Thu, 27 Nov 2025 13:33:40 +0100 Subject: [PATCH 1/3] Add configuration for LZA test --- .../README.md | 7 + .../add_slurm_options.py | 40 ++ .../change_nestimators.py | 25 ++ .../lstchain_config.json | 369 ++++++++++++++++++ .../lstmcpipe_config.yml | 40 ++ 5 files changed, 481 insertions(+) create mode 100644 production_configs/20251126_v0.10.12_LZA_investigation/README.md create mode 100644 production_configs/20251126_v0.10.12_LZA_investigation/add_slurm_options.py create mode 100644 production_configs/20251126_v0.10.12_LZA_investigation/change_nestimators.py create mode 100644 production_configs/20251126_v0.10.12_LZA_investigation/lstchain_config.json create mode 100644 production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml diff --git a/production_configs/20251126_v0.10.12_LZA_investigation/README.md b/production_configs/20251126_v0.10.12_LZA_investigation/README.md new file mode 100644 index 00000000..83a4be5a --- /dev/null +++ b/production_configs/20251126_v0.10.12_LZA_investigation/README.md @@ -0,0 +1,7 @@ +# LZA investigation + +We are currently investigating a discrepancy observed at LZA with the Crab. For this purpose, a few alternative simulations with different atmospheres have been produced. +The objective is to process them the same way as the standard production. + +Parameters used for production are identical to 20240918_v0.10.12_allsky_nsb_grid with the modifications of 20250212_v0.10.17_allsky_interp_dl2_irfs. +The RFs used are those of the standard productions. 
diff --git a/production_configs/20251126_v0.10.12_LZA_investigation/add_slurm_options.py b/production_configs/20251126_v0.10.12_LZA_investigation/add_slurm_options.py
new file mode 100644
index 00000000..2e05534c
--- /dev/null
+++ b/production_configs/20251126_v0.10.12_LZA_investigation/add_slurm_options.py
@@ -0,0 +1,40 @@
+import sys
+from pathlib import Path
+from ruamel.yaml import YAML
+
+
+def add_slurm_options(config_file):
+    """
+    Add SLURM options to each step in the given YML config file (path as str or Path), rewriting it in place.
+    This should ease job handling on the cluster, allowing some jobs to be scheduled in a 12h window.
+    It also uses the `--nice` SLURM option to give priority to lstosa jobs; options already set on a step are kept.
+    """
+    yaml = YAML()
+    yaml.preserve_quotes = True
+    config = yaml.load(Path(config_file))  # given a Path, ruamel opens and closes the file itself (no leaked handle)
+
+    for stage_name, stage in config['stages'].items():
+        for ii, step in enumerate(stage):
+            slurm_options = step.get('extra_slurm_options', {})
+            if 'time' not in slurm_options:
+                if stage_name == 'r0_to_dl1':
+                    slurm_options['time'] = '11:30:00'
+                elif stage_name == 'train_pipe':
+                    slurm_options['time'] = '02-00:00:00'
+                elif stage_name == 'dl1ab':
+                    slurm_options['time'] = '08:00:00'
+                elif stage_name == 'merge_dl1' and ('GammaDiffuse' in step['input'] or 'Protons' in step['input']):
+                    slurm_options['time'] = '08:00:00'
+                else:
+                    slurm_options['partition'] = 'short'
+            if 'nice' not in slurm_options:
+                slurm_options['nice'] = 10
+            stage[ii]['extra_slurm_options'] = slurm_options
+
+    with open(config_file, 'w') as f:
+        yaml.dump(config, f)
+
+
+if __name__ == '__main__':
+    for cfg_file in Path('.').glob('NSB-*/lstmcpipe*.yml'):  # NOTE(review): no NSB-* dirs in this production - confirm
+        add_slurm_options(cfg_file)
diff --git a/production_configs/20251126_v0.10.12_LZA_investigation/change_nestimators.py b/production_configs/20251126_v0.10.12_LZA_investigation/change_nestimators.py
new file mode 100644
index 00000000..d78c42af
--- /dev/null
+++ b/production_configs/20251126_v0.10.12_LZA_investigation/change_nestimators.py
@@ -0,0 +1,25 @@
+import json
+import os
+
+# 
Directory path +directory = '.' + +# Iterate over all directories starting with 'NSB-' +for root, dirs, files in os.walk(directory): + if root.startswith(os.path.join(directory, 'NSB-')): + # Iterate over all JSON files in the directory + for file in files: + if file.endswith('.json'): + file_path = os.path.join(root, file) + + # Open the JSON file + with open(file_path, 'r') as f: + data = json.load(f) + + # Modify the 'n_estimators' values + data['random_forest_energy_regressor_args']['n_estimators'] = 50 + data['random_forest_disp_regressor_args']['n_estimators'] = 50 + + # Write the modified data back to the JSON file + with open(file_path, 'w') as f: + json.dump(data, f, indent=4) diff --git a/production_configs/20251126_v0.10.12_LZA_investigation/lstchain_config.json b/production_configs/20251126_v0.10.12_LZA_investigation/lstchain_config.json new file mode 100644 index 00000000..d68cadb0 --- /dev/null +++ b/production_configs/20251126_v0.10.12_LZA_investigation/lstchain_config.json @@ -0,0 +1,369 @@ +{ + "source_config": { + "EventSource": { + "allowed_tels": [ + 1 + ], + "max_events": null + }, + "LSTEventSource": { + "default_trigger_type": "ucts", + "allowed_tels": [ + 1 + ], + "min_flatfield_adc": 3000, + "min_flatfield_pixel_fraction": 0.8, + "calibrate_flatfields_and_pedestals": false, + "EventTimeCalculator": { + "dragon_reference_counter": null, + "dragon_reference_time": null + }, + "PointingSource": { + "drive_report_path": null + }, + "LSTR0Corrections": { + "calib_scale_high_gain": 1.088, + "calib_scale_low_gain": 1.004, + "drs4_pedestal_path": null, + "calibration_path": null, + "drs4_time_calibration_path": null + } + } + }, + "events_filters": { + "intensity": [ + 0, + Infinity + ], + "width": [ + 0, + Infinity + ], + "length": [ + 0, + Infinity + ], + "wl": [ + 0, + Infinity + ], + "r": [ + 0, + Infinity + ], + "leakage_intensity_width_2": [ + 0, + Infinity + ] + }, + "n_training_events": { + "gamma_regressors": 1.0, + "gamma_tmp_regressors": 
0.8, + "gamma_classifier": 0.2, + "proton_classifier": 1.0 + }, + "tailcut": { + "picture_thresh": 8, + "boundary_thresh": 4, + "keep_isolated_pixels": false, + "min_number_picture_neighbors": 2, + "use_only_main_island": false, + "delta_time": 2 + }, + "tailcuts_clean_with_pedestal_threshold": { + "picture_thresh": 8, + "boundary_thresh": 4, + "sigma": 2.5, + "keep_isolated_pixels": false, + "min_number_picture_neighbors": 2, + "use_only_main_island": false, + "delta_time": 2 + }, + "dynamic_cleaning": { + "apply": true, + "threshold": 267, + "fraction_cleaning_intensity": 0.03 + }, + "random_forest_weight_settings": { + "pointing_wise_weights": true + }, + "random_forest_energy_regressor_args": { + "max_depth": 30, + "min_samples_leaf": 10, + "n_jobs": -1, + "n_estimators": 150, + "bootstrap": true, + "criterion": "squared_error", + "max_features": 1.0, + "max_leaf_nodes": null, + "min_impurity_decrease": 0.0, + "min_samples_split": 10, + "min_weight_fraction_leaf": 0.0, + "oob_score": false, + "random_state": 42, + "warm_start": false + }, + "random_forest_disp_regressor_args": { + "max_depth": 30, + "min_samples_leaf": 10, + "n_jobs": -1, + "n_estimators": 150, + "bootstrap": true, + "criterion": "squared_error", + "max_features": 1.0, + "max_leaf_nodes": null, + "min_impurity_decrease": 0.0, + "min_samples_split": 10, + "min_weight_fraction_leaf": 0.0, + "oob_score": false, + "random_state": 42, + "warm_start": false + }, + "random_forest_disp_classifier_args": { + "max_depth": 30, + "min_samples_leaf": 10, + "n_jobs": -1, + "n_estimators": 100, + "criterion": "gini", + "min_samples_split": 10, + "min_weight_fraction_leaf": 0.0, + "max_features": 1.0, + "max_leaf_nodes": null, + "min_impurity_decrease": 0.0, + "bootstrap": true, + "oob_score": false, + "random_state": 42, + "warm_start": false, + "class_weight": null + }, + "random_forest_particle_classifier_args": { + "max_depth": 30, + "min_samples_leaf": 10, + "n_jobs": -1, + "n_estimators": 100, + 
"criterion": "gini", + "min_samples_split": 10, + "min_weight_fraction_leaf": 0.0, + "max_features": 1.0, + "max_leaf_nodes": null, + "min_impurity_decrease": 0.0, + "bootstrap": true, + "oob_score": false, + "random_state": 42, + "warm_start": false, + "class_weight": null + }, + "energy_regression_features": [ + "log_intensity", + "width", + "length", + "x", + "y", + "wl", + "skewness", + "kurtosis", + "time_gradient", + "leakage_intensity_width_2", + "sin_az_tel", + "alt_tel" + ], + "disp_method": "disp_norm_sign", + "disp_regression_features": [ + "log_intensity", + "width", + "length", + "wl", + "skewness", + "kurtosis", + "time_gradient", + "leakage_intensity_width_2", + "sin_az_tel", + "alt_tel" + ], + "disp_classification_features": [ + "log_intensity", + "width", + "length", + "wl", + "skewness", + "kurtosis", + "time_gradient", + "leakage_intensity_width_2", + "sin_az_tel", + "alt_tel" + ], + "particle_classification_features": [ + "log_intensity", + "width", + "length", + "x", + "y", + "wl", + "signed_skewness", + "kurtosis", + "signed_time_gradient", + "leakage_intensity_width_2", + "log_reco_energy", + "reco_disp_norm", + "reco_disp_sign", + "sin_az_tel", + "alt_tel" + ], + "allowed_tels": [ + 1 + ], + "write_pe_image": false, + "mc_image_scaling_factor": 1, + "image_extractor": "LocalPeakWindowSum", + "image_extractor_for_muons": "GlobalPeakWindowSum", + "CameraCalibrator": { + "apply_waveform_time_shift": false + }, + "time_sampling_correction_path": "default", + "LocalPeakWindowSum": { + "window_shift": 4, + "window_width": 8, + "apply_integration_correction": true + }, + "GlobalPeakWindowSum": { + "window_shift": 4, + "window_width": 8, + "apply_integration_correction": true + }, + "timestamps_pointing": "ucts", + "train_gamma_src_r_deg": [ + 0, + Infinity + ], + "source_dependent": false, + "mc_nominal_source_x_deg": 0.4, + "mc_nominal_source_y_deg": 0.0, + "volume_reducer": { + "algorithm": null, + "parameters": {} + }, + "calibration_product": 
"LSTCalibrationCalculator", + "LSTCalibrationCalculator": { + "systematic_correction_path": null, + "npe_median_cut_outliers": [ + -5, + 5 + ], + "squared_excess_noise_factor": 1.222, + "flatfield_product": "FlasherFlatFieldCalculator", + "pedestal_product": "PedestalIntegrator", + "PedestalIntegrator": { + "sample_size": 10000, + "sample_duration": 100000, + "tel_id": 1, + "time_sampling_correction_path": null, + "charge_median_cut_outliers": [ + -10, + 10 + ], + "charge_std_cut_outliers": [ + -10, + 10 + ], + "charge_product": "FixedWindowSum", + "FixedWindowSum": { + "window_shift": 6, + "window_width": 12, + "peak_index": 18, + "apply_integration_correction": false + } + }, + "FlasherFlatFieldCalculator": { + "sample_size": 10000, + "sample_duration": 100000, + "tel_id": 1, + "time_sampling_correction_path": null, + "charge_product": "LocalPeakWindowSum", + "charge_median_cut_outliers": [ + -0.9, + 2 + ], + "charge_std_cut_outliers": [ + -10, + 10 + ], + "time_cut_outliers": [ + 2, + 38 + ], + "LocalPeakWindowSum": { + "window_shift": 5, + "window_width": 12, + "apply_integration_correction": false + } + } + }, + "waveform_nsb_tuning": { + "nsb_tuning": false, + "nsb_tuning_rate_GHz": 0.15, + "spe_location": null, + "pre_computed_multiplicity": 10 + }, + "write_interleaved_events": { + "DataWriter": { + "overwrite": true, + "write_images": false, + "write_parameters": false, + "write_waveforms": true, + "transform_waveform": true, + "waveform_dtype": "uint16", + "waveform_offset": 400, + "waveform_scale": 80 + } + }, + "EventSelector": { + "filters": { + "intensity": [50, Infinity], + "width": [0, Infinity], + "length": [0, Infinity], + "r": [0, 1], + "wl": [0.01, 1], + "leakage_intensity_width_2": [0, 1], + "event_type": [32, 32] + } + }, + "DL3Cuts": { + "min_event_p_en_bin": 100, + "global_gh_cut": 0.7, + "gh_efficiency": 0.7, + "min_gh_cut": 0.1, + "max_gh_cut": 0.98, + "global_alpha_cut": 10, + "global_theta_cut": 0.2, + "theta_containment": 0.7, + 
"alpha_containment": 0.7, + "min_theta_cut": 0.1, + "max_theta_cut": 0.32, + "fill_theta_cut": 0.32, + "min_alpha_cut": 1, + "max_alpha_cut": 20, + "fill_alpha_cut": 20, + "allowed_tels": [1] + }, + "DataBinning": { + "true_energy_min": 0.005, + "true_energy_max": 500, + "true_energy_n_bins": 25, + "scale_true_energy": 1.0, + "reco_energy_min": 0.005, + "reco_energy_max": 500, + "reco_energy_n_bins": 25, + "energy_migration_min": 0.2, + "energy_migration_max": 5, + "energy_migration_n_bins": 30, + "fov_offset_min": 0.1, + "fov_offset_max": 1.1, + "fov_offset_n_edges": 9, + "bkg_fov_offset_min": 0, + "bkg_fov_offset_max": 10, + "bkg_fov_offset_n_edges": 21, + "source_offset_min": 0, + "source_offset_max": 1, + "source_offset_n_edges": 101 + } +} \ No newline at end of file diff --git a/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml b/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml new file mode 100644 index 00000000..39bc7aa9 --- /dev/null +++ b/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml @@ -0,0 +1,40 @@ +workflow_kind: lstchain + +prod_id: 20251126_v0.10.12_LZA_investigation + +source_environment: + source_file: /fefs/aswg/software/conda/etc/profile.d/conda.sh + conda_env: lstchain-v0.10.12 + +slurm_config: +# dpps is the default account for lstanalyzer - other users should use aswg + user_account: dpps +lstmcpipe_version: 0.11.3 +prod_type: PathConfigAllSkyFullSplitDiffuse +stages_to_run: +- r0_to_dl1 +- merge_dl1 +- dl1_to_dl2 +stages: + r0_to_dl1: + - input: /fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_6166/sim_telarray/node_corsika_theta_60.66_az_32.517_/output_v1.4/ + output: /fefs/aswg/data/mc/DL1/AllSky/20251126_v0.10.12_LZA_investigation/TestingDataset/GammaDiffuse/node_corsika_theta_60.66_az_32.517_ + extra_slurm_options: + time: 11:30:00 + nice: 10 + merge_dl1: + - input: 
/fefs/aswg/data/mc/DL1/AllSky/20251126_v0.10.12_LZA_investigation/TestingDataset/GammaDiffuse/node_corsika_theta_60.66_az_32.517_ + output: /fefs/aswg/data/mc/DL1/AllSky/20251126_v0.10.12_LZA_investigation/TestingDataset/GammaDiffuse/dl1_20251126_v0.10.12_LZA_investigation_nsb_tuning_0.00_test_node_corsika_theta_60.66_az_32.517_GammaDiffuse_merged.h5 + options: --pattern *.h5 --no-image + extra_slurm_options: + partition: long + time: 06:00:00 + nice: 10 + dl1_to_dl2: + - input: /fefs/aswg/data/mc/DL1/AllSky/20251126_v0.10.12_LZA_investigation/TestingDataset/GammaDiffuse/dl1_20251126_v0.10.12_LZA_investigation_nsb_tuning_0.00_test_node_corsika_theta_60.66_az_32.517_GammaDiffuse_merged.h5 + path_model: /fefs/aswg/data/models/AllSky/20240918_v0.10.12_allsky_nsb_tuning_0.00/dec_6676 + output: /fefs/aswg/data/mc/DL2/AllSky/20251126_v0.10.12_LZA_investigation/TestingDataset/GammaDiffuse/dec_6676/ + extra_slurm_options: + mem: 80GB + partition: short + nice: 10 From bdc381cd78bfbed5cb3da128ef5a2905f525f6e3 Mon Sep 17 00:00:00 2001 From: Mathieu de Bony Date: Mon, 1 Dec 2025 17:27:01 +0100 Subject: [PATCH 2/3] Fix file path --- .../20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml b/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml index 39bc7aa9..663f9541 100644 --- a/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml +++ b/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml @@ -17,7 +17,7 @@ stages_to_run: - dl1_to_dl2 stages: r0_to_dl1: - - input: /fefs/aswg/data/mc/DL0/LSTProd2/TrainingDataset/GammaDiffuse/dec_6166/sim_telarray/node_corsika_theta_60.66_az_32.517_/output_v1.4/ + - input: /fefs/aswg/data/mc/DL0/LSTProd2/TestDataset/seasonal_variations/std_Winter/sim_telarray/node_corsika_theta_60.66_az_32.517_/output output: 
/fefs/aswg/data/mc/DL1/AllSky/20251126_v0.10.12_LZA_investigation/TestingDataset/GammaDiffuse/node_corsika_theta_60.66_az_32.517_ extra_slurm_options: time: 11:30:00 From 907e2c2d045dd8a7a52250361a2849edc1c37855 Mon Sep 17 00:00:00 2001 From: Mathieu de Bony Date: Mon, 1 Dec 2025 17:32:09 +0100 Subject: [PATCH 3/3] Use the correct RF --- .../20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml b/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml index 663f9541..8e3228c7 100644 --- a/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml +++ b/production_configs/20251126_v0.10.12_LZA_investigation/lstmcpipe_config.yml @@ -32,8 +32,8 @@ stages: nice: 10 dl1_to_dl2: - input: /fefs/aswg/data/mc/DL1/AllSky/20251126_v0.10.12_LZA_investigation/TestingDataset/GammaDiffuse/dl1_20251126_v0.10.12_LZA_investigation_nsb_tuning_0.00_test_node_corsika_theta_60.66_az_32.517_GammaDiffuse_merged.h5 - path_model: /fefs/aswg/data/models/AllSky/20240918_v0.10.12_allsky_nsb_tuning_0.00/dec_6676 - output: /fefs/aswg/data/mc/DL2/AllSky/20251126_v0.10.12_LZA_investigation/TestingDataset/GammaDiffuse/dec_6676/ + path_model: /fefs/aswg/data/models/AllSky/20240918_v0.10.12_allsky_nsb_tuning_0.00/dec_6166 + output: /fefs/aswg/data/mc/DL2/AllSky/20251126_v0.10.12_LZA_investigation/TestingDataset/GammaDiffuse/dec_6166/ extra_slurm_options: mem: 80GB partition: short