Well i was trying to plot a QQ plot for rainfall : observation vs model data. i know there are very high values like 400 in the data but after I'm done plotting it, not all data is included; can anyone help,,,
this was the code i used. thanks for help in advancedthe output
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import geopandas as gpd
from shapely.geometry import mapping
from matplotlib.colors import LinearSegmentedColormap
# Load data
obs = xr.open_dataset('path\\IMD_MSG-2020-24-jjas.nc')
ncum_r_1 = xr.open_dataset('path\\ncumr_day1rf_jjas2021-24.nc')
ncum_r_2 = xr.open_dataset('path\\ncumr_day2rf_jjas2021-24.nc')
ncum_r_3 = xr.open_dataset('path\\ncumr_day3rf_jjas2021-24.nc')
# trimming
obs = obs.sel(time=slice('2021-06-01T03:00:00.000000000', '2024-09-30T03:00:00.000000000'))
# Rename coordinates
ncum_r_1 = ncum_r_1.rename({'latitude': 'lat', 'longitude': 'lon'})
ncum_r_2 = ncum_r_2.rename({'latitude': 'lat', 'longitude': 'lon'})
ncum_r_3 = ncum_r_3.rename({'latitude': 'lat', 'longitude': 'lon'})
# Regrid
ncum_r_1_regridded = ncum_r_1.interp_like(obs, method='nearest')
ncum_r_2_regridded = ncum_r_2.interp_like(obs, method='nearest')
ncum_r_3_regridded = ncum_r_3.interp_like(obs, method='nearest')
# Crop
lat_range, lon_range = slice(6, 41), slice(65, 106)
obs = obs.sel(lat=lat_range, lon=lon_range)
ncum_r_1_regridded = ncum_r_1_regridded.sel(lat=lat_range, lon=lon_range)
ncum_r_2_regridded = ncum_r_2_regridded.sel(lat=lat_range, lon=lon_range)
ncum_r_3_regridded = ncum_r_3_regridded.sel(lat=lat_range, lon=lon_range)
# Shapefile
shapefile_path = "path\\India_State_Boundary_02may2020.shp"
shape = gpd.read_file(shapefile_path)
#shape = shape.to_crs(epsg=4326)
obs = obs.rio.write_crs("EPSG:4326").rio.clip(shape.geometry.apply(mapping), shape.crs)
ncum_r_1_regridded = ncum_r_1_regridded.rio.write_crs("EPSG:4326").rio.clip(shape.geometry.apply(mapping), shape.crs)
ncum_r_2_regridded = ncum_r_2_regridded.rio.write_crs("EPSG:4326").rio.clip(shape.geometry.apply(mapping), shape.crs)
ncum_r_3_regridded = ncum_r_3_regridded.rio.write_crs("EPSG:4326").rio.clip(shape.geometry.apply(mapping), shape.crs)
# Extract data
obs_data = obs['rf'].mean(dim=['lat', 'lon']).values.ravel()
ncum_r1_data = ncum_r_1_regridded['APCP_surface'].mean(dim=['lat', 'lon']).values.ravel()
ncum_r2_data = ncum_r_2_regridded['APCP_surface'].mean(dim=['lat', 'lon']).values.ravel()
ncum_r3_data = ncum_r_3_regridded['APCP_surface'].mean(dim=['lat', 'lon']).values.ravel()
# Q-Q plot
def qq_plot(model_data, reference_data, label, ax):
model_quantiles = np.sort(model_data)
reference_quantiles = np.sort(reference_data)
min_len = min(len(model_quantiles), len(reference_quantiles))
model_quantiles = model_quantiles[:min_len]
reference_quantiles = reference_quantiles[:min_len]
ax.scatter(reference_quantiles, model_quantiles, label=label, alpha=0.6)
ax.plot(reference_quantiles, reference_quantiles, 'r--', label='1:1 Line')
ax.set_xlabel('Observed Rainfall Quantiles')
ax.set_ylabel('Predicted Rainfall Quantiles')
ax.set_title(f'Q-Q Plot: {label}')
ax.legend()
# Plot Q-Q plots
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
qq_plot(ncum_r1_data, obs_data, "NCUM R-Day1 vs Observation", axes[0])
qq_plot(ncum_r2_data, obs_data, "NCUM R-Day2 vs Observation", axes[1])
qq_plot(ncum_r3_data, obs_data, "NCUM R-Day3 vs Observation", axes[2])
plt.tight_layout()
plt.show()
this is the code i used to process the data and take the output