mirror of
https://github.com/heyarne/earth-observation-for-journalism.git
synced 2026-05-06 19:13:40 +02:00
New raster cloud mask implementation
This commit is contained in:
parent
8336055833
commit
6387dc28bc
8 changed files with 914 additions and 933 deletions
|
|
@ -3,7 +3,7 @@
|
|||
As mentioned before, the Sentinel-2 satellites capture data far outside the range of visible light. With wavelengths of up to 2100nm, some bands go far into the infra-red spectrum.
|
||||
The invisible parts of the spectrum are especially useful to calculate _Spectral Indices_ or _Indicators_.
|
||||
|
||||
These indicators can be used for a range of different tasks. The Normalized Difference in Water Index (NDWI) for example provides a value containing information about how likely a pixel represents water. The Bare-Soil Index (BSI) gives an estimate about how likely a pixel represents soil that is not covered by vegetation. Others indicate vegetation health or plant activity. The method these indices rely on is the transfer of knowledge about the different reflective properties of material in specific wavelengths into mathematical formulas that allow drawing conclusions about surface phenomena.
|
||||
These indicators can be used for a range of different tasks. The Normalized Difference Water Index (NDWI) for example provides a value containing information about how likely a pixel represents water. The Bare-Soil Index (BSI) gives an estimate about how likely a pixel represents soil that is not covered by vegetation. Others indicate vegetation health or plant activity. The method these indices rely on is the transfer of knowledge about the different reflective properties of material in specific wavelengths into mathematical formulas that allow drawing conclusions about surface phenomena.
|
||||
|
||||
This chapter explores how to calculate these indices over long and short time spans, leveraging the high spatial resolution to provide highly localized information.
|
||||
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
259
sources/02b NDVI.ipynb
Normal file
259
sources/02b NDVI.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -11,28 +11,115 @@
|
|||
"This notebook explores the contents of these files and evaluates how a helper for dealing with cloud masks can be implemented."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"Empty GeoDataFrame\n",
|
||||
"Columns: []\n",
|
||||
"Index: []"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sentinel_helpers import scihub_band_paths\n",
|
||||
"product_path = 'input/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip'"
|
||||
"import geopandas as gpd\n",
|
||||
"from rasterio.features import geometry_window\n",
|
||||
"\n",
|
||||
"tempelhofer_feld = gpd.read_file('resources/tempelhofer_feld/tempelhofer_feld.geojson')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sentinel_helpers import scihub_band_paths, raster_cloud_mask\n",
|
||||
"product_path = 'resources/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data, transform = raster_cloud_mask(product_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'zip+file://input/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip!/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.SAFE/GRANULE/L2A_T32UQD_A018609_20190114T101352/QI_DATA/MSK_CLDPRB_20m.jp2'"
|
||||
"(1, 10980, 10980)"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"PosixPath('zip+file:/home/jovyan/sources/resources/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip!/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.SAFE/GRANULE/L2A_T32UQD_A018609_20190114T101352/QI_DATA/MSK_CLDPRB_20m.jp2')"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
|
@ -44,7 +131,16 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from rasterio.windows import Window"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
|
@ -52,6 +148,28 @@
|
|||
"import rasterio.plot as rplt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Window(col_off=4893, row_off=3806, width=119, height=99)"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with r.open(raster_cloud_mask) as mask:\n",
|
||||
" window = geometry_window(mask, tempelhofer_feld.to_crs(mask.crs)['geometry'])\n",
|
||||
"window"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
|
|
|
|||
|
|
@ -106,10 +106,6 @@
|
|||
"\\text{NDVI} = \\frac{\\text{B08} - \\text{B04}}{\\text{B08} + \\text{B04}}\n",
|
||||
"$$\n",
|
||||
"\n",
|
||||
"They do pixel-wise operations across bands, which can be express using `numpy` `ndarrays`.\n",
|
||||
"`numpy` arrays support an operation called [broadcasting](https://numpy.org/doc/stable/user/basics.broadcasting.html): A number of mathematical operators are overloaded\n",
|
||||
"to not only work on scalar but multidimensional data. For two arrays `array_a` and `array_b` with identical shapes, `array_a + array_b` adds each cell in `array_a` with the corresponding cell in `array_b`.\n",
|
||||
"\n",
|
||||
"To define the index calculation formulas in this way, first the the basic arithmetic operations `+`, `-`, `*` and `/` are wrapped in functions taking variadic arguments:"
|
||||
]
|
||||
},
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 11 MiB |
|
|
@ -8,8 +8,9 @@ import geopandas as gpd
|
|||
from matplotlib import pyplot as plt
|
||||
import numpy as np
|
||||
import rasterio as r
|
||||
from rasterio.features import geometry_mask
|
||||
from rasterio.features import geometry_mask, geometry_window
|
||||
from rasterio.warp import calculate_default_transform, reproject, Resampling
|
||||
from rasterio.windows import Window
|
||||
|
||||
from shapely.geometry import shape
|
||||
from shapely.geometry.polygon import Polygon
|
||||
|
|
@ -101,63 +102,45 @@ def scihub_bgr_paths(product_path, resolution=None):
|
|||
return scihub_band_paths(product_path, ['B02', 'B03', 'B04'], resolution)
|
||||
|
||||
|
||||
def scihub_cloud_mask(product_path, **kwargs):
|
||||
def scihub_cloud_mask(product_path, area=None, cloud_probability=0.75, resolution='10m'):
|
||||
'''
|
||||
Given a `product_path` pointing to a product downlaoded from the Copernicus
|
||||
Open Access Hub, returns a shapely geometry representing the included cloud
|
||||
mask.
|
||||
|
||||
If an additional parameter, `rasterize=True` is given, the returned cloud
|
||||
mask will be a rasterized numpy ndarray instead of a vector geometry. Two
|
||||
additional parameters, `target_path` and `target_transform` are needed to
|
||||
determine the size of this array. In this array, pixels with clouds are
|
||||
`False` and pixels without clouds are `True`.
|
||||
Returns a numpy array with boolean values representing a product's cloud
|
||||
mask. Cloudy pixels are True, non-cloudy pixels are False.
|
||||
'''
|
||||
with TemporaryDirectory() as tmp_dir:
|
||||
# we need the temporary directory to work around a problem with reading
|
||||
# vector files from zip archives
|
||||
|
||||
p = Path(product_path)
|
||||
if p.suffix == '.zip':
|
||||
# when dealing with zip files we have to read the filenames from the
|
||||
# archive first
|
||||
with ZipFile(p) as f:
|
||||
files = f.namelist()
|
||||
file = [f for f in files if f.endswith('MSK_CLOUDS_B00.gml')][0]
|
||||
f.extract(file, tmp_dir)
|
||||
file = Path(tmp_dir) / file
|
||||
# TODO: Subset for area
|
||||
# there is no mask in 10m resolution an we need to manually upsample it;
|
||||
# upsampling code is taken from the rasterio documentation:
|
||||
# https://rasterio.readthedocs.io/en/latest/topics/resampling.html
|
||||
if resolution in ['20m', '60m']:
|
||||
mask_resolution = resolution
|
||||
upscale_factor = 1
|
||||
else:
|
||||
file = list(p.glob('**/MSK_CLOUDS_B00.gml'))[0]
|
||||
mask_resolution = '20m'
|
||||
upscale_factor = 2
|
||||
|
||||
try:
|
||||
with fiona.open(file) as features:
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
# this returns a warning because the iterator has to be
|
||||
# rewound; while this is a performance issue, we can ignore it
|
||||
mask = unary_union([shape(f['geometry']) for f in features])
|
||||
except ValueError:
|
||||
# empty cloud mask
|
||||
mask = Polygon([])
|
||||
|
||||
if kwargs.get('rasterize'):
|
||||
# return raster version of the vector geometry we found above
|
||||
target_shape = kwargs.get('target_shape')
|
||||
target_transform = kwargs.get('target_transform')
|
||||
if not target_transform or not target_shape:
|
||||
error_msg = 'target_transform and target_shape need to be set ' + \
|
||||
'to construct a rasterized cloud mask.'
|
||||
raise ValueError(error_msg)
|
||||
|
||||
# completely empty cloud masks have to be handled separately
|
||||
if mask.is_empty:
|
||||
return np.full(target_shape, True)
|
||||
|
||||
return geometry_mask(mask,
|
||||
out_shape=target_shape,
|
||||
transform=target_transform)
|
||||
mask_path = scihub_band_paths(product_path, ['MSK_CLDPRB'], mask_resolution)[0]
|
||||
with r.open(mask_path) as mask:
|
||||
if isinstance(area, gpd.GeoDataFrame):
|
||||
window = geometry_window(mask, area.to_crs(mask.crs)['geometry'])
|
||||
else:
|
||||
return mask
|
||||
window = Window(0, 0, mask.width, mask.height)
|
||||
|
||||
mask_data = mask.read(
|
||||
out_shape=(
|
||||
mask.count,
|
||||
int(window.height * upscale_factor),
|
||||
int(window.width * upscale_factor)
|
||||
),
|
||||
window=window,
|
||||
resampling=Resampling.bilinear
|
||||
)
|
||||
mask_transform = mask.transform * mask.transform.scale(
|
||||
(mask.width / mask_data.shape[-1]),
|
||||
(mask.height / mask_data.shape[-2])
|
||||
)
|
||||
|
||||
# mask_data values range from 0 to 100, cloud_probability from 0 to 1
|
||||
return mask_data >= (cloud_probability * 100), mask_transform
|
||||
|
||||
|
||||
def scihub_band_date(band):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue