New raster cloud mask implementation

This commit is contained in:
heyarne 2021-03-06 16:41:24 +00:00
commit 6387dc28bc
8 changed files with 914 additions and 933 deletions

View file

@ -3,7 +3,7 @@
As mentioned before, the Sentinel-2 satellites capture data far outside the range of visible light. With wavelengths of up to 2100nm, some bands go far into the infra-red spectrum.
The invisible parts of the spectrum are especially useful to calculate _Spectral Indices_ or _Indicators_.
These indicators can be used for a range of different tasks. The Normalized Difference in Water Index (NDWI) for example provides a value containing information about how likely a pixel represents water. The Bare-Soil Index (BSI) gives an estimate about how likely a pixel represents soil that is not covered by vegetation. Others indicate vegetation health or plant activity. The method these indices rely on is the transfer of knowledge about the different reflective properties of material in specific wavelengths into mathematical formulas that allow drawing conclusions about surface phenomena.
These indicators can be used for a range of different tasks. The Normalized Difference Water Index (NDWI) for example provides a value containing information about how likely a pixel represents water. The Bare-Soil Index (BSI) gives an estimate about how likely a pixel represents soil that is not covered by vegetation. Others indicate vegetation health or plant activity. The method these indices rely on is the transfer of knowledge about the different reflective properties of material in specific wavelengths into mathematical formulas that allow drawing conclusions about surface phenomena.
This chapter explores how to calculate these indices over long and short time spans, leveraging the high spatial resolution to provide highly localized information.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

259
sources/02b NDVI.ipynb Normal file

File diff suppressed because one or more lines are too long

View file

@ -11,28 +11,115 @@
"This notebook explores the contents of these files and evaluates how a helper for dealing with cloud masks can be implemented."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty GeoDataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from sentinel_helpers import scihub_band_paths\n",
"product_path = 'input/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip'"
"import geopandas as gpd\n",
"from rasterio.features import geometry_window\n",
"\n",
"tempelhofer_feld = gpd.read_file('resources/tempelhofer_feld/tempelhofer_feld.geojson')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from sentinel_helpers import scihub_band_paths, raster_cloud_mask\n",
"product_path = 'resources/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"data, transform = raster_cloud_mask(product_path)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'zip+file://input/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip!/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.SAFE/GRANULE/L2A_T32UQD_A018609_20190114T101352/QI_DATA/MSK_CLDPRB_20m.jp2'"
"(1, 10980, 10980)"
]
},
"execution_count": 2,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('zip+file:/home/jovyan/sources/resources/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip!/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.SAFE/GRANULE/L2A_T32UQD_A018609_20190114T101352/QI_DATA/MSK_CLDPRB_20m.jp2')"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@ -44,7 +131,16 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from rasterio.windows import Window"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
@ -52,6 +148,28 @@
"import rasterio.plot as rplt"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Window(col_off=4893, row_off=3806, width=119, height=99)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with r.open(raster_cloud_mask) as mask:\n",
" window = geometry_window(mask, tempelhofer_feld.to_crs(mask.crs)['geometry'])\n",
"window"
]
},
{
"cell_type": "code",
"execution_count": 4,

View file

@ -106,10 +106,6 @@
"\\text{NDVI} = \\frac{\\text{B08} - \\text{B04}}{\\text{B08} + \\text{B04}}\n",
"$$\n",
"\n",
"They do pixel-wise operations across bands, which can be express using `numpy` `ndarrays`.\n",
"`numpy` arrays support an operation called [broadcasting](https://numpy.org/doc/stable/user/basics.broadcasting.html): A number of mathematical operators are overloaded\n",
"to not only work on scalar but multidimensional data. For two arrays `array_a` and `array_b` with identical shapes, `array_a + array_b` adds each cell in `array_a` with the corresponding cell in `array_b`.\n",
"\n",
"To define the index calculation formulas in this way, first the the basic arithmetic operations `+`, `-`, `*` and `/` are wrapped in functions taking variadic arguments:"
]
},

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 MiB

View file

@ -8,8 +8,9 @@ import geopandas as gpd
from matplotlib import pyplot as plt
import numpy as np
import rasterio as r
from rasterio.features import geometry_mask
from rasterio.features import geometry_mask, geometry_window
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.windows import Window
from shapely.geometry import shape
from shapely.geometry.polygon import Polygon
@ -101,63 +102,45 @@ def scihub_bgr_paths(product_path, resolution=None):
return scihub_band_paths(product_path, ['B02', 'B03', 'B04'], resolution)
def scihub_cloud_mask(product_path, **kwargs):
def scihub_cloud_mask(product_path, area=None, cloud_probability=0.75, resolution='10m'):
'''
Given a `product_path` pointing to a product downlaoded from the Copernicus
Open Access Hub, returns a shapely geometry representing the included cloud
mask.
If an additional parameter, `rasterize=True` is given, the returned cloud
mask will be a rasterized numpy ndarray instead of a vector geometry. Two
additional parameters, `target_path` and `target_transform` are needed to
determine the size of this array. In this array, pixels with clouds are
`False` and pixels without clouds are `True`.
Returns a numpy array with boolean values representing a product's cloud
mask. Cloudy pixels are True, non-cloudy pixels are False.
'''
with TemporaryDirectory() as tmp_dir:
# we need the temporary directory to work around a problem with reading
# vector files from zip archives
p = Path(product_path)
if p.suffix == '.zip':
# when dealing with zip files we have to read the filenames from the
# archive first
with ZipFile(p) as f:
files = f.namelist()
file = [f for f in files if f.endswith('MSK_CLOUDS_B00.gml')][0]
f.extract(file, tmp_dir)
file = Path(tmp_dir) / file
# TODO: Subset for area
# there is no mask in 10m resolution an we need to manually upsample it;
# upsampling code is taken from the rasterio documentation:
# https://rasterio.readthedocs.io/en/latest/topics/resampling.html
if resolution in ['20m', '60m']:
mask_resolution = resolution
upscale_factor = 1
else:
file = list(p.glob('**/MSK_CLOUDS_B00.gml'))[0]
mask_resolution = '20m'
upscale_factor = 2
try:
with fiona.open(file) as features:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# this returns a warning because the iterator has to be
# rewound; while this is a performance issue, we can ignore it
mask = unary_union([shape(f['geometry']) for f in features])
except ValueError:
# empty cloud mask
mask = Polygon([])
if kwargs.get('rasterize'):
# return raster version of the vector geometry we found above
target_shape = kwargs.get('target_shape')
target_transform = kwargs.get('target_transform')
if not target_transform or not target_shape:
error_msg = 'target_transform and target_shape need to be set ' + \
'to construct a rasterized cloud mask.'
raise ValueError(error_msg)
# completely empty cloud masks have to be handled separately
if mask.is_empty:
return np.full(target_shape, True)
return geometry_mask(mask,
out_shape=target_shape,
transform=target_transform)
mask_path = scihub_band_paths(product_path, ['MSK_CLDPRB'], mask_resolution)[0]
with r.open(mask_path) as mask:
if isinstance(area, gpd.GeoDataFrame):
window = geometry_window(mask, area.to_crs(mask.crs)['geometry'])
else:
return mask
window = Window(0, 0, mask.width, mask.height)
mask_data = mask.read(
out_shape=(
mask.count,
int(window.height * upscale_factor),
int(window.width * upscale_factor)
),
window=window,
resampling=Resampling.bilinear
)
mask_transform = mask.transform * mask.transform.scale(
(mask.width / mask_data.shape[-1]),
(mask.height / mask_data.shape[-2])
)
# mask_data values range from 0 to 100, cloud_probability from 0 to 1
return mask_data >= (cloud_probability * 100), mask_transform
def scihub_band_date(band):