mirror of
https://github.com/heyarne/earth-observation-for-journalism.git
synced 2026-05-07 03:23:40 +02:00
315 lines
10 KiB
Text
315 lines
10 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Calculating Spectral Indices Over a Long Time Span"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pathlib import Path\n",
|
|
"from multiprocessing import Pool\n",
|
|
"import geopandas as gpd\n",
|
|
"from sentinel_helpers import scihub_band_paths\n",
|
|
"\n",
|
|
"base_path = Path('input/tempelhofer_feld/')\n",
|
|
"input_files = list(base_path.glob('*.zip'))\n",
|
|
"area_of_interest = gpd.read_file(base_path / 'tempelhofer_feld.geojson')\n",
|
|
"ndvi_path = Path('output') / 'ndvi'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import rasterio as r\n",
|
|
"import rasterio.mask\n",
|
|
"import rasterio.plot as rplt\n",
|
|
"import numpy as np\n",
|
|
"from sentinel_helpers import scihub_normalize_range, scihub_cloud_mask\n",
|
|
"from zipfile import BadZipFile"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# uncomment this cell to remove previous calculations\n",
|
|
"# ! rm -rf {ndvi_path / '*.tif'}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def calculate_ndvi(product_path):\n",
|
|
" global area_of_interest\n",
|
|
" \n",
|
|
" try:\n",
|
|
" b04_path, b08_path = scihub_band_paths(product_path, ['B04', 'B08'], '10m')\n",
|
|
" cloud_mask = scihub_cloud_mask(product_path)\n",
|
|
" except BadZipFile:\n",
|
|
" # This is due to a problem of Scihub\n",
|
|
" print(f'{product_path}: Problem reading product, skipping it')\n",
|
|
" return\n",
|
|
"\n",
|
|
" with r.open(b04_path, 'r') as b04, r.open(b08_path, 'r') as b08:\n",
|
|
" # we want to only write the bare minimum data necessary to disk\n",
|
|
" out_meta = b04.meta.copy()\n",
|
|
"\n",
|
|
" # we reproject the geojson file we fetched above and convert it so that rasterio\n",
|
|
" # can use it as a mask; we subtract the cloud mask to avoid irregularities\n",
|
|
" reprojected = area_of_interest.to_crs(out_meta['crs']).iloc[0].geometry\n",
|
|
" mask = reprojected.difference(cloud_mask)\n",
|
|
" \n",
|
|
" if mask.area / reprojected.area < 0.25:\n",
|
|
" print(f'{product_path}: Area is covered by clouds, skipping it')\n",
|
|
" return\n",
|
|
" \n",
|
|
" miny, minx, maxy, maxx = mask.bounds\n",
|
|
" \n",
|
|
" # update the dimensions and save as geotiff, not jp2\n",
|
|
" out_meta.update({\n",
|
|
" 'width': maxx - minx,\n",
|
|
" 'height': maxy - miny,\n",
|
|
" 'driver': 'GTiff',\n",
|
|
" 'dtype': 'float32'\n",
|
|
" }) \n",
|
|
" \n",
|
|
" out_name = Path(b04_path).name.replace('B04', 'NDVI').replace('.jp2', '.tif')\n",
|
|
"\n",
|
|
" ndvi_path.mkdir(exist_ok=True, parents=True)\n",
|
|
" with r.open(ndvi_path / out_name, 'w+', **out_meta) as dst:\n",
|
|
" # we take only the part out of our source raster that we actually need\n",
|
|
" # crop=True clips off the borders\n",
|
|
" b04, transform_b04 = rasterio.mask.mask(b04, shapes=[mask], crop=True, nodata=-999)\n",
|
|
" b08, _ = rasterio.mask.mask(b08, shapes=[mask], crop=True, nodata=-999) # we ignore the returned transform because it's identical to the previous one\n",
|
|
"\n",
|
|
" b04 = scihub_normalize_range(b04).astype('f4') # <- f4 = float32\n",
|
|
" b08 = scihub_normalize_range(b08).astype('f4')\n",
|
|
"\n",
|
|
" # uncomment the following line to see if your masked shape looks correct\n",
|
|
" #rplt.show(b04, transform=transform_b04)\n",
|
|
" #rplt.show(b08, transform=transform_b04)\n",
|
|
"\n",
|
|
" # we want to be able to ignore divide by zero errors so the formula is nicer to write\n",
|
|
" np.seterr(divide='ignore', invalid='ignore')\n",
|
|
" ndvi = (b08 - b04) / (b08 + b04)\n",
|
|
"\n",
|
|
" # uncomment the following line to see if we calculated the index correctly\n",
|
|
" # rplt.show(ndvi, transform=transform_b04)\n",
|
|
"\n",
|
|
" dst.write(ndvi)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "da8f523cda004cf281e368debdddb9c5",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=40.0), HTML(value='')))"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"input/tempelhofer_feld/S2B_MSIL2A_20190330T101029_N0211_R022_T33UUU_20190330T144328.zip: Area is covered by clouds, skipping it\n",
|
|
"input/tempelhofer_feld/S2A_MSIL2A_20190626T102031_N0212_R065_T33UUU_20190626T125319.zip: Area is covered by clouds, skipping it\n",
|
|
"input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip: Problem reading product, skipping it\n",
|
|
"input/tempelhofer_feld/S2B_MSIL2A_20190320T101029_N0211_R022_T33UUU_20190320T195148.zip: Area is covered by clouds, skipping it\n",
|
|
"\n",
|
|
"CPU times: user 191 ms, sys: 172 ms, total: 363 ms\n",
|
|
"Wall time: 12 s\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"from tqdm.notebook import tqdm\n",
|
|
"\n",
|
|
"# we parallelize the NDVI calculation using a Python threadpool\n",
|
|
"with Pool() as pool:\n",
|
|
" for _ in tqdm(pool.imap_unordered(calculate_ndvi, input_files), total=len(input_files)):\n",
|
|
" # this loop is only here so we can get the progress bar\n",
|
|
" pass"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"How many files could we process?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"28\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"! ls -l {ndvi_path} | wc -l"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Plot the NDVI\n",
|
|
"\n",
|
|
"See [02c Multithreading.ipynb](./02c Multithreading.ipynb) for a performance comparison of single vs multi-threaded iteration to average the ndvi."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from numpy import ma\n",
|
|
"output_files = list(ndvi_path.glob('*.tif'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def average(file_path):\n",
|
|
" with r.open(file_path) as src:\n",
|
|
" # masked=True makes sure to respect the nodata value and reads the data\n",
|
|
" # as a numpy MaskedArray, which lets us use `numpy.ma`'s methods\n",
|
|
" data = src.read(1, masked=True)\n",
|
|
" return file_path, ma.average(data)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 43,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "2f61f096377440319738f5ea049f24b5",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=27.0), HTML(value='')))"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"CPU times: user 48.5 ms, sys: 165 ms, total: 213 ms\n",
|
|
"Wall time: 754 ms\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"%%time\n",
|
|
"with Pool() as pool:\n",
|
|
" averages = [avg for avg in tqdm(pool.map(average, output_files), total=len(output_files))]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"We use the pandas library to plot the year-long NDVI development:"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 52,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"paths, vals = zip(*averages)\n",
|
|
"dates = map(lambda p: p.name.split('_')[1], paths)\n",
|
|
"df = pd.DataFrame(\n",
|
|
" vals, index=pd.DatetimeIndex(dates)\n",
|
|
")\n",
|
|
"\n",
|
|
"df.plot()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|