{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "from multiprocessing import Pool\n", "import geopandas as gpd\n", "from sentinel_helpers import scihub_band_paths" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "base_path = Path('input/tempelhofer_feld/')\n", "input_files = list(base_path.glob('*.zip'))\n", "area_of_interest = gpd.read_file(base_path / 'tempelhofer_feld.geojson')\n", "ndvi_path = base_path / 'ndvi'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import rasterio as r\n", "import rasterio.mask\n", "import rasterio.plot as rplt\n", "import numpy as np\n", "from sentinel_helpers import scihub_normalize_range\n", "from zipfile import BadZipFile" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def calculate_ndvi(raster_file):\n", " global area_of_interest\n", " \n", " try:\n", " b04_path, b08_path = scihub_band_paths(raster_file, ['B04', 'B08'], '10m')\n", " except BadZipFile:\n", " print(f'Problem reading {raster_file}, skipping it')\n", " return\n", "\n", " with r.open(b04_path, 'r') as b04, r.open(b08_path, 'r') as b08:\n", " # we want to only write the bare minimum data necessary to disk\n", " out_meta = b04.meta.copy()\n", "\n", " # we reproject the geojson file we fetched above and convert it so that rasterio\n", " # can use it as a mask\n", " mask = area_of_interest.to_crs(out_meta['crs']).iloc[0].geometry\n", " miny, minx, maxy, maxx = mask.bounds\n", "\n", " # update the dimensions and save as geotiff, not jp2\n", " out_meta.update({\n", " 'width': maxx - minx,\n", " 'height': maxy - miny,\n", " 'driver': 'GTiff',\n", " 'dtype': 'float32'\n", " }) \n", " out_name = Path(b04_path).name.replace('B04', 'NDVI').replace('.jp2', '.tif')\n", "\n", " ! mkdir -p {ndvi_path}\n", " with r.open(ndvi_path / out_name, 'w+', **out_meta) as dst:\n", " # we take only the part out of our source raster that we actually need\n", " # crop=True clips off the borders\n", " b04, transform_b04 = rasterio.mask.mask(b04, shapes=[mask], crop=True)\n", " b08, _ = rasterio.mask.mask(b08, shapes=[mask], crop=True) # we ignore the returned transform because it's identical to the previous one\n", "\n", " b04 = scihub_normalize_range(b04).astype('f4') # <- f4 = float32\n", " b08 = scihub_normalize_range(b08).astype('f4')\n", "\n", " # uncomment the following line to see if your masked shape looks correct\n", " #rplt.show(b04, transform=transform_b04)\n", " #rplt.show(b08, transform=transform_b04)\n", "\n", " # we want to be able to ignore divide by zero errors so the formula is nicer to write\n", " np.seterr(divide='ignore', invalid='ignore')\n", " ndvi = (b08 - b04) / (b08 + b04)\n", "\n", " # uncomment the following line to see if we calculated the index correctly\n", " # rplt.show(ndvi, transform=transform_b04)\n", "\n", " dst.write(ndvi)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "from tqdm.notebook import tqdm" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d6bb63ba04f94679ab26c02235782b3c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=40.0), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Problem reading input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip, skipping it\n", "Problem reading input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip, skipping it\n", "Problem reading input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip, skipping it\n", "Problem reading input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip, skipping it\n", "Problem reading input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip, skipping it\n", "Problem reading input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip, skipping it\n", "Problem reading input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip, skipping itProblem reading input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip, skipping it\n", "\n", "Problem reading input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip, skipping it\n", "Problem reading input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip, skipping it\n", "\n", "CPU times: user 179 ms, sys: 154 ms, total: 333 ms\n", "Wall time: 23.7 s\n" ] } ], "source": [ "%%time\n", "\n", "pool = Pool()\n", "\n", "for _ in tqdm(pool.imap_unordered(calculate_ndvi, input_files), total=len(input_files)):\n", " # this loop is only here so we can get the progress bar\n", " pass" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "How many files could we process?" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "31\n" ] } ], "source": [ "! ls -l {ndvi_path} | wc -l" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 4 }