Add research about environmental indicators

This commit is contained in:
heyarne 2021-02-18 14:43:26 +00:00
commit dec537d2d6
8 changed files with 1211 additions and 1918 deletions

View file

@ -0,0 +1,51 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from datetime import date, timedelta\n",
"from pathlib import Path\n",
"\n",
"import folium\n",
"import sentinel_helpers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# the area you want to create a true color image from; will be the first area\n",
"# returned from nominatim.openstreetmap.org\n",
"region_of_interest = 'Berlin, Germany'\n",
"\n",
"# start and end of the time span from which to select satellite data\n",
"start_date = date(2020, 7, 1)\n",
"end_date = start_date + timedelta(days=31)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View file

@ -0,0 +1,32 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -4,65 +4,67 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Problems with Zipfiles\n",
"## Corrupted Zipfiles\n",
"\n",
"Some of the downloaded zip files are suspiciously small:"
"Out of the 40 files we were trying to download, some needed to be fetched from the [Long-Term Archive](https://scihub.copernicus.eu/userguide/#LTA_Long_Term_Archive_Access).\n",
"After retrying the download several times, all files could be retrieved.\n",
"However, some of the downloaded zip files are suspiciously small:"
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.2G input/tempelhofer_feld_next_try/S2B_MSIL2A_20190601T102029_N0212_R065_T33UUU_20190601T135040.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2A_MSIL2A_20190825T102031_N0213_R065_T33UUU_20190825T134836.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2A_MSIL2A_20191014T102031_N0213_R065_T32UQD_20191014T130941.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2A_MSIL2A_20190726T102031_N0213_R065_T33UUU_20190726T125507.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2A_MSIL2A_20190626T102031_N0212_R065_T33UUU_20190626T125319.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2A_MSIL2A_20190417T102031_N0211_R065_T33UUU_20190417T130913.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2B_MSIL2A_20190711T102029_N0213_R065_T33UUU_20190711T135545.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2B_MSIL2A_20190402T102029_N0211_R065_T33UUU_20190402T135010.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2B_MSIL2A_20191029T102039_N0213_R065_T32UQD_20191029T134629.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2B_MSIL2A_20190422T102029_N0211_R065_T32UQD_20190422T133643.zip\n",
"1.1G input/tempelhofer_feld_next_try/S2A_MSIL2A_20191213T102421_N0213_R065_T33UUU_20191213T120011.zip\n",
"845M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190723T101031_N0213_R022_T33UUU_20190723T125722.zip\n",
"829M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190911T101021_N0213_R022_T33UUU_20190911T143947.zip\n",
"823M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190713T101031_N0213_R022_T33UUU_20190713T135651.zip\n",
"823M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190718T101039_N0213_R022_T33UUU_20190718T131731.zip\n",
"819M input/tempelhofer_feld_next_try/S2A_MSIL2A_20191210T101411_N0213_R022_T33UUU_20191210T114322.zip\n",
"813M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190921T101031_N0213_R022_T33UUU_20190921T130515.zip\n",
"809M input/tempelhofer_feld_next_try/S2A_MSIL2A_20191220T101431_N0213_R022_T33UUU_20191220T115219.zip\n",
"802M input/tempelhofer_feld_next_try/S2B_MSIL2A_20191205T101309_N0213_R022_T33UUU_20191205T122401.zip\n",
"802M input/tempelhofer_feld_next_try/S2A_MSIL2A_20191130T101401_N0213_R022_T33UUU_20191130T115440.zip\n",
"789M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190827T101029_N0213_R022_T32UQD_20190827T134854.zip\n",
"789M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190519T101039_N0212_R022_T33UUU_20190519T132053.zip\n",
"774M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190728T101029_N0213_R022_T32UQD_20190728T134658.zip\n",
"771M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190906T101029_N0213_R022_T32UQD_20190906T133832.zip\n",
"768M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190529T101039_N0212_R022_T32UQD_20190529T130331.zip\n",
"766M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190330T101029_N0211_R022_T33UUU_20190330T144328.zip\n",
"764M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190218T101059_N0211_R022_T32UQD_20190218T161620.zip\n",
"761M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190320T101029_N0211_R022_T33UUU_20190320T195148.zip\n",
"753M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190409T101029_N0211_R022_T32UQD_20190409T134504.zip\n",
"723M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip\n",
" 43M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
" 42M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
" 38M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
" 35M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
" 31M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
" 30M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
" 30M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
" 29M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
" 29M input/tempelhofer_feld_next_try/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
" 25M input/tempelhofer_feld_next_try/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n"
" 25M input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n",
" 29M input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
" 29M input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
" 30M input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
" 30M input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
" 31M input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
" 35M input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
" 38M input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
" 42M input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
" 43M input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"723M input/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip\n",
"753M input/tempelhofer_feld/S2B_MSIL2A_20190409T101029_N0211_R022_T32UQD_20190409T134504.zip\n",
"761M input/tempelhofer_feld/S2B_MSIL2A_20190320T101029_N0211_R022_T33UUU_20190320T195148.zip\n",
"764M input/tempelhofer_feld/S2B_MSIL2A_20190218T101059_N0211_R022_T32UQD_20190218T161620.zip\n",
"766M input/tempelhofer_feld/S2B_MSIL2A_20190330T101029_N0211_R022_T33UUU_20190330T144328.zip\n",
"768M input/tempelhofer_feld/S2B_MSIL2A_20190529T101039_N0212_R022_T32UQD_20190529T130331.zip\n",
"771M input/tempelhofer_feld/S2B_MSIL2A_20190906T101029_N0213_R022_T32UQD_20190906T133832.zip\n",
"774M input/tempelhofer_feld/S2B_MSIL2A_20190728T101029_N0213_R022_T32UQD_20190728T134658.zip\n",
"789M input/tempelhofer_feld/S2B_MSIL2A_20190519T101039_N0212_R022_T33UUU_20190519T132053.zip\n",
"789M input/tempelhofer_feld/S2B_MSIL2A_20190827T101029_N0213_R022_T32UQD_20190827T134854.zip\n",
"802M input/tempelhofer_feld/S2A_MSIL2A_20191130T101401_N0213_R022_T33UUU_20191130T115440.zip\n",
"802M input/tempelhofer_feld/S2B_MSIL2A_20191205T101309_N0213_R022_T33UUU_20191205T122401.zip\n",
"809M input/tempelhofer_feld/S2A_MSIL2A_20191220T101431_N0213_R022_T33UUU_20191220T115219.zip\n",
"813M input/tempelhofer_feld/S2A_MSIL2A_20190921T101031_N0213_R022_T33UUU_20190921T130515.zip\n",
"819M input/tempelhofer_feld/S2A_MSIL2A_20191210T101411_N0213_R022_T33UUU_20191210T114322.zip\n",
"823M input/tempelhofer_feld/S2B_MSIL2A_20190718T101039_N0213_R022_T33UUU_20190718T131731.zip\n",
"823M input/tempelhofer_feld/S2A_MSIL2A_20190713T101031_N0213_R022_T33UUU_20190713T135651.zip\n",
"829M input/tempelhofer_feld/S2A_MSIL2A_20190911T101021_N0213_R022_T33UUU_20190911T143947.zip\n",
"845M input/tempelhofer_feld/S2A_MSIL2A_20190723T101031_N0213_R022_T33UUU_20190723T125722.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20191213T102421_N0213_R065_T33UUU_20191213T120011.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20190422T102029_N0211_R065_T32UQD_20190422T133643.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20191029T102039_N0213_R065_T32UQD_20191029T134629.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20190402T102029_N0211_R065_T33UUU_20190402T135010.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20190711T102029_N0213_R065_T33UUU_20190711T135545.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190417T102031_N0211_R065_T33UUU_20190417T130913.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190626T102031_N0212_R065_T33UUU_20190626T125319.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190726T102031_N0213_R065_T33UUU_20190726T125507.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20191014T102031_N0213_R065_T32UQD_20191014T130941.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190825T102031_N0213_R065_T33UUU_20190825T134836.zip\n",
"1.2G input/tempelhofer_feld/S2B_MSIL2A_20190601T102029_N0212_R065_T33UUU_20190601T135040.zip\n"
]
}
],
"source": [
"! ls -Ssh input/tempelhofer_feld_next_try/*.zip "
"! ls -rSsh input/tempelhofer_feld/*.zip"
]
},
{
@ -95,9 +97,16 @@
"! ls -S input/tempelhofer_feld/*.zip | tail -n1 | xargs unzip"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## What does the API say?"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -107,7 +116,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -116,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@ -134,7 +143,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@ -144,13 +153,12 @@
"Name: size, dtype: object"
]
},
"execution_count": 17,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# we can see that the size given by the scihub api is way larger\n",
"res['size']"
]
},
@ -158,59 +166,110 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Do the downloads fail repeatedly?"
"We can see that the size reported by the SciHub API is far larger than the size of the downloaded files."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Do the downloads fail repeatedly?\n",
"\n",
"All files have been downloaded again to another folder, `input/tempelhofer_feld_test`."
]
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
"input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
"input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
"input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
"input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
"input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
"input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
"input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
"input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n"
"9ca05754c4cc5ff9d2bddf99e2e9e753 input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
"5424cf8c0dd4384382366b37af9ee995 input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
"f2050867b04f8911dfcd1412846f5f0e input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
"5c41f18b6c9745df406dbca49c50b0c7 input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
"8e9dc7b716056f702912d11197fab44c input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"7241ca7fc6ccca5eb8935efe1b834697 input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
"7d2b67dac6f36f1d8744ec2ef296445f input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
"b078b9d41e7be70a89961214d4adb72b input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
"f4a2910be181bd1c85fba14e05ce69b1 input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
"53e1beb3f29dc1dc5b20745c3d66568e input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n"
]
}
],
"source": [
"! find input/tempelhofer_feld -type f -size -500M -name '*.zip'"
"! find input/tempelhofer_feld -type f -size -500M -name '*.zip' | xargs md5sum"
]
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"input/tempelhofer_feld_test/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
"input/tempelhofer_feld_test/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
"input/tempelhofer_feld_test/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
"input/tempelhofer_feld_test/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
"input/tempelhofer_feld_test/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"input/tempelhofer_feld_test/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
"input/tempelhofer_feld_test/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
"input/tempelhofer_feld_test/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
"input/tempelhofer_feld_test/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
"input/tempelhofer_feld_test/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n"
"9ca05754c4cc5ff9d2bddf99e2e9e753 input/tempelhofer_feld_test/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
"5424cf8c0dd4384382366b37af9ee995 input/tempelhofer_feld_test/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
"f2050867b04f8911dfcd1412846f5f0e input/tempelhofer_feld_test/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
"5c41f18b6c9745df406dbca49c50b0c7 input/tempelhofer_feld_test/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
"8e9dc7b716056f702912d11197fab44c input/tempelhofer_feld_test/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"7241ca7fc6ccca5eb8935efe1b834697 input/tempelhofer_feld_test/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
"7d2b67dac6f36f1d8744ec2ef296445f input/tempelhofer_feld_test/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
"b078b9d41e7be70a89961214d4adb72b input/tempelhofer_feld_test/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
"f4a2910be181bd1c85fba14e05ce69b1 input/tempelhofer_feld_test/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
"53e1beb3f29dc1dc5b20745c3d66568e input/tempelhofer_feld_test/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n"
]
}
],
"source": [
"! find input/tempelhofer_feld_test -type f -size -500M -name '*.zip'"
"! find input/tempelhofer_feld_test -type f -size -500M -name '*.zip' | xargs md5sum"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The MD5 checksums are identical in both folders, so the downloads fail in exactly the same way when they are repeated."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manual Download"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"https://scihub.copernicus.eu/apihub/odata/v1/Products('bedec483-5ee1-4264-8dfa-a3b53ce364f7')/$value\""
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res['link'].iloc[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When following the link above, the downloaded file is only 25 MB.\n",
"This points towards an error on the side of SciHub."
]
},
{

View file

@ -0,0 +1,288 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Multiprocessing Comparison\n",
"\n",
"This notebook contains a performance comparison of different methods to process the NDVI calculations.\n",
"\n",
"The `%%timeit` cell magic runs the cell content multiple times and reports statistics over those runs, which reduces the influence of one-off effects such as garbage collection pauses."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from multiprocessing import Pool, cpu_count\n",
"from numpy import ma\n",
"from pathlib import Path\n",
"import rasterio as r"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of files: 27\n"
]
}
],
"source": [
"test_files = list(Path('output/ndvi').glob('*.tif'))\n",
"print(f'Number of files: {len(test_files)}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The function we test with:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def average(file_path):\n",
" with r.open(file_path) as src:\n",
" data = src.read(1, masked=True)\n",
" return file_path, ma.average(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## In a single process\n",
"### Time to process a single file"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"36.2 ms ± 42.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"average(test_files[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Time to process all files"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"980 ms ± 7.38 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"averages = [avg for avg in map(average, test_files)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Increasing the list size"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4.86 s ± 10.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"averages = [avg for avg in map(average, test_files * 5)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Time when using a worker pool\n",
"\n",
"Number of CPUs the multiprocessing pools can access:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cpu_count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### On one element"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"277 ms ± 3.92 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"with Pool() as pool:\n",
" averages = [avg for avg in pool.map(average, test_files[:1])]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### On the complete list"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"630 ms ± 8.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"with Pool() as pool:\n",
" averages = [avg for avg in pool.map(average, test_files)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Increasing the list size"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.1 s ± 20 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"with Pool() as pool:\n",
" averages = [avg for avg in pool.map(average, test_files * 5)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Result\n",
"\n",
"As we can see when processing a single element, multiprocessing comes with an overhead.\n",
"When the list to be processed is sufficiently large, we get a reduction in processing time of roughly 36% (27 files: 980 ms → 630 ms) to 57% (135 files: 4.86 s → 2.1 s), depending on list size.\n",
"\n",
"Averaging the masked array is a fairly simple operation that scales in $O(N)$ with the size of the input array.\n",
"The time reduction should be even higher for more complex tasks."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

File diff suppressed because one or more lines are too long

View file

@ -1,6 +1,5 @@
import urllib.parse
from pathlib import Path
import zipfile
import fiona
import geopandas as gpd
@ -9,6 +8,15 @@ import numpy as np
import rasterio as r
from rasterio.warp import calculate_default_transform, reproject, Resampling
from shapely.geometry import shape
from shapely.geometry.polygon import Polygon
from shapely.ops import unary_union
from tempfile import TemporaryDirectory
from zipfile import ZipFile
import warnings
def search_osm(place):
'''
Returns a GeoDataFrame with results from OpenStreetMap Nominatim for the given search string.
@ -73,7 +81,7 @@ def scihub_band_paths(p, bands, resolution=None):
if p.suffix == '.zip':
# when dealing with zip files we have to read the filenames from the
# archive first
with zipfile.ZipFile(p) as f:
with ZipFile(p) as f:
files = f.namelist()
rasters = [f for f in files if f.endswith('.jp2')]
else:
@ -93,14 +101,48 @@ def scihub_band_paths(p, bands, resolution=None):
return rasters
def scihub_bgr_paths(raster_path, resolution=None):
def scihub_bgr_paths(product_path, resolution=None):
'''
A convenience function to return the paths to the blue, green and red bands
in the downloaded product at `raster_path`.
in the downloaded product at `product_path`.
'''
return scihub_band_paths(raster_path, ['B02', 'B03', 'B04'], resolution)
return scihub_band_paths(product_path, ['B02', 'B03', 'B04'], resolution)
def scihub_cloud_mask(product_path):
    '''
    Returns the cloud mask included in a product downloaded from the
    Copernicus Open Access Hub as a single shapely geometry.

    `product_path` may point to a zipped product (`.zip` suffix) or to a
    directory containing the extracted product. The mask is looked up by its
    file name, `MSK_CLOUDS_B00.gml`; if no such file exists the lookup fails
    with an `IndexError`.

    If reading the mask raises a `ValueError`, the cloud mask is considered
    empty and an empty `Polygon` is returned.
    '''
    # reading vector files directly from a zip archive is problematic, so the
    # mask is extracted into a temporary directory that lives until we are
    # done reading it
    with TemporaryDirectory() as scratch_dir:
        product = Path(product_path)

        if product.suffix != '.zip':
            mask_path = list(product.glob('**/MSK_CLOUDS_B00.gml'))[0]
        else:
            # the member names have to be read from the archive before the
            # mask can be located and extracted
            with ZipFile(product) as archive:
                members = archive.namelist()
                member = [m for m in members if m.endswith('MSK_CLOUDS_B00.gml')][0]
                archive.extract(member, scratch_dir)
            mask_path = Path(scratch_dir) / member

        try:
            with fiona.open(mask_path) as features:
                with warnings.catch_warnings():
                    # iterating emits a warning because the iterator has to be
                    # rewound; this only costs performance and is safe to
                    # silence here
                    warnings.simplefilter("ignore")
                    geometries = [shape(feature['geometry']) for feature in features]
                    return unary_union(geometries)
        except ValueError:
            # empty cloud mask
            return Polygon([])
def scihub_normalize_range(v):
'''
Raster files downloaded from the Copernicus Open Access Hub can contain
@ -113,6 +155,7 @@ def scihub_normalize_range(v):
def reproject_raster_image(src, dst, target_crs):
'''
FIXME: UNUSED!?
Reprojects `src` into `dst`, given a coordinate reference system `target_crs`.
'''
transform, width, height = calculate_default_transform(