Remove extra folders and rename source folder

This commit is contained in:
heyarne 2021-03-01 10:36:31 +00:00
commit ff7520e58d
46 changed files with 0 additions and 1402 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,304 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Corrupted Zipfiles\n",
"\n",
"Out of the 40 files we were trying to download, some needed to be fetched from the [Long-Term Archive](https://scihub.copernicus.eu/userguide/#LTA_Long_Term_Archive_Access).\n",
"After retrying the download several times, all files could be retrieved.\n",
"However, some of the downloaded zip files are suspiciously small:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 25M input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n",
" 29M input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
" 29M input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
" 30M input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
" 30M input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
" 31M input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
" 35M input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
" 38M input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
" 42M input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
" 43M input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"723M input/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip\n",
"753M input/tempelhofer_feld/S2B_MSIL2A_20190409T101029_N0211_R022_T32UQD_20190409T134504.zip\n",
"761M input/tempelhofer_feld/S2B_MSIL2A_20190320T101029_N0211_R022_T33UUU_20190320T195148.zip\n",
"764M input/tempelhofer_feld/S2B_MSIL2A_20190218T101059_N0211_R022_T32UQD_20190218T161620.zip\n",
"766M input/tempelhofer_feld/S2B_MSIL2A_20190330T101029_N0211_R022_T33UUU_20190330T144328.zip\n",
"768M input/tempelhofer_feld/S2B_MSIL2A_20190529T101039_N0212_R022_T32UQD_20190529T130331.zip\n",
"771M input/tempelhofer_feld/S2B_MSIL2A_20190906T101029_N0213_R022_T32UQD_20190906T133832.zip\n",
"774M input/tempelhofer_feld/S2B_MSIL2A_20190728T101029_N0213_R022_T32UQD_20190728T134658.zip\n",
"789M input/tempelhofer_feld/S2B_MSIL2A_20190519T101039_N0212_R022_T33UUU_20190519T132053.zip\n",
"789M input/tempelhofer_feld/S2B_MSIL2A_20190827T101029_N0213_R022_T32UQD_20190827T134854.zip\n",
"802M input/tempelhofer_feld/S2A_MSIL2A_20191130T101401_N0213_R022_T33UUU_20191130T115440.zip\n",
"802M input/tempelhofer_feld/S2B_MSIL2A_20191205T101309_N0213_R022_T33UUU_20191205T122401.zip\n",
"809M input/tempelhofer_feld/S2A_MSIL2A_20191220T101431_N0213_R022_T33UUU_20191220T115219.zip\n",
"813M input/tempelhofer_feld/S2A_MSIL2A_20190921T101031_N0213_R022_T33UUU_20190921T130515.zip\n",
"819M input/tempelhofer_feld/S2A_MSIL2A_20191210T101411_N0213_R022_T33UUU_20191210T114322.zip\n",
"823M input/tempelhofer_feld/S2B_MSIL2A_20190718T101039_N0213_R022_T33UUU_20190718T131731.zip\n",
"823M input/tempelhofer_feld/S2A_MSIL2A_20190713T101031_N0213_R022_T33UUU_20190713T135651.zip\n",
"829M input/tempelhofer_feld/S2A_MSIL2A_20190911T101021_N0213_R022_T33UUU_20190911T143947.zip\n",
"845M input/tempelhofer_feld/S2A_MSIL2A_20190723T101031_N0213_R022_T33UUU_20190723T125722.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20191213T102421_N0213_R065_T33UUU_20191213T120011.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20190422T102029_N0211_R065_T32UQD_20190422T133643.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20191029T102039_N0213_R065_T32UQD_20191029T134629.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20190402T102029_N0211_R065_T33UUU_20190402T135010.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20190711T102029_N0213_R065_T33UUU_20190711T135545.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190417T102031_N0211_R065_T33UUU_20190417T130913.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190626T102031_N0212_R065_T33UUU_20190626T125319.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190726T102031_N0213_R065_T33UUU_20190726T125507.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20191014T102031_N0213_R065_T32UQD_20191014T130941.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190825T102031_N0213_R065_T33UUU_20190825T134836.zip\n",
"1.2G input/tempelhofer_feld/S2B_MSIL2A_20190601T102029_N0212_R065_T33UUU_20190601T135040.zip\n"
]
}
],
"source": [
"! ls -rSsh input/tempelhofer_feld/*.zip"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Trying to extract them causes an error:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n",
" End-of-central-directory signature not found. Either this file is not\n",
" a zipfile, or it constitutes one disk of a multi-part archive. In the\n",
" latter case the central directory and zipfile comment will be found on\n",
" the last disk(s) of this archive.\n",
"unzip: cannot find zipfile directory in one of input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip or\n",
" input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip.zip, and cannot find input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip.ZIP, period.\n"
]
}
],
"source": [
"! ls -S input/tempelhofer_feld/*.zip | tail -n1 | xargs unzip"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## What does the API say?"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sentinelsat"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"api = sentinelsat.SentinelAPI(os.getenv('SCIHUB_USERNAME'), os.getenv('SCIHUB_PASSWORD'))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.8/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n",
" return _prepare_from_string(\" \".join(pjargs))\n"
]
}
],
"source": [
"res = api.to_geodataframe(api.query(raw='S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509'))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"bedec483-5ee1-4264-8dfa-a3b53ce364f7 816.67 MB\n",
"Name: size, dtype: object"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res['size']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The size reported by the SciHub API (816.67 MB) is far larger than the 25 MB file we downloaded."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Do the downloads fail repeatedly?\n",
"\n",
"All files have been downloaded again to another folder, `input/tempelhofer_feld_test`."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9ca05754c4cc5ff9d2bddf99e2e9e753 input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
"5424cf8c0dd4384382366b37af9ee995 input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
"f2050867b04f8911dfcd1412846f5f0e input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
"5c41f18b6c9745df406dbca49c50b0c7 input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
"8e9dc7b716056f702912d11197fab44c input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"7241ca7fc6ccca5eb8935efe1b834697 input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
"7d2b67dac6f36f1d8744ec2ef296445f input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
"b078b9d41e7be70a89961214d4adb72b input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
"f4a2910be181bd1c85fba14e05ce69b1 input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
"53e1beb3f29dc1dc5b20745c3d66568e input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n"
]
}
],
"source": [
"! find input/tempelhofer_feld -type f -size -500M -name '*.zip' | xargs md5sum"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9ca05754c4cc5ff9d2bddf99e2e9e753 input/tempelhofer_feld_test/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
"5424cf8c0dd4384382366b37af9ee995 input/tempelhofer_feld_test/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
"f2050867b04f8911dfcd1412846f5f0e input/tempelhofer_feld_test/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
"5c41f18b6c9745df406dbca49c50b0c7 input/tempelhofer_feld_test/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
"8e9dc7b716056f702912d11197fab44c input/tempelhofer_feld_test/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"7241ca7fc6ccca5eb8935efe1b834697 input/tempelhofer_feld_test/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
"7d2b67dac6f36f1d8744ec2ef296445f input/tempelhofer_feld_test/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
"b078b9d41e7be70a89961214d4adb72b input/tempelhofer_feld_test/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
"f4a2910be181bd1c85fba14e05ce69b1 input/tempelhofer_feld_test/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
"53e1beb3f29dc1dc5b20745c3d66568e input/tempelhofer_feld_test/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n"
]
}
],
"source": [
"! find input/tempelhofer_feld_test -type f -size -500M -name '*.zip' | xargs md5sum"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The checksums are identical in both folders, so repeated downloads fail in exactly the same way."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manual Download"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"https://scihub.copernicus.eu/apihub/odata/v1/Products('bedec483-5ee1-4264-8dfa-a3b53ce364f7')/$value\""
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res['link'].iloc[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When following the link above, the target file is 25MB.\n",
"This points towards an error on the side of scihub."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View file

@ -0,0 +1,288 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Multi-Threading Comparison\n",
"\n",
"This notebook contains a performance comparison of different methods to process the NDVI calculations.\n",
"\n",
"The `%%timeit` cell magic runs the cell content multiple times and reports statistics over those runs, which reduces the influence of one-off factors such as garbage collection pauses."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from multiprocessing import Pool, cpu_count\n",
"from numpy import ma\n",
"from pathlib import Path\n",
"import rasterio as r"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of files: 27\n"
]
}
],
"source": [
"test_files = list(Path('output/ndvi').glob('*.tif'))\n",
"print(f'Number of files: {len(test_files)}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The function we test with:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def average(file_path):\n",
" with r.open(file_path) as src:\n",
" data = src.read(1, masked=True)\n",
" return file_path, ma.average(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## In a single process\n",
"### Time to process a single file"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"36.2 ms ± 42.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"average(test_files[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Time to process all files"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"980 ms ± 7.38 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"averages = [avg for avg in map(average, test_files)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Increasing the list size"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4.86 s ± 10.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"averages = [avg for avg in map(average, test_files * 5)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Time when using a worker pool\n",
"\n",
"Number of CPUs the multiprocessing pools can access:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cpu_count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### On One element"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"277 ms ± 3.92 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"with Pool() as pool:\n",
" averages = [avg for avg in pool.map(average, test_files[:1])]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### On the complete list"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"630 ms ± 8.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"with Pool() as pool:\n",
" averages = [avg for avg in pool.map(average, test_files)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Increasing the list size"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.1 s ± 20 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"with Pool() as pool:\n",
" averages = [avg for avg in pool.map(average, test_files * 5)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Result\n",
"\n",
"As we can see when processing a single element, multiprocessing comes with an overhead.\n",
"When the list to be processed is sufficiently large, we get a reduction in processing time of roughly 35% to 57% (980 ms to 630 ms for 27 files, 4.86 s to 2.1 s for 135 files), depending on list size.\n",
"\n",
"Averaging the masked array is a fairly simple operation that scales in $O(N)$ with the size of the input array.\n",
"The time reduction should be even higher for more complex tasks."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,9 @@
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "place_id": 3285415, "osm_type": "node", "osm_id": 387079362, "display_name": "Frohnsdorf, Treuenbrietzen, Potsdam-Mittelmark, Brandenburg, 14929, Deutschland", "place_rank": 19, "category": "place", "type": "village", "importance": 0.495, "icon": "https:\/\/nominatim.openstreetmap.org\/ui\/mapicons\/\/poi_place_village.p.20.png" }, "geometry": { "type": "Point", "coordinates": [ 12.902171, 52.0546551 ] } },
{ "type": "Feature", "properties": { "place_id": 554316, "osm_type": "node", "osm_id": 226935349, "display_name": "Klausdorf, Treuenbrietzen, Potsdam-Mittelmark, Brandenburg, Deutschland", "place_rank": 19, "category": "place", "type": "village", "importance": 0.495, "icon": "https:\/\/nominatim.openstreetmap.org\/ui\/mapicons\/\/poi_place_village.p.20.png" }, "geometry": { "type": "Point", "coordinates": [ 12.9421292, 52.0487863 ] } },
{ "type": "Feature", "properties": { "place_id": 303964452, "osm_type": "node", "osm_id": 282202396, "display_name": "Tiefenbrunnen, Treuenbrietzen, Potsdam-Mittelmark, Brandenburg, Deutschland", "place_rank": 22, "category": "place", "type": "isolated_dwelling", "importance": 0.42, "icon": null }, "geometry": { "type": "Point", "coordinates": [ 12.9443928, 52.0353239 ] } }
]
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 74 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 70 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 73 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 78 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 73 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 65 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 72 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 67 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 78 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 73 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 73 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 72 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 80 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 71 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 72 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 78 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 84 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 88 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 80 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 84 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 80 KiB

View file

@ -0,0 +1,230 @@
from dateutil.parser import parse as parse_datetime
import urllib.parse
from pathlib import Path
import fiona
import folium
import geopandas as gpd
from matplotlib import pyplot as plt
import numpy as np
import rasterio as r
from rasterio.features import geometry_mask
from rasterio.warp import calculate_default_transform, reproject, Resampling
from shapely.geometry import shape
from shapely.geometry.polygon import Polygon
from shapely.ops import unary_union
from tempfile import TemporaryDirectory
from zipfile import ZipFile
import warnings
def search_osm(place):
    '''
    Look up `place` on OpenStreetMap Nominatim and return the matches as a
    GeoDataFrame.

    The search string is percent-encoded and sent to the Nominatim search
    endpoint, asking for GeoJSON output that includes the polygon geometry of
    each result. The response is loaded with `geopandas.read_file`.
    '''
    encoded_place = urllib.parse.quote(place)
    search_url = (
        'https://nominatim.openstreetmap.org/search/'
        f'?q={encoded_place}&format=geojson&polygon_geojson=1'
    )
    return gpd.read_file(search_url)
def nth(xs, n, default=None):
    '''
    Index into `xs` at position `n`, falling back to `default` when that
    position is out of bounds (i.e. when the lookup raises an `IndexError`).
    `default` is `None` unless given explicitly.
    '''
    try:
        element = xs[n]
    except IndexError:
        element = default
    return element
def plot_all(items, extra_kwargs=()):
    '''
    Draws all of the geometries in `items` onto one shared plot and returns
    the axes object of that plot (`None` when `items` is empty).

    If an item does not have a `plot` attribute, a GeoSeries will be
    constructed from it before plotting.

    The parameter `extra_kwargs` is a sequence of dictionaries; the entry at
    position `i` holds extra keyword arguments that are passed to the `plot`
    call of the `i`-th item. Missing or `None` entries mean "no extra
    arguments", so the sequence may be shorter than `items`.
    '''
    ax = None
    for idx, item in enumerate(items):
        if not hasattr(item, 'plot'):
            item = gpd.GeoSeries(item)
        kwargs = nth(extra_kwargs, idx) or {}
        if ax is None:
            # the first item creates the axes every later item is drawn on
            ax = item.plot(**kwargs)
        else:
            item.plot(ax=ax, **kwargs)
    return ax
def scihub_band_paths(p, bands, resolution=None):
    '''
    Given a zip file or folder at `p`, returns the paths to the `.jp2` raster
    files containing information for the given bands. Because some bands are
    available in more than one resolution, this can be filtered by providing a
    third parameter (e.g. resolution='10m').

    `p` can be a string or a pathlib.Path.
    `bands` can be a single band name or any iterable of band names.

    A raster is selected when the band name (and the resolution, if given)
    occurs in its path relative to `p`; the location of `p` itself never
    influences the match. Results are grouped by band in the order the bands
    were given.

    For zip files the returned paths are formatted in the zip scheme as per
    Apache Commons VFS; for folders they are plain path strings in sorted
    order.
    '''
    if isinstance(bands, str):
        # allow passing in a single band more easily
        bands = [bands]
    p = Path(p)  # make sure we're dealing with a pathlib.Path

    # Build (text to match against, path to return) pairs for every raster.
    if p.suffix == '.zip':
        # the member names have to be read from the archive itself
        with ZipFile(p) as archive:
            members = [name for name in archive.namelist()
                       if name.endswith('.jp2')]
        # the returned path has to point inside the zip archive
        candidates = [(name, f'zip+file://{p}!/{name}') for name in members]
    else:
        # glob yields Path objects lazily; materialise them as strings so
        # they can be searched once per band
        candidates = [(path.relative_to(p).as_posix(), str(path))
                      for path in sorted(p.glob('**/*.jp2'))]

    # take only the rasters that contain one of the given bands
    selected = [(name, target)
                for band in bands
                for name, target in candidates
                if band in name]
    # if a resolution is given, further discard the rasters we don't need
    if resolution:
        selected = [(name, target) for name, target in selected
                    if resolution in name]
    return [target for _, target in selected]
def scihub_bgr_paths(product_path, resolution=None):
    '''
    A convenience function that looks up the blue (B02), green (B03) and red
    (B04) bands of the downloaded product at `product_path` by delegating to
    `scihub_band_paths`. `resolution` is passed through unchanged.
    '''
    bgr_bands = ['B02', 'B03', 'B04']
    return scihub_band_paths(product_path, bgr_bands, resolution)
def scihub_cloud_mask(product_path, **kwargs):
    '''
    Given a `product_path` pointing to a product downloaded from the
    Copernicus Open Access Hub (a zip file or a folder), returns a shapely
    geometry representing the included cloud mask.

    Recognised keyword arguments:

    `rasterize`: if truthy, the cloud mask is returned as a rasterized numpy
    ndarray instead of a vector geometry. In this array, pixels with clouds
    are `False` and pixels without clouds are `True`.
    `target_shape` and `target_transform`: required when `rasterize` is set;
    they determine the shape of the array and how the geometry maps onto its
    pixels.

    Raises a `ValueError` when `rasterize` is set without `target_shape` and
    `target_transform`, and an `IndexError` when the product contains no
    `MSK_CLOUDS_B00.gml` file.
    '''
    rasterize = kwargs.get('rasterize')
    target_shape = kwargs.get('target_shape')
    target_transform = kwargs.get('target_transform')
    # fail before touching any file when the request cannot be fulfilled
    if rasterize and (target_transform is None or target_shape is None):
        error_msg = ('target_transform and target_shape need to be set '
                     'to construct a rasterized cloud mask.')
        raise ValueError(error_msg)

    p = Path(product_path)
    with TemporaryDirectory() as tmp_dir:
        # we need the temporary directory to work around a problem with
        # reading vector files from zip archives
        if p.suffix == '.zip':
            # when dealing with zip files we have to read the filenames from
            # the archive first
            with ZipFile(p) as archive:
                member = [name for name in archive.namelist()
                          if name.endswith('MSK_CLOUDS_B00.gml')][0]
                archive.extract(member, tmp_dir)
            file = Path(tmp_dir) / member
        else:
            file = list(p.glob('**/MSK_CLOUDS_B00.gml'))[0]
        try:
            with fiona.open(file) as features:
                with warnings.catch_warnings():
                    # this returns a warning because the iterator has to be
                    # rewound; while this is a performance issue, we can
                    # ignore it
                    warnings.simplefilter("ignore")
                    mask = unary_union(
                        [shape(f['geometry']) for f in features])
        except ValueError:
            # empty cloud mask
            mask = Polygon([])

    if not rasterize:
        return mask
    # completely empty cloud masks have to be handled separately
    if mask.is_empty:
        return np.full(target_shape, True)
    # geometry_mask expects an iterable of geometries, so the merged mask is
    # wrapped in a list instead of being passed in bare
    return geometry_mask([mask],
                         out_shape=target_shape,
                         transform=target_transform)
def scihub_normalize_range(v, max_value=2000):
    '''
    Raster files downloaded from the Copernicus Open Access Hub can contain
    pixels with reflectance values outside of the allowed range. This function
    clips those values to [0...max_value] and rescales the result to [0...1].

    `v` is a number or array-like of raw values.
    `max_value` is the raw value that is mapped to 1; everything above it is
    clipped. It defaults to 2000 and has to be positive, otherwise a
    `ValueError` is raised.
    '''
    if max_value <= 0:
        raise ValueError('max_value must be positive.')
    return np.clip(v, 0, max_value) / max_value
def scihub_band_date(band):
    '''
    Given a string, `pathlib.Path` or `rasterio.DatasetReader`, returns the
    datetime encoded in its name.

    The timestamp is taken from the third-to-last underscore-separated field
    of the name and parsed with `dateutil`.
    '''
    if type(band) is not r.DatasetReader:
        file_name = Path(band).name
    else:
        file_name = band.name
    timestamp = file_name.split('_')[-3]
    return parse_datetime(timestamp)
# The general-purpose standard library tool for managing a variable number of
# context managers is `contextlib.ExitStack` (see the `contextlib` chapter of
# the Python documentation).
class RasterReaderList():
    '''
    This class allows opening a list of file paths in a `with` block using
    rasterio.open. They get automatically closed when the context created by
    the `with` block is left.

    The `with` block receives the list of opened files, in the order of
    `paths`. If one of the files cannot be opened, the files opened before it
    are closed again and the error is re-raised. An instance can be used in
    several `with` blocks; the files are reopened every time.
    '''

    def __init__(self, paths):
        # files opened by the currently active `with` block
        self.open_files = []
        self.paths = paths

    def __enter__(self):
        opened = []
        try:
            for path in self.paths:
                opened.append(r.open(path))
        except BaseException:
            # __exit__ is not called when __enter__ fails, so the files
            # opened so far have to be released here
            for dataset in opened:
                dataset.close()
            raise
        self.open_files = opened
        return self.open_files

    def __exit__(self, _type, _value, _traceback):
        # forget the handles first so a later `with` block starts clean
        datasets, self.open_files = self.open_files, []
        for dataset in datasets:
            dataset.close()
def geodataframe_on_map(geodataframe):
    '''
    Show a GeoDataFrame or GeoSeries on a Leaflet map (via folium) and return
    that map. The view is fitted to the bounds of the combined geometries.
    '''
    west, south, east, north = geodataframe.unary_union.bounds
    leaflet_map = folium.Map([0, 0], tiles='cartodbpositron',
                             scroll_wheel_zoom=False)
    geojson_layer = folium.GeoJson(geodataframe.to_json())
    geojson_layer.add_to(leaflet_map)
    # fit_bounds is given [lat, lon] corner pairs, i.e. y before x
    leaflet_map.fit_bounds([[south, west], [north, east]])
    return leaflet_map