Remove extra folders and rename source folder

This commit is contained in:
heyarne 2021-03-01 10:36:31 +00:00
commit ff7520e58d
46 changed files with 0 additions and 1402 deletions

View file

@ -0,0 +1,304 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Corrupted Zipfiles\n",
"\n",
"Out of the 40 files we were trying to download, some needed to be fetched from the [Long-Term Archive](https://scihub.copernicus.eu/userguide/#LTA_Long_Term_Archive_Access).\n",
"After retrying the download several times, all files could be retrieved.\n",
"However, some of the downloaded zip files are suspiciously small:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 25M input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n",
" 29M input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
" 29M input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
" 30M input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
" 30M input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
" 31M input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
" 35M input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
" 38M input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
" 42M input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
" 43M input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"723M input/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip\n",
"753M input/tempelhofer_feld/S2B_MSIL2A_20190409T101029_N0211_R022_T32UQD_20190409T134504.zip\n",
"761M input/tempelhofer_feld/S2B_MSIL2A_20190320T101029_N0211_R022_T33UUU_20190320T195148.zip\n",
"764M input/tempelhofer_feld/S2B_MSIL2A_20190218T101059_N0211_R022_T32UQD_20190218T161620.zip\n",
"766M input/tempelhofer_feld/S2B_MSIL2A_20190330T101029_N0211_R022_T33UUU_20190330T144328.zip\n",
"768M input/tempelhofer_feld/S2B_MSIL2A_20190529T101039_N0212_R022_T32UQD_20190529T130331.zip\n",
"771M input/tempelhofer_feld/S2B_MSIL2A_20190906T101029_N0213_R022_T32UQD_20190906T133832.zip\n",
"774M input/tempelhofer_feld/S2B_MSIL2A_20190728T101029_N0213_R022_T32UQD_20190728T134658.zip\n",
"789M input/tempelhofer_feld/S2B_MSIL2A_20190519T101039_N0212_R022_T33UUU_20190519T132053.zip\n",
"789M input/tempelhofer_feld/S2B_MSIL2A_20190827T101029_N0213_R022_T32UQD_20190827T134854.zip\n",
"802M input/tempelhofer_feld/S2A_MSIL2A_20191130T101401_N0213_R022_T33UUU_20191130T115440.zip\n",
"802M input/tempelhofer_feld/S2B_MSIL2A_20191205T101309_N0213_R022_T33UUU_20191205T122401.zip\n",
"809M input/tempelhofer_feld/S2A_MSIL2A_20191220T101431_N0213_R022_T33UUU_20191220T115219.zip\n",
"813M input/tempelhofer_feld/S2A_MSIL2A_20190921T101031_N0213_R022_T33UUU_20190921T130515.zip\n",
"819M input/tempelhofer_feld/S2A_MSIL2A_20191210T101411_N0213_R022_T33UUU_20191210T114322.zip\n",
"823M input/tempelhofer_feld/S2B_MSIL2A_20190718T101039_N0213_R022_T33UUU_20190718T131731.zip\n",
"823M input/tempelhofer_feld/S2A_MSIL2A_20190713T101031_N0213_R022_T33UUU_20190713T135651.zip\n",
"829M input/tempelhofer_feld/S2A_MSIL2A_20190911T101021_N0213_R022_T33UUU_20190911T143947.zip\n",
"845M input/tempelhofer_feld/S2A_MSIL2A_20190723T101031_N0213_R022_T33UUU_20190723T125722.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20191213T102421_N0213_R065_T33UUU_20191213T120011.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20190422T102029_N0211_R065_T32UQD_20190422T133643.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20191029T102039_N0213_R065_T32UQD_20191029T134629.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20190402T102029_N0211_R065_T33UUU_20190402T135010.zip\n",
"1.1G input/tempelhofer_feld/S2B_MSIL2A_20190711T102029_N0213_R065_T33UUU_20190711T135545.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190417T102031_N0211_R065_T33UUU_20190417T130913.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190626T102031_N0212_R065_T33UUU_20190626T125319.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190726T102031_N0213_R065_T33UUU_20190726T125507.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20191014T102031_N0213_R065_T32UQD_20191014T130941.zip\n",
"1.1G input/tempelhofer_feld/S2A_MSIL2A_20190825T102031_N0213_R065_T33UUU_20190825T134836.zip\n",
"1.2G input/tempelhofer_feld/S2B_MSIL2A_20190601T102029_N0212_R065_T33UUU_20190601T135040.zip\n"
]
}
],
"source": [
"! ls -rSsh input/tempelhofer_feld/*.zip"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Trying to extract them causes an error:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n",
" End-of-central-directory signature not found. Either this file is not\n",
" a zipfile, or it constitutes one disk of a multi-part archive. In the\n",
" latter case the central directory and zipfile comment will be found on\n",
" the last disk(s) of this archive.\n",
"unzip: cannot find zipfile directory in one of input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip or\n",
" input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip.zip, and cannot find input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip.ZIP, period.\n"
]
}
],
"source": [
"! ls -S input/tempelhofer_feld/*.zip | tail -n1 | xargs unzip"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## What does the API say?"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sentinelsat"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"api = sentinelsat.SentinelAPI(os.getenv('SCIHUB_USERNAME'), os.getenv('SCIHUB_PASSWORD'))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.8/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n",
" return _prepare_from_string(\" \".join(pjargs))\n"
]
}
],
"source": [
"res = api.to_geodataframe(api.query(raw='S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509'))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"bedec483-5ee1-4264-8dfa-a3b53ce364f7 816.67 MB\n",
"Name: size, dtype: object"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res['size']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see that the size given by the scihub api is way larger."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Do the downloads fail repeatedly?\n",
"\n",
"All files have been downloaded again to another folder, `input/tempelhofer_feld_test`."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9ca05754c4cc5ff9d2bddf99e2e9e753 input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
"5424cf8c0dd4384382366b37af9ee995 input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
"f2050867b04f8911dfcd1412846f5f0e input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
"5c41f18b6c9745df406dbca49c50b0c7 input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
"8e9dc7b716056f702912d11197fab44c input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"7241ca7fc6ccca5eb8935efe1b834697 input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
"7d2b67dac6f36f1d8744ec2ef296445f input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
"b078b9d41e7be70a89961214d4adb72b input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
"f4a2910be181bd1c85fba14e05ce69b1 input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
"53e1beb3f29dc1dc5b20745c3d66568e input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n"
]
}
],
"source": [
"! find input/tempelhofer_feld -type f -size -500M -name '*.zip' | xargs md5sum"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9ca05754c4cc5ff9d2bddf99e2e9e753 input/tempelhofer_feld_test/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n",
"5424cf8c0dd4384382366b37af9ee995 input/tempelhofer_feld_test/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n",
"f2050867b04f8911dfcd1412846f5f0e input/tempelhofer_feld_test/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n",
"5c41f18b6c9745df406dbca49c50b0c7 input/tempelhofer_feld_test/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n",
"8e9dc7b716056f702912d11197fab44c input/tempelhofer_feld_test/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n",
"7241ca7fc6ccca5eb8935efe1b834697 input/tempelhofer_feld_test/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n",
"7d2b67dac6f36f1d8744ec2ef296445f input/tempelhofer_feld_test/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n",
"b078b9d41e7be70a89961214d4adb72b input/tempelhofer_feld_test/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n",
"f4a2910be181bd1c85fba14e05ce69b1 input/tempelhofer_feld_test/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n",
"53e1beb3f29dc1dc5b20745c3d66568e input/tempelhofer_feld_test/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n"
]
}
],
"source": [
"! find input/tempelhofer_feld_test -type f -size -500M -name '*.zip' | xargs md5sum"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The downloads are failing in exactly the same way when trying the downloads repeatedly."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manual Download"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"https://scihub.copernicus.eu/apihub/odata/v1/Products('bedec483-5ee1-4264-8dfa-a3b53ce364f7')/$value\""
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"res['link'].iloc[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When following the link above, the target file is 25MB.\n",
"This points towards an error on the side of scihub."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}