{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Corrupted Zipfiles\n", "\n", "Out of the 40 files we were trying to download, some needed to be fetched from the [Long-Term Archive](https://scihub.copernicus.eu/userguide/#LTA_Long_Term_Archive_Access).\n", "After retrying the download several times, all files could be retrieved.\n", "However, some of the downloaded zip files are suspiciously small:" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 25M input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n", " 29M input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n", " 29M input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n", " 30M input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n", " 30M input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n", " 31M input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n", " 35M input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n", " 38M input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n", " 42M input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n", " 43M input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n", "723M input/tempelhofer_feld/S2A_MSIL2A_20190114T101351_N0211_R022_T32UQD_20190114T113404.zip\n", "753M input/tempelhofer_feld/S2B_MSIL2A_20190409T101029_N0211_R022_T32UQD_20190409T134504.zip\n", "761M input/tempelhofer_feld/S2B_MSIL2A_20190320T101029_N0211_R022_T33UUU_20190320T195148.zip\n", "764M input/tempelhofer_feld/S2B_MSIL2A_20190218T101059_N0211_R022_T32UQD_20190218T161620.zip\n", "766M input/tempelhofer_feld/S2B_MSIL2A_20190330T101029_N0211_R022_T33UUU_20190330T144328.zip\n", "768M input/tempelhofer_feld/S2B_MSIL2A_20190529T101039_N0212_R022_T32UQD_20190529T130331.zip\n", "771M input/tempelhofer_feld/S2B_MSIL2A_20190906T101029_N0213_R022_T32UQD_20190906T133832.zip\n", "774M input/tempelhofer_feld/S2B_MSIL2A_20190728T101029_N0213_R022_T32UQD_20190728T134658.zip\n", "789M input/tempelhofer_feld/S2B_MSIL2A_20190519T101039_N0212_R022_T33UUU_20190519T132053.zip\n", "789M input/tempelhofer_feld/S2B_MSIL2A_20190827T101029_N0213_R022_T32UQD_20190827T134854.zip\n", "802M input/tempelhofer_feld/S2A_MSIL2A_20191130T101401_N0213_R022_T33UUU_20191130T115440.zip\n", "802M input/tempelhofer_feld/S2B_MSIL2A_20191205T101309_N0213_R022_T33UUU_20191205T122401.zip\n", "809M input/tempelhofer_feld/S2A_MSIL2A_20191220T101431_N0213_R022_T33UUU_20191220T115219.zip\n", "813M input/tempelhofer_feld/S2A_MSIL2A_20190921T101031_N0213_R022_T33UUU_20190921T130515.zip\n", "819M input/tempelhofer_feld/S2A_MSIL2A_20191210T101411_N0213_R022_T33UUU_20191210T114322.zip\n", "823M input/tempelhofer_feld/S2B_MSIL2A_20190718T101039_N0213_R022_T33UUU_20190718T131731.zip\n", "823M input/tempelhofer_feld/S2A_MSIL2A_20190713T101031_N0213_R022_T33UUU_20190713T135651.zip\n", "829M input/tempelhofer_feld/S2A_MSIL2A_20190911T101021_N0213_R022_T33UUU_20190911T143947.zip\n", "845M input/tempelhofer_feld/S2A_MSIL2A_20190723T101031_N0213_R022_T33UUU_20190723T125722.zip\n", "1.1G input/tempelhofer_feld/S2A_MSIL2A_20191213T102421_N0213_R065_T33UUU_20191213T120011.zip\n", "1.1G input/tempelhofer_feld/S2B_MSIL2A_20190422T102029_N0211_R065_T32UQD_20190422T133643.zip\n", "1.1G input/tempelhofer_feld/S2B_MSIL2A_20191029T102039_N0213_R065_T32UQD_20191029T134629.zip\n", "1.1G input/tempelhofer_feld/S2B_MSIL2A_20190402T102029_N0211_R065_T33UUU_20190402T135010.zip\n", "1.1G input/tempelhofer_feld/S2B_MSIL2A_20190711T102029_N0213_R065_T33UUU_20190711T135545.zip\n", "1.1G input/tempelhofer_feld/S2A_MSIL2A_20190417T102031_N0211_R065_T33UUU_20190417T130913.zip\n", "1.1G input/tempelhofer_feld/S2A_MSIL2A_20190626T102031_N0212_R065_T33UUU_20190626T125319.zip\n", "1.1G input/tempelhofer_feld/S2A_MSIL2A_20190726T102031_N0213_R065_T33UUU_20190726T125507.zip\n", "1.1G input/tempelhofer_feld/S2A_MSIL2A_20191014T102031_N0213_R065_T32UQD_20191014T130941.zip\n", "1.1G input/tempelhofer_feld/S2A_MSIL2A_20190825T102031_N0213_R065_T33UUU_20190825T134836.zip\n", "1.2G input/tempelhofer_feld/S2B_MSIL2A_20190601T102029_N0212_R065_T33UUU_20190601T135040.zip\n" ] } ], "source": [ "! ls -rSsh input/tempelhofer_feld/*.zip" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Trying to extract them causes an error:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n", " End-of-central-directory signature not found. Either this file is not\n", " a zipfile, or it constitutes one disk of a multi-part archive. In the\n", " latter case the central directory and zipfile comment will be found on\n", " the last disk(s) of this archive.\n", "unzip: cannot find zipfile directory in one of input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip or\n", " input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip.zip, and cannot find input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip.ZIP, period.\n" ] } ], "source": [ "! ls -S input/tempelhofer_feld/*.zip | tail -n1 | xargs unzip" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## What does the API say?" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sentinelsat" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "api = sentinelsat.SentinelAPI(os.getenv('SCIHUB_USERNAME'), os.getenv('SCIHUB_PASSWORD'))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/conda/lib/python3.8/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", " return _prepare_from_string(\" \".join(pjargs))\n" ] } ], "source": [ "res = api.to_geodataframe(api.query(raw='S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509'))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "bedec483-5ee1-4264-8dfa-a3b53ce364f7 816.67 MB\n", "Name: size, dtype: object" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res['size']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can see that the size given by the scihub api is way larger." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Do the downloads fail repeatedly?\n", "\n", "All files have been downloaded again to another folder, `input/tempelhofer_feld_test`." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "9ca05754c4cc5ff9d2bddf99e2e9e753 input/tempelhofer_feld/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n", "5424cf8c0dd4384382366b37af9ee995 input/tempelhofer_feld/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n", "f2050867b04f8911dfcd1412846f5f0e input/tempelhofer_feld/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n", "5c41f18b6c9745df406dbca49c50b0c7 input/tempelhofer_feld/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n", "8e9dc7b716056f702912d11197fab44c input/tempelhofer_feld/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n", "7241ca7fc6ccca5eb8935efe1b834697 input/tempelhofer_feld/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n", "7d2b67dac6f36f1d8744ec2ef296445f input/tempelhofer_feld/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n", "b078b9d41e7be70a89961214d4adb72b input/tempelhofer_feld/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n", "f4a2910be181bd1c85fba14e05ce69b1 input/tempelhofer_feld/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n", "53e1beb3f29dc1dc5b20745c3d66568e input/tempelhofer_feld/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n" ] } ], "source": [ "! find input/tempelhofer_feld -type f -size -500M -name '*.zip' | xargs md5sum" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "9ca05754c4cc5ff9d2bddf99e2e9e753 input/tempelhofer_feld_test/S2A_MSIL2A_20190603T101031_N0212_R022_T33UUU_20190603T114652.zip\n", "5424cf8c0dd4384382366b37af9ee995 input/tempelhofer_feld_test/S2A_MSIL2A_20190404T101031_N0211_R022_T32UQD_20190404T174806.zip\n", "f2050867b04f8911dfcd1412846f5f0e input/tempelhofer_feld_test/S2A_MSIL2A_20190216T102111_N0211_R065_T33UUU_20190216T130428.zip\n", "5c41f18b6c9745df406dbca49c50b0c7 input/tempelhofer_feld_test/S2B_MSIL2A_20190419T101029_N0211_R022_T33UUU_20190419T132322.zip\n", "8e9dc7b716056f702912d11197fab44c input/tempelhofer_feld_test/S2A_MSIL2A_20190407T102021_N0211_R065_T33UUU_20190407T134109.zip\n", "7241ca7fc6ccca5eb8935efe1b834697 input/tempelhofer_feld_test/S2B_MSIL2A_20190512T102029_N0212_R065_T33UUU_20190512T134103.zip\n", "7d2b67dac6f36f1d8744ec2ef296445f input/tempelhofer_feld_test/S2A_MSIL2A_20190613T101031_N0212_R022_T33UUU_20190614T125329.zip\n", "b078b9d41e7be70a89961214d4adb72b input/tempelhofer_feld_test/S2A_MSIL2A_20190424T101031_N0211_R022_T32UQD_20190424T162325.zip\n", "f4a2910be181bd1c85fba14e05ce69b1 input/tempelhofer_feld_test/S2A_MSIL2A_20190822T101031_N0213_R022_T32UQD_20190822T143621.zip\n", "53e1beb3f29dc1dc5b20745c3d66568e input/tempelhofer_feld_test/S2A_MSIL2A_20190623T101031_N0212_R022_T33UUU_20190623T132509.zip\n" ] } ], "source": [ "! find input/tempelhofer_feld_test -type f -size -500M -name '*.zip' | xargs md5sum" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The downloads are failing in exactly the same way when trying the downloads repeatedly." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Manual Download" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"https://scihub.copernicus.eu/apihub/odata/v1/Products('bedec483-5ee1-4264-8dfa-a3b53ce364f7')/$value\"" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res['link'].iloc[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "When following the link above, the target file is 25MB.\n", "This points towards an error on the side of scihub." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 4 }