diff --git a/reports/heatmap.ipynb b/reports/annual_category_location_heatmap.ipynb similarity index 66% rename from reports/heatmap.ipynb rename to reports/annual_category_location_heatmap.ipynb index 4460000..7c9a219 100644 --- a/reports/heatmap.ipynb +++ b/reports/annual_category_location_heatmap.ipynb @@ -26,8 +26,11 @@ "# Species category to report on\n", "category = \"\"\n", "\n", - "# Export format - either PNG or PDF\n", - "export_format = \"PNG\"" + "# Export format for the heatmap chart:\n", + "# PNG - export as PNG image\n", + "# PDF - export as PDF file\n", + "# - do not export\n", + "export_format = \"\"" ] }, { @@ -65,7 +68,12 @@ "# Connect to the database, execute the query and read the results into a dataframe\n", "database_path = os.environ[\"NATURE_RECORDER_DB\"]\n", "connection = sqlite3.connect(database_path)\n", - "df = pd.read_sql_query(query, connection, parse_dates=[\"Date\"])" + "df = pd.read_sql_query(query, connection, parse_dates=[\"Date\"])\n", + "\n", + "# Check there is some data\n", + "if not df.shape[0]:\n", + " message = f\"No data found for category '{category}' at location '{location}' during '{year}'\"\n", + " raise ValueError(message)" ] }, { @@ -76,11 +84,38 @@ "outputs": [], "source": [ "import calendar\n", + "import re\n", + "\n", + "# Create the folder to hold exported reports\n", + "export_folder_path = Path(\"exported\")\n", + "export_folder_path.mkdir(parents=True, exist_ok=True)\n", "\n", "# Pre-process the data to provide a heatmap data source\n", "df[\"Month\"] = df[\"Date\"].dt.month\n", - "heatmap_data = df.groupby([\"Species\", \"Month\"]).size().unstack(fill_value=0)\n", - "heatmap_data.columns = [calendar.month_abbr[m] for m in heatmap_data.columns]" + "heatmap_data = df.groupby([\"Species\", \"Month\"])[\"Count\"].sum().unstack(fill_value=0)\n", + "\n", + "# Make sure all months are represented even if there are no sightings in the data set for that month\n", + "for month in range(1, 13):\n", + " if month not in 
heatmap_data.columns:\n", + " heatmap_data[month] = 0\n", + "\n", + "# Re-order the columns in calendar order\n", + "heatmap_data = heatmap_data[sorted(heatmap_data.columns)]\n", + "\n", + "# Set the labels to the month abbreviations rather than month numbers\n", + "heatmap_data.columns = [calendar.month_abbr[m] for m in heatmap_data.columns]\n", + "\n", + "# Export the heatmap data to Excel\n", + "clean_location = re.sub(r\"[^0-9a-zA-Z\\-]+\", \"\", location.replace(\" \", \"-\"))\n", + "export_file_path = export_folder_path / f\"{year}-{category}-{clean_location}-Heatmap.xlsx\"\n", + "heatmap_data.to_excel(export_file_path.absolute(), sheet_name=\"Sightings\")\n", + "\n", + "# Print the heatmap data\n", + "with pd.option_context('display.max_rows', None,\n", + " 'display.max_columns', None,\n", + " 'display.precision', 3,\n", + " ):\n", + " display(heatmap_data)" ] }, { @@ -94,24 +129,20 @@ "import matplotlib.pyplot as plt\n", "\n", "# Generate the heatmap\n", - "plt.figure(figsize=(12, 24))\n", + "plt.figure(figsize=(12, heatmap_data.shape[0] / 3))\n", "sns.heatmap(heatmap_data, cmap=\"YlOrRd\", annot=False)\n", "plt.title(f\"Number of Sightings of {category} at {location} in {year}\")\n", "plt.xlabel(\"\")\n", "plt.ylabel(\"\")\n", "\n", - "# Create the folder to hold exported reports\n", - "export_folder_path = Path(\"exported\")\n", - "export_folder_path.mkdir(parents=True, exist_ok=True)\n", - "\n", "# Export to PNG\n", "if export_format.casefold() == \"png\":\n", - " export_file_path = export_folder_path / f\"{year}-{category}-Heatmap.png\"\n", + " export_file_path = export_folder_path / f\"{year}-{category}-{clean_location}-Heatmap.png\"\n", " plt.savefig(export_file_path.absolute(), format=\"png\", dpi=300, bbox_inches=\"tight\")\n", "\n", "# Export to PDF\n", "if export_format.casefold() == \"pdf\":\n", - " export_file_path = Path(\"exported\") / f\"{year}-{category}-Heatmap.pdf\"\n", + " export_file_path = export_folder_path / 
f\"{year}-{category}-{clean_location}-Heatmap.pdf\"\n", " plt.savefig(export_file_path.absolute(), format=\"pdf\", bbox_inches=\"tight\")\n", "\n", "# And show the plot\n", @@ -121,7 +152,7 @@ ], "metadata": { "kernelspec": { - "display_name": "venv", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/reports/requirements.txt b/reports/requirements.txt index 6d6825a..fa8ba65 100644 --- a/reports/requirements.txt +++ b/reports/requirements.txt @@ -18,6 +18,7 @@ cycler==0.12.1 debugpy==1.8.14 decorator==5.2.1 defusedxml==0.7.1 +et_xmlfile==2.0.0 executing==2.2.0 fastjsonschema==2.21.1 fonttools==4.57.0 @@ -61,6 +62,7 @@ nest-asyncio==1.6.0 notebook==7.4.0 notebook_shim==0.2.4 numpy==2.2.4 +openpyxl==3.1.5 overrides==7.7.0 packaging==24.2 pandas==2.2.3 diff --git a/reports/sql/sightings.sql b/reports/sql/sightings.sql index 7f22c56..da1f68d 100644 --- a/reports/sql/sightings.sql +++ b/reports/sql/sightings.sql @@ -1,4 +1,4 @@ -SELECT l.Name AS 'Location', sp.Name AS 'Species', sp.Name AS 'Category', DATE( s.Date ) AS 'Date' +SELECT l.Name AS 'Location', sp.Name AS 'Species', c.Name AS 'Category', DATE( s.Date ) AS 'Date', IFNULL( s.Number, 1 ) AS 'Count' FROM SIGHTINGS s INNER JOIN SPECIES sp ON sp.Id = s.SpeciesId INNER JOIN CATEGORIES c ON c.Id = sp.CategoryId @@ -6,4 +6,4 @@ INNER JOIN LOCATIONS l ON l.Id = s.LocationId WHERE l.Name = '$LOCATION' AND s.Date LIKE '$YEAR-%' AND sp.Name LIKE '%$SPECIES%' -AND c.Name = "$CATEGORY"; \ No newline at end of file +AND c.Name = "$CATEGORY";