v0.0.28

bartzbeielstein · bartzbeielstein · commit 689fca81faf3 · 2023-04-09T22:29:29.000+02:00
fix "pandas >= 2.0" error in aggregate_mean()
diff --git a/notebooks/00_spot_doc.ipynb b/notebooks/00_spot_doc.ipynb
@@ -975,6 +975,63 @@
     "shgo_class(rosen, bounds)"
    ]
   },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "'in <string>' requires string as left operand, not pandas._libs.properties.CachedProperty",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[1;32m/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb Cell 72\u001b[0m in \u001b[0;36m1\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=12'>13</a>\u001b[0m y \u001b[39m=\u001b[39m fun(X)\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m S \u001b[39m=\u001b[39m Kriging(name\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mkriging\u001b[39m\u001b[39m'\u001b[39m,  seed\u001b[39m=\u001b[39m\u001b[39m123\u001b[39m)\n\u001b[0;32m---> <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=15'>16</a>\u001b[0m S\u001b[39m.\u001b[39;49mfit(X,y)\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=16'>17</a>\u001b[0m X2 \u001b[39m=\u001b[39m \u001b[39m2\u001b[39m\u001b[39m*\u001b[39mX\n\u001b[1;32m     <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m Y \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mempty_like(X2)\n",
+      "File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/spotPython/build/kriging.py:333\u001b[0m, in \u001b[0;36mKriging.fit\u001b[0;34m(self, nat_X, nat_y)\u001b[0m\n\u001b[1;32m    331\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mint_mask \u001b[39m=\u001b[39m array(\u001b[39mlist\u001b[39m(\u001b[39mmap\u001b[39m(\u001b[39mlambda\u001b[39;00m x: x \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mint\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvar_type)))\n\u001b[1;32m    332\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mordered_mask \u001b[39m=\u001b[39m array(\u001b[39mlist\u001b[39m(\u001b[39mmap\u001b[39m(\u001b[39mlambda\u001b[39;00m x: x \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mint\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mor\u001b[39;00m x \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mnum\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mor\u001b[39;00m x \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mfloat\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvar_type)))\n\u001b[0;32m--> 333\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnat_to_cod_init()\n\u001b[1;32m    334\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_theta \u001b[39m>\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mk:\n\u001b[1;32m    335\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_theta \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mk\n",
+      "File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/spotPython/build/kriging.py:907\u001b[0m, in \u001b[0;36mKriging.nat_to_cod_init\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    905\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnat_mean_y \u001b[39m=\u001b[39m mean(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnat_y)\n\u001b[1;32m    906\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnat_std_y \u001b[39m=\u001b[39m std(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnat_y)\n\u001b[0;32m--> 907\u001b[0m Z \u001b[39m=\u001b[39m aggregate_mean_var(X\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnat_X, y\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnat_y)\n\u001b[1;32m    908\u001b[0m mu \u001b[39m=\u001b[39m Z[\u001b[39m1\u001b[39m]\n\u001b[1;32m    909\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmean_cod_y \u001b[39m=\u001b[39m empty_like(mu)\n",
+      "File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/spotPython/utils/aggregate.py:18\u001b[0m, in \u001b[0;36maggregate_mean_var\u001b[0;34m(X, y, sort)\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39maggregate_mean_var\u001b[39m(X, y, sort\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m):\n\u001b[1;32m      6\u001b[0m     \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m      7\u001b[0m \u001b[39m    Aggregate array to mean.\u001b[39;00m\n\u001b[1;32m      8\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[39m        (numpy.ndarray): aggregated (variance per group) `y` values, shape `(1,)`, if `m`duplicates in `X`.\u001b[39;00m\n\u001b[1;32m     17\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[0;32m---> 18\u001b[0m     df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mDataFrame(X, dtype\u001b[39m=\u001b[39;49mpd\u001b[39m.\u001b[39;49mFloat64Dtype)\n\u001b[1;32m     19\u001b[0m     \u001b[39m# df.columns=[\"X\"+str(i) for i in range(df.shape[1])]\u001b[39;00m\n\u001b[1;32m     20\u001b[0m     df \u001b[39m=\u001b[39m df\u001b[39m.\u001b[39massign(y\u001b[39m=\u001b[39my)\n",
+      "File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/pandas/core/frame.py:757\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m    746\u001b[0m         mgr \u001b[39m=\u001b[39m dict_to_mgr(\n\u001b[1;32m    747\u001b[0m             \u001b[39m# error: Item \"ndarray\" of \"Union[ndarray, Series, Index]\" has no\u001b[39;00m\n\u001b[1;32m    748\u001b[0m             \u001b[39m# attribute \"name\"\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    754\u001b[0m             copy\u001b[39m=\u001b[39m_copy,\n\u001b[1;32m    755\u001b[0m         )\n\u001b[1;32m    756\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 757\u001b[0m         mgr \u001b[39m=\u001b[39m ndarray_to_mgr(\n\u001b[1;32m    758\u001b[0m             data,\n\u001b[1;32m    759\u001b[0m             index,\n\u001b[1;32m    760\u001b[0m             columns,\n\u001b[1;32m    761\u001b[0m             dtype\u001b[39m=\u001b[39;49mdtype,\n\u001b[1;32m    762\u001b[0m             copy\u001b[39m=\u001b[39;49mcopy,\n\u001b[1;32m    763\u001b[0m             typ\u001b[39m=\u001b[39;49mmanager,\n\u001b[1;32m    764\u001b[0m         )\n\u001b[1;32m    766\u001b[0m \u001b[39m# For data is list-like, or Iterable (will consume into list)\u001b[39;00m\n\u001b[1;32m    767\u001b[0m \u001b[39melif\u001b[39;00m is_list_like(data):\n",
+      "File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/pandas/core/internals/construction.py:311\u001b[0m, in \u001b[0;36mndarray_to_mgr\u001b[0;34m(values, index, columns, dtype, copy, typ)\u001b[0m\n\u001b[1;32m    305\u001b[0m     values \u001b[39m=\u001b[39m _ensure_2d(values)\n\u001b[1;32m    307\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(values, (np\u001b[39m.\u001b[39mndarray, ExtensionArray)):\n\u001b[1;32m    308\u001b[0m     \u001b[39m# drop subclass info\u001b[39;00m\n\u001b[1;32m    309\u001b[0m     _copy \u001b[39m=\u001b[39m (\n\u001b[1;32m    310\u001b[0m         copy_on_sanitize\n\u001b[0;32m--> 311\u001b[0m         \u001b[39mif\u001b[39;00m (dtype \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mor\u001b[39;00m astype_is_view(values\u001b[39m.\u001b[39;49mdtype, dtype))\n\u001b[1;32m    312\u001b[0m         \u001b[39melse\u001b[39;00m \u001b[39mFalse\u001b[39;00m\n\u001b[1;32m    313\u001b[0m     )\n\u001b[1;32m    314\u001b[0m     values \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39marray(values, copy\u001b[39m=\u001b[39m_copy)\n\u001b[1;32m    315\u001b[0m     values \u001b[39m=\u001b[39m _ensure_2d(values)\n",
+      "File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:284\u001b[0m, in \u001b[0;36mastype_is_view\u001b[0;34m(dtype, new_dtype)\u001b[0m\n\u001b[1;32m    280\u001b[0m \u001b[39melif\u001b[39;00m is_object_dtype(dtype) \u001b[39mand\u001b[39;00m new_dtype\u001b[39m.\u001b[39mkind \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mO\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m    281\u001b[0m     \u001b[39m# When the underlying array has dtype object, we don't have to make a copy\u001b[39;00m\n\u001b[1;32m    282\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m--> 284\u001b[0m \u001b[39melif\u001b[39;00m dtype\u001b[39m.\u001b[39;49mkind \u001b[39min\u001b[39;49;00m \u001b[39m\"\u001b[39;49m\u001b[39mmM\u001b[39;49m\u001b[39m\"\u001b[39;49m \u001b[39mand\u001b[39;00m new_dtype\u001b[39m.\u001b[39mkind \u001b[39min\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mmM\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m    285\u001b[0m     dtype \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(dtype, \u001b[39m\"\u001b[39m\u001b[39mnumpy_dtype\u001b[39m\u001b[39m\"\u001b[39m, dtype)\n\u001b[1;32m    286\u001b[0m     new_dtype \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(new_dtype, \u001b[39m\"\u001b[39m\u001b[39mnumpy_dtype\u001b[39m\u001b[39m\"\u001b[39m, new_dtype)\n",
+      "\u001b[0;31mTypeError\u001b[0m: 'in <string>' requires string as left operand, not pandas._libs.properties.CachedProperty"
+     ]
+    }
+   ],
+   "source": [
+    "from spotPython.fun.objectivefunctions import analytical\n",
+    "from spotPython.design.factorial import factorial\n",
+    "from spotPython.build.kriging import Kriging\n",
+    "import numpy as np\n",
+    "\n",
+    "gen = factorial(3)\n",
+    "rng = np.random.RandomState(1)\n",
+    "lower = np.array([-1,-1])\n",
+    "upper = np.array([0,0])\n",
+    "fun = analytical().fun_linear\n",
+    "X = gen.full_factorial(3)\n",
+    "X = 10*X\n",
+    "y = fun(X)\n",
+    "\n",
+    "S = Kriging(name='kriging',  seed=123)\n",
+    "S.fit(X,y)\n",
+    "X2 = 2*X\n",
+    "\n",
+    "Y = np.empty_like(X2)\n",
+    "T = np.empty_like(X2)\n",
+    "for i in range(S.n):\n",
+    "    T[i] = S.nat_to_cod_x(X2[i])\n",
+    "    Y[i] = S.cod_to_nat_x(T[i])"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "spotPython"
-version = "0.0.27"
+version = "0.0.28"
 authors = [
   { name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
 ]
diff --git a/src/spotPython/utils/aggregate.py b/src/spotPython/utils/aggregate.py
@@ -1,28 +1,39 @@
 import pandas as pd
 import numpy as np
 
-
 def aggregate_mean_var(X, y, sort=False):
     """
     Aggregate array to mean.
 
     Args:
         X (numpy.ndarray): X array, shape `(n, k)`.
         y (numpy.ndarray): values, shape `(n,)`.
+        sort (bool): Whether to sort the resulting DataFrame by the group keys.
 
     Returns:
-        (numpy.ndarray): aggregated `X` values, shape `(n-m, k)`, if `m`duplicates in `X`.
-        (numpy.ndarray): aggregated (mean per group) `y` values, shape `(1,)`, if `m`duplicates in `X`.
-        (numpy.ndarray): aggregated (variance per group) `y` values, shape `(1,)`, if `m`duplicates in `X`.
+        (numpy.ndarray): aggregated `X` values, shape `(n-m, k)`, if `m` duplicates in `X`.
+        (numpy.ndarray): aggregated (mean per group) `y` values, shape `(1,)`, if `m` duplicates in `X`.
+        (numpy.ndarray): aggregated (variance per group) `y` values, shape `(1,)`, if `m` duplicates in `X`.
     """
-    df = pd.DataFrame(X, dtype=pd.Float64Dtype)
-    # df.columns=["X"+str(i) for i in range(df.shape[1])]
-    df = df.assign(y=y)
-    df_m = df.groupby(list(df.columns.difference(["y"])), as_index=False, sort=sort).mean()
-    df_var = df.groupby(list(df.columns.difference(["y"])), as_index=False, sort=sort).var()
-    A = df_m.to_numpy(dtype=np.float64)
-    B = df_var.to_numpy()
-    return np.delete(A, -1, 1), A[:, -1], B[:, -1]
+    # Create a DataFrame from X and y
+    df = pd.DataFrame(X)
+    df["y"] = y
+
+    # Group by all columns except 'y' and calculate the mean and variance of 'y' for each group
+    grouped = df.groupby(list(df.columns.difference(["y"])), as_index=False, sort=sort)
+    df_mean = grouped.mean()
+    df_var = grouped.var()
+
+    # Convert the resulting DataFrames to numpy arrays
+    mean_array = df_mean.to_numpy()
+    var_array = df_var.to_numpy()
+
+    # Split the resulting arrays into separate arrays for X and y
+    X_agg = np.delete(mean_array, -1, 1)
+    y_mean = mean_array[:, -1]
+    y_var = var_array[:, -1]
+
+    return X_agg, y_mean, y_var
 
 
 def get_ranks(x):

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"`
`7`	`7`
`8`	`8`	`[project]`
`9`	`9`	`name = "spotPython"`
`10`		`-version = "0.0.27"`
	`10`	`+version = "0.0.28"`
`11`	`11`	`authors = [`
`12`	`12`	`{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }`
`13`	`13`	`]`