Skip to content

Commit 689fca8

Browse files
v0.0.28
fix "pandas >= 2.0" error in aggregate_mean_var()
1 parent bc1ce5c commit 689fca8

3 files changed

Lines changed: 81 additions & 13 deletions

File tree

notebooks/00_spot_doc.ipynb

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,63 @@
975975
"shgo_class(rosen, bounds)"
976976
]
977977
},
978+
{
979+
"attachments": {},
980+
"cell_type": "markdown",
981+
"metadata": {},
982+
"source": [
983+
"# Tests"
984+
]
985+
},
986+
{
987+
"cell_type": "code",
988+
"execution_count": 1,
989+
"metadata": {},
990+
"outputs": [
991+
{
992+
"ename": "TypeError",
993+
"evalue": "'in <string>' requires string as left operand, not pandas._libs.properties.CachedProperty",
994+
"output_type": "error",
995+
"traceback": [
996+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
997+
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
998+
"\u001b[1;32m/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb Cell 72\u001b[0m in \u001b[0;36m1\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=12'>13</a>\u001b[0m y \u001b[39m=\u001b[39m fun(X)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=14'>15</a>\u001b[0m S \u001b[39m=\u001b[39m Kriging(name\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mkriging\u001b[39m\u001b[39m'\u001b[39m, seed\u001b[39m=\u001b[39m\u001b[39m123\u001b[39m)\n\u001b[0;32m---> <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=15'>16</a>\u001b[0m S\u001b[39m.\u001b[39;49mfit(X,y)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=16'>17</a>\u001b[0m X2 \u001b[39m=\u001b[39m \u001b[39m2\u001b[39m\u001b[39m*\u001b[39mX\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/bartz/workspace/spotPython/notebooks/00_spot_doc.ipynb#Y130sZmlsZQ%3D%3D?line=18'>19</a>\u001b[0m Y \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mempty_like(X2)\n",
999+
"File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/spotPython/build/kriging.py:333\u001b[0m, in \u001b[0;36mKriging.fit\u001b[0;34m(self, nat_X, nat_y)\u001b[0m\n\u001b[1;32m 331\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mint_mask \u001b[39m=\u001b[39m array(\u001b[39mlist\u001b[39m(\u001b[39mmap\u001b[39m(\u001b[39mlambda\u001b[39;00m x: x \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mint\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvar_type)))\n\u001b[1;32m 332\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mordered_mask \u001b[39m=\u001b[39m array(\u001b[39mlist\u001b[39m(\u001b[39mmap\u001b[39m(\u001b[39mlambda\u001b[39;00m x: x \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mint\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mor\u001b[39;00m x \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mnum\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mor\u001b[39;00m x \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mfloat\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mvar_type)))\n\u001b[0;32m--> 333\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnat_to_cod_init()\n\u001b[1;32m 334\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_theta \u001b[39m>\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mk:\n\u001b[1;32m 335\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mn_theta \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mk\n",
1000+
"File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/spotPython/build/kriging.py:907\u001b[0m, in \u001b[0;36mKriging.nat_to_cod_init\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 905\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnat_mean_y \u001b[39m=\u001b[39m mean(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnat_y)\n\u001b[1;32m 906\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnat_std_y \u001b[39m=\u001b[39m std(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnat_y)\n\u001b[0;32m--> 907\u001b[0m Z \u001b[39m=\u001b[39m aggregate_mean_var(X\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnat_X, y\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mnat_y)\n\u001b[1;32m 908\u001b[0m mu \u001b[39m=\u001b[39m Z[\u001b[39m1\u001b[39m]\n\u001b[1;32m 909\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmean_cod_y \u001b[39m=\u001b[39m empty_like(mu)\n",
1001+
"File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/spotPython/utils/aggregate.py:18\u001b[0m, in \u001b[0;36maggregate_mean_var\u001b[0;34m(X, y, sort)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39maggregate_mean_var\u001b[39m(X, y, sort\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m):\n\u001b[1;32m 6\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[39m Aggregate array to mean.\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[39m (numpy.ndarray): aggregated (variance per group) `y` values, shape `(1,)`, if `m`duplicates in `X`.\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 18\u001b[0m df \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mDataFrame(X, dtype\u001b[39m=\u001b[39;49mpd\u001b[39m.\u001b[39;49mFloat64Dtype)\n\u001b[1;32m 19\u001b[0m \u001b[39m# df.columns=[\"X\"+str(i) for i in range(df.shape[1])]\u001b[39;00m\n\u001b[1;32m 20\u001b[0m df \u001b[39m=\u001b[39m df\u001b[39m.\u001b[39massign(y\u001b[39m=\u001b[39my)\n",
1002+
"File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/pandas/core/frame.py:757\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 746\u001b[0m mgr \u001b[39m=\u001b[39m dict_to_mgr(\n\u001b[1;32m 747\u001b[0m \u001b[39m# error: Item \"ndarray\" of \"Union[ndarray, Series, Index]\" has no\u001b[39;00m\n\u001b[1;32m 748\u001b[0m \u001b[39m# attribute \"name\"\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 754\u001b[0m copy\u001b[39m=\u001b[39m_copy,\n\u001b[1;32m 755\u001b[0m )\n\u001b[1;32m 756\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 757\u001b[0m mgr \u001b[39m=\u001b[39m ndarray_to_mgr(\n\u001b[1;32m 758\u001b[0m data,\n\u001b[1;32m 759\u001b[0m index,\n\u001b[1;32m 760\u001b[0m columns,\n\u001b[1;32m 761\u001b[0m dtype\u001b[39m=\u001b[39;49mdtype,\n\u001b[1;32m 762\u001b[0m copy\u001b[39m=\u001b[39;49mcopy,\n\u001b[1;32m 763\u001b[0m typ\u001b[39m=\u001b[39;49mmanager,\n\u001b[1;32m 764\u001b[0m )\n\u001b[1;32m 766\u001b[0m \u001b[39m# For data is list-like, or Iterable (will consume into list)\u001b[39;00m\n\u001b[1;32m 767\u001b[0m \u001b[39melif\u001b[39;00m is_list_like(data):\n",
1003+
"File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/pandas/core/internals/construction.py:311\u001b[0m, in \u001b[0;36mndarray_to_mgr\u001b[0;34m(values, index, columns, dtype, copy, typ)\u001b[0m\n\u001b[1;32m 305\u001b[0m values \u001b[39m=\u001b[39m _ensure_2d(values)\n\u001b[1;32m 307\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(values, (np\u001b[39m.\u001b[39mndarray, ExtensionArray)):\n\u001b[1;32m 308\u001b[0m \u001b[39m# drop subclass info\u001b[39;00m\n\u001b[1;32m 309\u001b[0m _copy \u001b[39m=\u001b[39m (\n\u001b[1;32m 310\u001b[0m copy_on_sanitize\n\u001b[0;32m--> 311\u001b[0m \u001b[39mif\u001b[39;00m (dtype \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39mor\u001b[39;00m astype_is_view(values\u001b[39m.\u001b[39;49mdtype, dtype))\n\u001b[1;32m 312\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39mFalse\u001b[39;00m\n\u001b[1;32m 313\u001b[0m )\n\u001b[1;32m 314\u001b[0m values \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39marray(values, copy\u001b[39m=\u001b[39m_copy)\n\u001b[1;32m 315\u001b[0m values \u001b[39m=\u001b[39m _ensure_2d(values)\n",
1004+
"File \u001b[0;32m~/miniforge3/envs/spotCondaEnv/lib/python3.10/site-packages/pandas/core/dtypes/astype.py:284\u001b[0m, in \u001b[0;36mastype_is_view\u001b[0;34m(dtype, new_dtype)\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[39melif\u001b[39;00m is_object_dtype(dtype) \u001b[39mand\u001b[39;00m new_dtype\u001b[39m.\u001b[39mkind \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mO\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m 281\u001b[0m \u001b[39m# When the underlying array has dtype object, we don't have to make a copy\u001b[39;00m\n\u001b[1;32m 282\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mTrue\u001b[39;00m\n\u001b[0;32m--> 284\u001b[0m \u001b[39melif\u001b[39;00m dtype\u001b[39m.\u001b[39;49mkind \u001b[39min\u001b[39;49;00m \u001b[39m\"\u001b[39;49m\u001b[39mmM\u001b[39;49m\u001b[39m\"\u001b[39;49m \u001b[39mand\u001b[39;00m new_dtype\u001b[39m.\u001b[39mkind \u001b[39min\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mmM\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[1;32m 285\u001b[0m dtype \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(dtype, \u001b[39m\"\u001b[39m\u001b[39mnumpy_dtype\u001b[39m\u001b[39m\"\u001b[39m, dtype)\n\u001b[1;32m 286\u001b[0m new_dtype \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(new_dtype, \u001b[39m\"\u001b[39m\u001b[39mnumpy_dtype\u001b[39m\u001b[39m\"\u001b[39m, new_dtype)\n",
1005+
"\u001b[0;31mTypeError\u001b[0m: 'in <string>' requires string as left operand, not pandas._libs.properties.CachedProperty"
1006+
]
1007+
}
1008+
],
1009+
"source": [
1010+
"from spotPython.fun.objectivefunctions import analytical\n",
1011+
"from spotPython.design.factorial import factorial\n",
1012+
"from spotPython.build.kriging import Kriging\n",
1013+
"import numpy as np\n",
1014+
"\n",
1015+
"gen = factorial(3)\n",
1016+
"rng = np.random.RandomState(1)\n",
1017+
"lower = np.array([-1,-1])\n",
1018+
"upper = np.array([0,0])\n",
1019+
"fun = analytical().fun_linear\n",
1020+
"X = gen.full_factorial(3)\n",
1021+
"X = 10*X\n",
1022+
"y = fun(X)\n",
1023+
"\n",
1024+
"S = Kriging(name='kriging', seed=123)\n",
1025+
"S.fit(X,y)\n",
1026+
"X2 = 2*X\n",
1027+
"\n",
1028+
"Y = np.empty_like(X2)\n",
1029+
"T = np.empty_like(X2)\n",
1030+
"for i in range(S.n):\n",
1031+
" T[i] = S.nat_to_cod_x(X2[i])\n",
1032+
" Y[i] = S.cod_to_nat_x(T[i])"
1033+
]
1034+
},
9781035
{
9791036
"cell_type": "code",
9801037
"execution_count": null,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
77

88
[project]
99
name = "spotPython"
10-
version = "0.0.27"
10+
version = "0.0.28"
1111
authors = [
1212
{ name="T. Bartz-Beielstein", email="tbb@bartzundbartz.de" }
1313
]

src/spotPython/utils/aggregate.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,39 @@
11
import pandas as pd
22
import numpy as np
33

4-
54
def aggregate_mean_var(X, y, sort=False):
65
"""
76
Aggregate array to mean.
87
98
Args:
109
X (numpy.ndarray): X array, shape `(n, k)`.
1110
y (numpy.ndarray): values, shape `(n,)`.
11+
sort (bool): Whether to sort the resulting DataFrame by the group keys.
1212
1313
Returns:
14-
(numpy.ndarray): aggregated `X` values, shape `(n-m, k)`, if `m`duplicates in `X`.
15-
(numpy.ndarray): aggregated (mean per group) `y` values, shape `(1,)`, if `m`duplicates in `X`.
16-
(numpy.ndarray): aggregated (variance per group) `y` values, shape `(1,)`, if `m`duplicates in `X`.
14+
(numpy.ndarray): aggregated `X` values, shape `(n-m, k)`, if `m` duplicates in `X`.
15+
(numpy.ndarray): aggregated (mean per group) `y` values, shape `(1,)`, if `m` duplicates in `X`.
16+
(numpy.ndarray): aggregated (variance per group) `y` values, shape `(1,)`, if `m` duplicates in `X`.
1717
"""
18-
df = pd.DataFrame(X, dtype=pd.Float64Dtype)
19-
# df.columns=["X"+str(i) for i in range(df.shape[1])]
20-
df = df.assign(y=y)
21-
df_m = df.groupby(list(df.columns.difference(["y"])), as_index=False, sort=sort).mean()
22-
df_var = df.groupby(list(df.columns.difference(["y"])), as_index=False, sort=sort).var()
23-
A = df_m.to_numpy(dtype=np.float64)
24-
B = df_var.to_numpy()
25-
return np.delete(A, -1, 1), A[:, -1], B[:, -1]
18+
# Create a DataFrame from X and y
19+
df = pd.DataFrame(X)
20+
df["y"] = y
21+
22+
# Group by all columns except 'y' and calculate the mean and variance of 'y' for each group
23+
grouped = df.groupby(list(df.columns.difference(["y"])), as_index=False, sort=sort)
24+
df_mean = grouped.mean()
25+
df_var = grouped.var()
26+
27+
# Convert the resulting DataFrames to numpy arrays
28+
mean_array = df_mean.to_numpy()
29+
var_array = df_var.to_numpy()
30+
31+
# Split the resulting arrays into separate arrays for X and y
32+
X_agg = np.delete(mean_array, -1, 1)
33+
y_mean = mean_array[:, -1]
34+
y_var = var_array[:, -1]
35+
36+
return X_agg, y_mean, y_var
2637

2738

2839
def get_ranks(x):

0 commit comments

Comments
 (0)