Skip to content

features-barcodes-only#149

Open
joshua-gould wants to merge 10 commits into
main from
features-barcodes-only
Open

features-barcodes-only#149
joshua-gould wants to merge 10 commits into
main from
features-barcodes-only

Conversation

@joshua-gould
Copy link
Copy Markdown
Collaborator

No description provided.

@joshua-gould joshua-gould requested a review from jshleap June 3, 2026 20:13
Comment thread scallops/cli/features.py
Comment on lines +90 to +95
query_columns = [
c
for c in query_columns
if c not in merged_df.columns and c in data.var.index
]
columns.update(query_columns)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
query_columns = [
c
for c in query_columns
if c not in merged_df.columns and c in data.var.index
]
columns.update(query_columns)
columns.update(
c for c in query_columns
if c not in merged_df.columns and c in data.var.index
)

Comment thread scallops/cli/features.py
Comment on lines +98 to +99
for i in range(len(columns)):
merged_df[columns[i]] = values[:, i]
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for i in range(len(columns)):
merged_df[columns[i]] = values[:, i]
if sp.issparse(values):
values = values.toarray()
merged_df[columns] = values

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we need to test for sparsity (I think it is a failsafe), we need to import scipy.sparse as sp.

"--objects",
required=False,
help="Path to directory containing output from `find-objects`",
help="Path to directory containing output from `find-objects` or `merge`",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it intended that --objects and --merge can be used?

Comment on lines +654 to +675
for i in range(len(phenotype_paths)):
# match */A1-*.parquet and */A1.parquet
sep = phenotype_filesystems[i].sep
matches = phenotype_filesystems[i].glob(
f"{phenotype_paths[i]}{sep}*{sep}{image_key}-*.parquet"
) + phenotype_filesystems[i].glob(
f"{phenotype_paths[i]}{sep}*{sep}{image_key}.parquet"
)

if len(matches) == 0:
# match A1-*.parquet and A1.parquet
matches = phenotype_filesystems[i].glob(
f"{phenotype_paths[i]}{sep}{image_key}-*.parquet"
) + phenotype_filesystems[i].glob(
f"{phenotype_paths[i]}{sep}{image_key}.parquet"
)

for x in matches:
_phenotype_paths.append(phenotype_filesystems[i].unstrip_protocol(x))
if phenotype_suffix is not None:
_phenotype_suffix.append(phenotype_suffix[i])
return _phenotype_paths, _phenotype_suffix
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for i in range(len(phenotype_paths)):
# match */A1-*.parquet and */A1.parquet
sep = phenotype_filesystems[i].sep
matches = phenotype_filesystems[i].glob(
f"{phenotype_paths[i]}{sep}*{sep}{image_key}-*.parquet"
) + phenotype_filesystems[i].glob(
f"{phenotype_paths[i]}{sep}*{sep}{image_key}.parquet"
)
if len(matches) == 0:
# match A1-*.parquet and A1.parquet
matches = phenotype_filesystems[i].glob(
f"{phenotype_paths[i]}{sep}{image_key}-*.parquet"
) + phenotype_filesystems[i].glob(
f"{phenotype_paths[i]}{sep}{image_key}.parquet"
)
for x in matches:
_phenotype_paths.append(phenotype_filesystems[i].unstrip_protocol(x))
if phenotype_suffix is not None:
_phenotype_suffix.append(phenotype_suffix[i])
return _phenotype_paths, _phenotype_suffix
has_suffix = phenotype_suffix is not None
suffix_iter = phenotype_suffix if has_suffix else [None] * len(phenotype_paths)
for path, fs, suffix in zip(phenotype_paths, phenotype_filesystems, suffix_iter, strict=True):
sep = fs.sep
# Match */<image_key>[-*].parquet (subdirectory pattern)
matches = (
fs.glob(f"{path}{sep}*{sep}{image_key}-*.parquet")
+ fs.glob(f"{path}{sep}*{sep}{image_key}.parquet")
)
if not matches:
# Match <image_key>[-*].parquet (flat pattern)
matches = (
fs.glob(f"{path}{sep}{image_key}-*.parquet")
+ fs.glob(f"{path}{sep}{image_key}.parquet")
)
for match in matches:
phenotypepaths.append(fs.unstrip_protocol(match))
if has_suffix:
phenotypesuffix.append(suffix)
return phenotypepaths, phenotypesuffix

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants