diff --git a/catalog/app/containers/Bucket/PackageTree.js b/catalog/app/containers/Bucket/PackageTree.js
index f3170d4d75d..080e48dc29d 100644
--- a/catalog/app/containers/Bucket/PackageTree.js
+++ b/catalog/app/containers/Bucket/PackageTree.js
@@ -27,51 +27,40 @@ import * as requests from './requests'
const TreeDisplay = tagged([
'File', // S3Handle
- 'Dir', // { files, dirs }
+ 'Dir', // { dirs, files, bucket, name, revision, path, truncated }
'NotFound',
])
-const mkHandle = ({ logical_key: logicalKey, physical_keys: [key], size }) => ({
- ...parseS3Url(key),
+const mkHandle = ({ logicalKey, physicalKey, size }) => ({ // turns one manifest row (already renamed to camelCase fields) into a file handle
+ ...parseS3Url(physicalKey), // presumably contributes the S3 location fields of the physical key — confirm against parseS3Url
size,
logicalKey,
})
const getParents = (path) => (path ? [...getParents(up(path)), path] : [])
-const computeTree = ({ bucket, name, revision, path }) =>
- R.pipe(
- R.prop('keys'),
- R.ifElse(
- () => isDir(path),
- R.pipe(
- R.applySpec({
- dirs: R.pipe(
- // eslint-disable-next-line camelcase
- R.map((info) => getPrefix(info.logical_key)),
- R.uniq,
- R.chain(getParents),
- R.uniq,
- R.filter((dir) => up(dir) === path),
- ),
- files: R.pipe(
- // eslint-disable-next-line camelcase
- R.filter((info) => getPrefix(info.logical_key) === path),
- R.map(mkHandle),
- ),
- bucket: () => bucket,
- name: () => name,
- revision: () => revision,
- path: () => path,
- }),
- TreeDisplay.Dir,
- ),
- (keys) => {
- const key = keys.find(R.propEq('logical_key', path))
- return key ? TreeDisplay.File(mkHandle(key)) : TreeDisplay.NotFound()
- },
- ),
- )
+// Curried: takes the package location first, then a fetched listing
+// ({ keys, truncated }), and yields the TreeDisplay variant for `path`.
+const computeTree = ({ bucket, name, revision, path }) => ({ keys, truncated }) => {
+  // Non-directory path: resolve it to a single manifest entry, if there is one.
+  if (!isDir(path)) {
+    const entry = keys.find(R.propEq('logicalKey', path))
+    if (!entry) return TreeDisplay.NotFound()
+    return TreeDisplay.File(mkHandle(entry))
+  }
+
+  // Expand every entry prefix into its chain of parents, then keep only the
+  // directories whose immediate parent is `path`.
+  const prefixes = R.uniq(keys.map((info) => getPrefix(info.logicalKey)))
+  const ancestors = R.uniq(R.chain(getParents, prefixes))
+  const dirs = ancestors.filter((dir) => up(dir) === path)
+
+  // Entries whose prefix is exactly `path`.
+  const files = keys
+    .filter((info) => getPrefix(info.logicalKey) === path)
+    .map(mkHandle)
+
+  return TreeDisplay.Dir({ dirs, files, bucket, name, revision, path, truncated })
+}
const formatListing = ({ urls }, r) => {
const dirs = r.dirs.map((dir) =>
@@ -219,9 +208,9 @@ export default ({
),
- Dir: (dir) => (
+ Dir: ({ truncated, ...dir }) => (
-
+
{/* TODO: use proper versions */}
diff --git a/catalog/app/containers/Bucket/requests.js b/catalog/app/containers/Bucket/requests.js
index 7ec1166671c..b332aaaac9b 100644
--- a/catalog/app/containers/Bucket/requests.js
+++ b/catalog/app/containers/Bucket/requests.js
@@ -184,12 +184,6 @@ const loadRevisionHash = ({ s3, bucket, key }) =>
.promise()
.then((res) => res.Body.toString('utf-8'))
-const parseJSONL = R.pipe(
- R.split('\n'),
- R.map(R.tryCatch(JSON.parse, () => null)),
- R.reject(R.isNil),
-)
-
const getRevisionIdFromKey = (key) => key.substring(key.lastIndexOf('/') + 1)
const getRevisionKeyFromId = (name, id) => `${PACKAGES_PREFIX}${name}/${id}`
@@ -238,15 +232,70 @@ export const getPackageRevisions = withErrorHandling(
},
)
+/**
+ * Run an S3 Select query and resolve to the array of parsed result records.
+ *
+ * @param {object} opts
+ * @param {object} opts.s3 - S3 client exposing `selectObjectContent(...).promise()`.
+ * @param {string} [opts.ExpressionType='SQL']
+ * @param {object} [opts.InputSerialization] - defaults to line-delimited JSON.
+ * Any other option (Bucket, Key, Expression, ...) is forwarded unchanged and
+ * may also override the JSON `OutputSerialization` set here.
+ * @returns {Promise<Array>} one parsed JSON value per output line; `[]` when
+ *   the query produced no records.
+ */
+const s3Select = ({
+  s3,
+  ExpressionType = 'SQL',
+  InputSerialization = { JSON: { Type: 'LINES' } },
+  ...rest
+}) =>
+  s3
+    .selectObjectContent({
+      ExpressionType,
+      InputSerialization,
+      OutputSerialization: { JSON: {} },
+      ...rest,
+    })
+    .promise()
+    .then(
+      R.pipe(
+        R.prop('Payload'),
+        // Only `Records` events carry result bytes; other event kinds are skipped.
+        // NOTE(review): each chunk is decoded on its own, so a multi-byte
+        // character split across two events would be garbled — confirm whether
+        // the SDK can split payloads mid-character.
+        R.reduce((acc, evt) => {
+          if (!evt.Records) return acc
+          const s = evt.Records.Payload.toString()
+          return acc + s
+        }, ''),
+        R.split('\n'),
+        // Drop blank lines (trailing newline, or no output at all) so an empty
+        // result set yields [] instead of throwing from JSON.parse('').
+        R.reject((line) => line.trim() === ''),
+        R.map(JSON.parse),
+      ),
+    )
+
export const fetchPackageTree = withErrorHandling(
async ({ s3, bucket, name, revision }) => {
const hashKey = getRevisionKeyFromId(name, revision)
const hash = await loadRevisionHash({ s3, bucket, key: hashKey })
const manifestKey = `${MANIFESTS_PREFIX}${hash}`
- const r = await s3.getObject({ Bucket: bucket, Key: manifestKey }).promise()
- const [info, ...keys] = parseJSONL(r.Body.toString('utf-8'))
- const modified = r.LastModified
- return { id: revision, hash, info, keys, modified }
+
+ const [[{ total }], keys] = await Promise.all([ // both queries read the same manifest object and are issued concurrently
+ s3Select({ // query 1: count every record that has a logical_key
+ s3,
+ Bucket: bucket,
+ Key: manifestKey,
+ Expression: `
+ SELECT COUNT(*) AS total
+ FROM S3Object[*] o
+ WHERE o.logical_key IS NOT MISSING
+ `,
+ }),
+ s3Select({ // query 2: fetch at most 1000 such records, projected to logicalKey / physicalKey (first physical key only) / size
+ s3,
+ Bucket: bucket,
+ Key: manifestKey,
+ Expression: `
+ SELECT
+ o.logical_key AS logicalKey,
+ o.physical_keys[0] AS physicalKey,
+ o."size" AS "size"
+ FROM S3Object[*] o
+ WHERE o.logical_key IS NOT MISSING
+ LIMIT 1000
+ `,
+ }),
+ ])
+
+ const truncated = total > keys.length // true when the manifest holds more entries than were fetched
+
+ return { id: revision, hash, keys, truncated } // unlike the removed version, the result no longer carries `info` or `modified`
},
)
@@ -334,23 +383,13 @@ const queryAccessCounts = async ({
window = 365,
}) => {
try {
- const { Payload } = await s3
- .selectObjectContent({
- Bucket: analyticsBucket,
- Key: `${ACCESS_COUNTS_PREFIX}/${type}.csv`,
- Expression: query,
- ExpressionType: 'SQL',
- InputSerialization: { CSV: { FileHeaderInfo: 'Use' } },
- OutputSerialization: { JSON: {} },
- })
- .promise()
-
- const recordedCounts = Payload.reduce((acc, i) => {
- if (!i.Records) return acc
- const [json] = i.Records.Payload.toString().split('\n')
- const data = JSON.parse(json)
- return JSON.parse(data.counts)
- }, {})
+ const [{ counts: recordedCounts }] = await s3Select({
+ s3,
+ Bucket: analyticsBucket,
+ Key: `${ACCESS_COUNTS_PREFIX}/${type}.csv`,
+ Expression: query,
+ InputSerialization: { CSV: { FileHeaderInfo: 'Use' } },
+ })
const counts = R.times((i) => {
const date = dateFns.subDays(today, window - i - 1)