From d69e2b64445710012c047ce57dfc56a5faf1f47f Mon Sep 17 00:00:00 2001 From: Simran Spiller Date: Tue, 9 Dec 2025 10:49:27 +0100 Subject: [PATCH 1/4] Added and removed consolidation options for inverted indexes and arangosearch Views --- .../3.12/develop/http-api/indexes/inverted.md | 55 +++ .../http-api/views/arangosearch-views.md | 430 +++++++++++++++++ .../arangosearch-views-reference.md | 49 ++ .../version-3.12/api-changes-in-3-12.md | 38 ++ .../incompatible-changes-in-3-12.md | 23 + .../version-3.12/whats-new-in-3-12.md | 32 ++ .../4.0/develop/http-api/indexes/inverted.md | 64 ++- .../http-api/views/arangosearch-views.md | 450 ++++++++++++------ .../arangosearch-views-reference.md | 52 +- .../version-3.12/api-changes-in-3-12.md | 38 ++ .../incompatible-changes-in-3-12.md | 23 + .../version-3.12/whats-new-in-3-12.md | 32 ++ 12 files changed, 1110 insertions(+), 176 deletions(-) diff --git a/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md b/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md index d2c5939c25..97fdd8bef8 100644 --- a/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md +++ b/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md @@ -567,6 +567,8 @@ paths: default: tier segmentsBytesFloor: description: | + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection. type: integer @@ -578,21 +580,74 @@ paths: default: 8589934592 segmentsMax: description: | + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation. type: integer default: 200 segmentsMin: description: | + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation. 
type: integer default: 50 minScore: description: | + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this. type: integer default: 0 + maxSkewThreshold: + description: | + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.4 + minDeletionRatio: + description: | + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. 
As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.5 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool diff --git a/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md b/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md index 2f33e5c772..825adc789a 100644 --- a/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md +++ b/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md @@ -348,6 +348,8 @@ paths: maximum: 1.0 segmentsBytesFloor: description: | + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection. type: integer @@ -359,21 +361,74 @@ paths: default: 8589934592 segmentsMax: description: | + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation. type: integer default: 200 segmentsMin: description: | + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation type: integer default: 50 minScore: description: | + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this. type: integer default: 0 + maxSkewThreshold: + description: | + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold.
Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.4 + minDeletionRatio: + description: | + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. 
+ type: number + minimum: 0.0 + maximum: 1.0 + default: 0.5 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool @@ -569,6 +624,8 @@ paths: maximum: 1.0 segmentsBytesFloor: description: | + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection. type: integer @@ -578,18 +635,69 @@ paths: type: integer segmentsMax: description: | + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation. type: integer segmentsMin: description: | + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation type: integer minScore: description: | + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this. type: integer + maxSkewThreshold: + description: | + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g.
a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: + description: | + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled). @@ -1041,6 +1149,8 @@ paths: maximum: 1.0 segmentsBytesFloor: description: | + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection. type: integer @@ -1050,18 +1160,69 @@ paths: type: integer segmentsMax: description: | + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation.
type: integer segmentsMin: description: | + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation type: integer minScore: description: | + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this. type: integer + maxSkewThreshold: + description: | + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: + description: | + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. 
It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled). @@ -1444,6 +1605,8 @@ paths: maximum: 1.0 segmentsBytesFloor: description: | + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection. type: integer @@ -1455,21 +1618,74 @@ paths: default: 8589934592 segmentsMax: description: | + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation. type: integer default: 200 segmentsMin: description: | + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation type: integer default: 50 minScore: description: | + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this. type: integer default: 0 + maxSkewThreshold: + description: | + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size.
It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.4 + minDeletionRatio: + description: | + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.5 responses: '200': description: | @@ -1643,6 +1859,8 @@ paths: maximum: 1.0 segmentsBytesFloor: description: | + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection.
type: integer @@ -1652,18 +1870,69 @@ paths: type: integer segmentsMax: description: | + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation. type: integer segmentsMin: description: | + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation type: integer minScore: description: | + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this. type: integer + maxSkewThreshold: + description: | + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: + description: | + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. 
Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled). @@ -1952,6 +2221,8 @@ paths: maximum: 1.0 segmentsBytesFloor: description: | + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection. type: integer @@ -1963,21 +2234,74 @@ paths: default: 8589934592 segmentsMax: description: | + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation. type: integer default: 200 segmentsMin: description: | + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation type: integer default: 50 minScore: description: | + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this. type: integer default: 0 + maxSkewThreshold: + description: | + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold.
Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.4 + minDeletionRatio: + description: | + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. 
+ type: number + minimum: 0.0 + maximum: 1.0 + default: 0.5 responses: '200': description: | @@ -2151,6 +2475,8 @@ paths: maximum: 1.0 segmentsBytesFloor: description: | + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection. type: integer @@ -2160,18 +2486,69 @@ paths: type: integer segmentsMax: description: | + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation. type: integer segmentsMin: description: | + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation type: integer minScore: description: | + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this. type: integer + maxSkewThreshold: + description: | + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance.
+ type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: + description: | + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled). @@ -2518,6 +2895,8 @@ paths: maximum: 1.0 segmentsBytesFloor: description: | + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection. type: integer @@ -2527,18 +2906,69 @@ paths: type: integer segmentsMax: description: | + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation. type: integer segmentsMin: description: | + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation type: integer minScore: description: | + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this.
type: integer + maxSkewThreshold: + description: | + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: + description: | + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. 
As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled). diff --git a/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md b/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md index 036758127f..e9d50f7b80 100644 --- a/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md +++ b/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md @@ -485,10 +485,14 @@ is used by these writers (in terms of "writers pool") one can use - **segmentsMin** (_optional_; type: `integer`; default: `50`) + This option is only available up to v3.12.6: + The minimum number of segments that are evaluated as candidates for consolidation. - **segmentsMax** (_optional_; type: `integer`; default: `200`) + This option is only available up to v3.12.6: + The maximum number of segments that are evaluated as candidates for consolidation. - **segmentsBytesMax** (_optional_; type: `integer`; default: `8589934592`) @@ -497,9 +501,54 @@ is used by these writers (in terms of "writers pool") one can use - **segmentsBytesFloor** (_optional_; type: `integer`; default: `25165824`) + This option is only available up to v3.12.6: + Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection. - **minScore** (_optional_; type: `integer`; default: `0`) + This option is only available up to v3.12.6: + Filter out consolidation candidates with a score less than this.
+ + - **maxSkewThreshold** (_optional_; type: `number`; default: `0.4`) + + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + + - **minDeletionRatio** (_optional_; type: `number`; default: `0.5`) + + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. 
As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. diff --git a/site/content/arangodb/3.12/release-notes/version-3.12/api-changes-in-3-12.md b/site/content/arangodb/3.12/release-notes/version-3.12/api-changes-in-3-12.md index 8cc010cc0a..e3a588dbca 100644 --- a/site/content/arangodb/3.12/release-notes/version-3.12/api-changes-in-3-12.md +++ b/site/content/arangodb/3.12/release-notes/version-3.12/api-changes-in-3-12.md @@ -363,6 +363,25 @@ By consolidating less often and with more data, less file descriptors are used. - `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB) - `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB) +##### Added and removed consolidation options for `arangosearch` Views + +Introduced in: v3.12.7 + +The following options for consolidating `arangosearch` Views have been removed +and are now ignored when specified in a request: + +- `consolidationPolicy` (with `type` set to `tier`): + - `segmentsMin` + - `segmentsMax` + - `segmentsBytesFloor` + - `minScore` + +The following new options have been added: + +- `consolidationPolicy` (with `type` set to `tier`): + - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`) + - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`) + #### Document API The following endpoints accept a new `versionAttribute` query parameter that adds @@ -501,6 +520,25 @@ By consolidating less often and with more data, less file descriptors are used. 
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB) - `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB) +##### Added and removed consolidation options for inverted indexes + +Introduced in: v3.12.7 + +The following options for consolidating inverted indexes have been removed +and are now ignored when specified in a request: + +- `consolidationPolicy` (with `type` set to `tier`): + - `segmentsMin` + - `segmentsMax` + - `segmentsBytesFloor` + - `minScore` + +The following new options have been added: + +- `consolidationPolicy` (with `type` set to `tier`): + - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`) + - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`) + #### Optimizer rule descriptions Introduced in: v3.10.9, v3.11.2 diff --git a/site/content/arangodb/3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md b/site/content/arangodb/3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md index 959cea82cf..1fe5a31d18 100644 --- a/site/content/arangodb/3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md +++ b/site/content/arangodb/3.12/release-notes/version-3.12/incompatible-changes-in-3-12.md @@ -994,6 +994,29 @@ more data, less file descriptors are used. 
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB) - `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB) +## Added and removed consolidation options for inverted indexes and `arangosearch` Views + +Introduced in: v3.12.7 + +The following options for consolidating inverted indexes as well as +`arangosearch` Views have been removed and are now ignored when specified in a request: + +- `consolidationPolicy` (with `type` set to `tier`): + - `segmentsMin` + - `segmentsMax` + - `segmentsBytesFloor` + - `minScore` + +The consolidation works differently now and uses the new `maxSkewThreshold` and +`minDeletionRatio` options together with the existing `segmentsBytesMax`. If you +previously used customized settings for the removed options, check if the default +values of the new options are acceptable or if you need to tune them according to +your workload. + +For details, see: +- [HTTP interface for inverted indexes](../../develop/http-api/indexes/inverted.md) +- [`arangosearch` View properties](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#view-properties) + ## HTTP RESTful API ### JavaScript-based traversal using `/_api/traversal` removed diff --git a/site/content/arangodb/3.12/release-notes/version-3.12/whats-new-in-3-12.md b/site/content/arangodb/3.12/release-notes/version-3.12/whats-new-in-3-12.md index d0d0c77010..199242cfb5 100644 --- a/site/content/arangodb/3.12/release-notes/version-3.12/whats-new-in-3-12.md +++ b/site/content/arangodb/3.12/release-notes/version-3.12/whats-new-in-3-12.md @@ -2452,6 +2452,38 @@ environment variable `NAME`. If there is an environment variable called `PID` or `TEMP_BASE_DIR`, then `@PID@` or `@TEMP_BASE_DIR@` is substituted with the value of the respective environment variable.
+### New consolidation algorithm for inverted indexes and `arangosearch` Views + +Introduced in: v3.12.7 + +The `tier` consolidation policy now uses a different algorithm for merging +and cleaning up segments. Overall, it avoids consolidating segments where the +cost of writing the new segment is high and the gain in read performance is low +(e.g. combining a big segment file with a very small one). + +The following options have been removed for inverted indexes as well as +`arangosearch` Views because the new consolidation algorithm doesn't use them: + +- `consolidationPolicy` (with `type` set to `tier`): + - `segmentsMin` + - `segmentsMax` + - `segmentsBytesFloor` + - `minScore` + +The following new options have been added: + +- `consolidationPolicy` (with `type` set to `tier`): + - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`) + - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`) + +If you previously used customized settings for the removed options, check if the +default values of the new options are acceptable or if you need to tune them +according to your workload. + +For details, see: +- [HTTP interface for inverted indexes](../../develop/http-api/indexes/inverted.md) +- [`arangosearch` View properties](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#view-properties) + ## Client tools ### Protocol aliases for endpoints diff --git a/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md b/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md index d2c5939c25..c24a636d46 100644 --- a/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md +++ b/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md @@ -565,34 +565,58 @@ paths: document count as dictated by the customization attributes. type: string default: tier - segmentsBytesFloor: - description: | - Defines the value (in bytes) to treat all smaller segments as equal for - consolidation selection. 
- type: integer - default: 25165824 segmentsBytesMax: description: | The maximum allowed size of all consolidated segments in bytes. type: integer default: 8589934592 - segmentsMax: + maxSkewThreshold: description: | - The maximum number of segments that are evaluated as candidates for - consolidation. - type: integer - default: 200 - segmentsMin: - description: | - The minimum number of segments that are evaluated as candidates for - consolidation. - type: integer - default: 50 - minScore: + This option is available from v3.12.7 onward: + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.4 + minDeletionRatio: description: | - Filter out consolidation candidates with a score less than this. + This option is available from v3.12.7 onward: + + Clean up segments where the ratio of deleted documents is at least + this high. 
Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. - type: integer + type: number - default: 0 + minimum: 0.0 + maximum: 1.0 + default: 0.5 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool diff --git a/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md b/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md index 2f33e5c772..1f35fb9e88 100644 --- a/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md +++ b/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md @@ -346,34 +346,54 @@ paths: default: 0 minimum: 0.0 maximum: 1.0 - segmentsBytesFloor: - description: | - Defines the value (in bytes) to treat all smaller segments - as equal for consolidation selection. - type: integer - default: 25165824 segmentsBytesMax: description: | Maximum allowed size of all consolidated segments in bytes. type: integer default: 8589934592 - segmentsMax: - description: | - The maximum number of segments that are evaluated as - candidates for consolidation.
- type: integer - default: 200 - segmentsMin: + maxSkewThreshold: description: | - The minimum number of segments that are - evaluated as candidates for consolidation - type: integer - default: 50 - minScore: + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.4 + minDeletionRatio: description: | - Filter out consolidation candidates with a score less than this. + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. 
As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. - type: integer + type: number - default: 0 + minimum: 0.0 + maximum: 1.0 + default: 0.5 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool @@ -567,29 +587,51 @@ paths: type: number minimum: 0.0 maximum: 1.0 - segmentsBytesFloor: - description: | - Defines the value (in bytes) to treat all smaller segments - as equal for consolidation selection. - type: integer segmentsBytesMax: description: | Maximum allowed size of all consolidated segments in bytes. type: integer - segmentsMax: - description: | - The maximum number of segments that are evaluated as - candidates for consolidation. - type: integer - segmentsMin: + maxSkewThreshold: description: | - The minimum number of segments that are - evaluated as candidates for consolidation - type: integer - minScore: + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g.
a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: description: | - Filter out consolidation candidates with a score less than this. + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. - type: integer + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled). @@ -1039,29 +1081,51 @@ paths: type: number minimum: 0.0 maximum: 1.0 - segmentsBytesFloor: - description: | - Defines the value (in bytes) to treat all smaller segments - as equal for consolidation selection. - type: integer segmentsBytesMax: description: | Maximum allowed size of all consolidated segments in bytes. type: integer - segmentsMax: - description: | - The maximum number of segments that are evaluated as - candidates for consolidation.
- type: integer - segmentsMin: + maxSkewThreshold: description: | - The minimum number of segments that are - evaluated as candidates for consolidation - type: integer - minScore: + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: description: | - Filter out consolidation candidates with a score less than this. + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. 
As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. - type: integer + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled). @@ -1442,34 +1506,54 @@ paths: default: 0 minimum: 0.0 maximum: 1.0 - segmentsBytesFloor: - description: | - Defines the value (in bytes) to treat all smaller segments - as equal for consolidation selection. - type: integer - default: 25165824 segmentsBytesMax: description: | Maximum allowed size of all consolidated segments in bytes. type: integer default: 8589934592 - segmentsMax: - description: | - The maximum number of segments that are evaluated as - candidates for consolidation. - type: integer - default: 200 - segmentsMin: + maxSkewThreshold: description: | - The minimum number of segments that are - evaluated as candidates for consolidation - type: integer - default: 50 - minScore: + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g.
a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.4 + minDeletionRatio: description: | - Filter out consolidation candidates with a score less than this. + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. - type: integer + type: number - default: 0 + minimum: 0.0 + maximum: 1.0 + default: 0.5 responses: '200': description: | @@ -1641,29 +1725,51 @@ paths: type: number minimum: 0.0 maximum: 1.0 - segmentsBytesFloor: - description: | - Defines the value (in bytes) to treat all smaller segments - as equal for consolidation selection. - type: integer segmentsBytesMax: description: | Maximum allowed size of all consolidated segments in bytes. type: integer - segmentsMax: - description: | - The maximum number of segments that are evaluated as - candidates for consolidation. - type: integer - segmentsMin: + maxSkewThreshold: description: | - The minimum number of segments that are - evaluated as candidates for consolidation - type: integer - minScore: + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold.
Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: description: | - Filter out consolidation candidates with a score less than this. + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. - type: integer + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled).
@@ -1950,34 +2056,54 @@ paths: default: 0 minimum: 0.0 maximum: 1.0 - segmentsBytesFloor: - description: | - Defines the value (in bytes) to treat all smaller segments - as equal for consolidation selection. - type: integer - default: 25165824 segmentsBytesMax: description: | Maximum allowed size of all consolidated segments in bytes. type: integer default: 8589934592 - segmentsMax: - description: | - The maximum number of segments that are evaluated as - candidates for consolidation. - type: integer - default: 200 - segmentsMin: + maxSkewThreshold: description: | - The minimum number of segments that are - evaluated as candidates for consolidation - type: integer - default: 50 - minScore: + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + default: 0.4 + minDeletionRatio: description: | - Filter out consolidation candidates with a score less than this. + Clean up segments where the ratio of deleted documents is at least + this high. 
Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. - type: integer + type: number - default: 0 + minimum: 0.0 + maximum: 1.0 + default: 0.5 responses: '200': description: | @@ -2149,29 +2275,51 @@ paths: type: number minimum: 0.0 maximum: 1.0 - segmentsBytesFloor: - description: | - Defines the value (in bytes) to treat all smaller segments - as equal for consolidation selection. - type: integer segmentsBytesMax: description: | Maximum allowed size of all consolidated segments in bytes. type: integer - segmentsMax: - description: | - The maximum number of segments that are evaluated as - candidates for consolidation. - type: integer - segmentsMin: + maxSkewThreshold: description: | - The minimum number of segments that are - evaluated as candidates for consolidation - type: integer - minScore: + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size.
It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: description: | - Filter out consolidation candidates with a score less than this. + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. - type: integer + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled). @@ -2516,29 +2664,51 @@ paths: type: number minimum: 0.0 maximum: 1.0 - segmentsBytesFloor: - description: | - Defines the value (in bytes) to treat all smaller segments - as equal for consolidation selection.
- type: integer segmentsBytesMax: description: | Maximum allowed size of all consolidated segments in bytes. type: integer - segmentsMax: - description: | - The maximum number of segments that are evaluated as - candidates for consolidation. - type: integer - segmentsMin: + maxSkewThreshold: description: | - The minimum number of segments that are - evaluated as candidates for consolidation - type: integer - minScore: + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + type: number + minimum: 0.0 + maximum: 1.0 + minDeletionRatio: description: | - Filter out consolidation candidates with a score less than this. + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. 
It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. - type: integer + type: number + minimum: 0.0 + maximum: 1.0 writebufferIdle: description: | Maximum number of writers (segments) cached in the pool (`0` = disabled). diff --git a/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md b/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md index 036758127f..7bcfdf3676 100644 --- a/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md +++ b/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md @@ -483,23 +483,43 @@ is used by these writers (in terms of "writers pool") one can use `consolidationPolicy` properties for `"tier"` type: - - **segmentsMin** (_optional_; type: `integer`; default: `50`) - - The minimum number of segments that are evaluated as candidates for consolidation. - - **segmentsMax** (_optional_; type: `integer`; default: `200`) - - The maximum number of segments that are evaluated as candidates for consolidation. - - **segmentsBytesMax** (_optional_; type: `integer`; default: `8589934592`) Maximum allowed size of all consolidated segments in bytes. - - **segmentsBytesFloor** (_optional_; type: `integer`; default: `25165824`) - - Defines the value (in bytes) to treat all smaller segments as equal for consolidation - selection. - - **minScore** (_optional_; type: `integer`; default: `0`) - - Filter out consolidation candidates with a score less than this.
+ - **maxSkewThreshold** (_optional_; type: `number`; default: `0.4`) + + Merge a subset of segments where the ratio of the largest segment size + to the combined segment size is within this threshold. Increasing the + threshold leads to fewer segment files and thus a potentially higher + read performance and less file descriptors but at the expense of more + frequent consolidations and thus higher write load. + + The skew describes how much segment files vary in size. It is a number + between `0.0` and `1.0` and calculated by dividing the largest file size + of a set of segment files by the total size. + + Multiple combinations of candidate segments are checked and the one with + the lowest skew value is selected for consolidation. This rather selects + many than few segments, but the new merged segment will be below the + configured `segmentsBytesMax`. The skew threshold prevents unnecessary + consolidation of e.g. a big segment file with a very small one, where the + cost of writing a merged segment is higher than the gain in read performance. + + - **minDeletionRatio** (_optional_; type: `number`; default: `0.5`) + + Clean up segments where the ratio of deleted documents is at least + this high. Decreasing the minimum ratio leads to earlier consolidation + of segments with many deleted documents and thus reclamation of + disk space but causes a higher write load. + + The deletion ratio is the percentage of deleted documents across one + or more segment files. It is a number between `0.0` and `1.0` and + calculated by dividing the number of deleted documents by the total + number of documents. + + The segment files with the highest individual deletion ratio are + the candidates. As many as possible candidates are selected for + consolidation (in order of decreasing ratio), but the overall ratio + has to be at least `minDeletionRatio` and the new segment with the + active documents needs to be below the configured `segmentsBytesMax`. 
diff --git a/site/content/arangodb/4.0/release-notes/version-3.12/api-changes-in-3-12.md b/site/content/arangodb/4.0/release-notes/version-3.12/api-changes-in-3-12.md index 8cc010cc0a..e3a588dbca 100644 --- a/site/content/arangodb/4.0/release-notes/version-3.12/api-changes-in-3-12.md +++ b/site/content/arangodb/4.0/release-notes/version-3.12/api-changes-in-3-12.md @@ -363,6 +363,25 @@ By consolidating less often and with more data, less file descriptors are used. - `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB) - `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB) +##### Added and removed consolidation options for `arangosearch` Views + +Introduced in: v3.12.7 + +The following options for consolidating `arangosearch` Views have been removed +and are now ignored when specified in a request: + +- `consolidationPolicy` (with `type` set to `tier`): + - `segmentsMin` + - `segmentsMax` + - `segmentsBytesFloor` + - `minScore` + +The following new options have been added: + +- `consolidationPolicy` (with `type` set to `tier`): + - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`) + - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`) + #### Document API The following endpoints accept a new `versionAttribute` query parameter that adds @@ -501,6 +520,25 @@ By consolidating less often and with more data, less file descriptors are used. 
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB) - `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB) +##### Added and removed consolidation options for inverted indexes + +Introduced in: v3.12.7 + +The following options for consolidating inverted indexes have been removed +and are now ignored when specified in a request: + +- `consolidationPolicy` (with `type` set to `tier`): + - `segmentsMin` + - `segmentsMax` + - `segmentsBytesFloor` + - `minScore` + +The following new options have been added: + +- `consolidationPolicy` (with `type` set to `tier`): + - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`) + - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`) + #### Optimizer rule descriptions Introduced in: v3.10.9, v3.11.2 diff --git a/site/content/arangodb/4.0/release-notes/version-3.12/incompatible-changes-in-3-12.md b/site/content/arangodb/4.0/release-notes/version-3.12/incompatible-changes-in-3-12.md index 959cea82cf..1fe5a31d18 100644 --- a/site/content/arangodb/4.0/release-notes/version-3.12/incompatible-changes-in-3-12.md +++ b/site/content/arangodb/4.0/release-notes/version-3.12/incompatible-changes-in-3-12.md @@ -994,6 +994,29 @@ more data, less file descriptors are used. 
- `segmentsBytesMax` increased from `5368709120` (5 GiB) to `8589934592` (8 GiB) - `segmentsBytesFloor` increased from `2097152` (2 MiB) to `25165824` (24 MiB) +## Added and removed consolidation options for inverted indexes and `arangosearch` Views + +Introduced in: v3.12.7 + +The following options for consolidating inverted indexes as well as +`arangosearch` Views have been removed and are now ignored when specified in a request: + +- `consolidationPolicy` (with `type` set to `tier`): + - `segmentsMin` + - `segmentsMax` + - `segmentsBytesFloor` + - `minScore` + +The consolidation works differently now and uses the new `maxSkewThreshold` and +`minDeletionRatio` options together with the existing `segmentsBytesMax`. If you +previously used customized settings for the removed options, check if the default +values of the new options are acceptable or if you need to tune them according to +your workload. + +For details, see: +- [HTTP interface for inverted indexes](../../develop/http-api/indexes/inverted.md) +- [`arangosearch` View properties](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#view-properties) + ## HTTP RESTful API ### JavaScript-based traversal using `/_api/traversal` removed diff --git a/site/content/arangodb/4.0/release-notes/version-3.12/whats-new-in-3-12.md b/site/content/arangodb/4.0/release-notes/version-3.12/whats-new-in-3-12.md index d0d0c77010..199242cfb5 100644 --- a/site/content/arangodb/4.0/release-notes/version-3.12/whats-new-in-3-12.md +++ b/site/content/arangodb/4.0/release-notes/version-3.12/whats-new-in-3-12.md @@ -2452,6 +2452,38 @@ environment variable `NAME`. If there is an environment variable called `PID` or `TEMP_BASE_DIR`, then `@PID@` or `@TEMP_BASE_DIR@` is substituted with the value of the respective environment variable.
+### New consolidation algorithm for inverted indexes and `arangosearch` Views + +Introduced in: v3.12.7 + +The `tier` consolidation policy now uses a different algorithm for merging +and cleaning up segments. Overall, it avoids consolidating segments where the +cost of writing the new segment is high and the gain in read performance is low +(e.g. combining a big segment file with a very small one). + +The following options have been removed for inverted indexes as well as +`arangosearch` Views because the new consolidation algorithm doesn't use them: + +- `consolidationPolicy` (with `type` set to `tier`): + - `segmentsMin` + - `segmentsMax` + - `segmentsBytesFloor` + - `minScore` + +The following new options have been added: + +- `consolidationPolicy` (with `type` set to `tier`): + - `maxSkewThreshold` (number in range `[0.0, 1.0]`, default: `0.4`) + - `minDeletionRatio` (number in range `[0.0, 1.0]`, default: `0.5`) + +If you previously used customized settings for the removed options, check if the +default values of the new options are acceptable or if you need to tune them +according to your workload. 
+ +For details, see: +- [HTTP interface for inverted indexes](../../develop/http-api/indexes/inverted.md) +- [`arangosearch` View properties](../../indexes-and-search/arangosearch/arangosearch-views-reference.md#view-properties) + ## Client tools ### Protocol aliases for endpoints From e99f223a9518e9cb55457148c2bb96a1709b8b66 Mon Sep 17 00:00:00 2001 From: Simran Spiller Date: Thu, 11 Dec 2025 13:48:25 +0100 Subject: [PATCH 2/4] Feedback --- .../3.12/develop/http-api/indexes/inverted.md | 70 ++- .../http-api/views/arangosearch-views.md | 546 ++++++++++------- .../4.0/develop/http-api/indexes/inverted.md | 70 ++- .../http-api/views/arangosearch-views.md | 560 ++++++++++-------- 4 files changed, 731 insertions(+), 515 deletions(-) diff --git a/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md b/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md index 97fdd8bef8..1cffffd269 100644 --- a/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md +++ b/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md @@ -605,22 +605,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). 
+ + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -629,21 +634,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. 
It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. 
type: integer minimum: 0.0 maximum: 1.0 diff --git a/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md b/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md index 825adc789a..d4d87167db 100644 --- a/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md +++ b/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md @@ -386,22 +386,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. 
Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -410,21 +415,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. 
+ + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. type: integer minimum: 0.0 maximum: 1.0 @@ -657,22 +669,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). - The skew describes how much segment files vary in size. 
It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -680,21 +697,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. 
- - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. 
type: integer minimum: 0.0 maximum: 1.0 @@ -1182,22 +1206,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. 
a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -1205,21 +1234,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). 
+ + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. type: integer minimum: 0.0 maximum: 1.0 @@ -1643,22 +1679,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. 
While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -1667,21 +1708,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. 
As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. type: integer minimum: 0.0 maximum: 1.0 @@ -1892,22 +1940,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. 
Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. 
The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -1915,21 +1968,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. 
A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. - type: integer + type: number minimum: 0.0 maximum: 1.0 @@ -2259,22 +2319,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification.
+ + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -2283,21 +2348,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. 
+ The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. - type: integer + type: number minimum: 0.0 maximum: 1.0 @@ -2508,22 +2580,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size.
For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -2531,21 +2608,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. 
Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. 
Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. - type: integer + type: number minimum: 0.0 maximum: 1.0 @@ -2928,22 +3012,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation.
This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -2951,21 +3040,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. 
For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. - type: integer + type: number minimum: 0.0 maximum: 1.0 diff --git a/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md b/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md index c24a636d46..51f5ce263b 100644 --- a/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md +++ b/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md @@ -574,22 +574,27 @@ paths: description: | This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size.
It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 @@ -598,21 +603,28 @@ paths: description: | This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. 
Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. 
Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. - type: integer + type: number minimum: 0.0 maximum: 1.0 diff --git a/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md b/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md index 1f35fb9e88..e2da04f01d 100644 --- a/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md +++ b/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md @@ -353,43 +353,55 @@ paths: default: 8589934592 maxSkewThreshold: description: | - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot.
+ Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 default: 0.4 minDeletionRatio: description: | - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. 
+ + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. - type: integer + type: number minimum: 0.0 maximum: 1.0 @@ -593,42 +605,54 @@ paths: type: integer maxSkewThreshold: description: | - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size.
For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 minDeletionRatio: description: | - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. 
It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. 
- type: integer + type: number minimum: 0.0 maximum: 1.0 @@ -1087,42 +1111,54 @@ paths: type: integer maxSkewThreshold: description: | - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g.
a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 minDeletionRatio: description: | - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). 
+ + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. type: integer minimum: 0.0 maximum: 1.0 @@ -1513,43 +1549,55 @@ paths: default: 8589934592 maxSkewThreshold: description: | - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. 
While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 default: 0.4 minDeletionRatio: description: | - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. 
As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. type: integer minimum: 0.0 maximum: 1.0 @@ -1731,42 +1779,54 @@ paths: type: integer maxSkewThreshold: description: | - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. 
Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. 
The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 minDeletionRatio: description: | - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. 
+ A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. type: integer minimum: 0.0 maximum: 1.0 @@ -2063,43 +2123,55 @@ paths: default: 8589934592 maxSkewThreshold: description: | - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. 
+ Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 default: 0.4 minDeletionRatio: description: | - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. 
+ + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. type: integer minimum: 0.0 maximum: 1.0 @@ -2281,42 +2353,54 @@ paths: type: integer maxSkewThreshold: description: | - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. 
For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 minDeletionRatio: description: | - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. 
It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. 
type: integer minimum: 0.0 maximum: 1.0 @@ -2670,42 +2754,54 @@ paths: type: integer maxSkewThreshold: description: | - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. 
a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. type: number minimum: 0.0 maximum: 1.0 minDeletionRatio: description: | - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). 
+ + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. type: integer minimum: 0.0 maximum: 1.0 From 3588f8de14f219b716269cd0f317fbc2df7a1cf7 Mon Sep 17 00:00:00 2001 From: Simran Spiller Date: Thu, 11 Dec 2025 22:13:37 +0100 Subject: [PATCH 3/4] Update text in two more places --- .../arangosearch-views-reference.md | 70 +++++++++++-------- .../arangosearch-views-reference.md | 70 +++++++++++-------- 2 files changed, 82 insertions(+), 58 deletions(-) diff --git a/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md b/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md index e9d50f7b80..60736a4cc6 100644 --- a/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md +++ b/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md @@ -516,39 +516,51 @@ is used by these writers (in terms of "writers pool") one can use This option is available from v3.12.7 onward: - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. 
- - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). + + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may potentially improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there are a large number of segment files that don't vary in size a lot. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. 
- **minDeletionRatio** (_optional_; type: `number`; default: `0.5`) This option is available from v3.12.7 onward: - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents. - - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. 
+ A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. diff --git a/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md b/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md index 7bcfdf3676..6395f7fbd3 100644 --- a/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md +++ b/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md @@ -489,37 +489,49 @@ is used by these writers (in terms of "writers pool") one can use - **maxSkewThreshold** (_optional_; type: `number`; default: `0.4`) - Merge a subset of segments where the ratio of the largest segment size - to the combined segment size is within this threshold. Increasing the - threshold leads to fewer segment files and thus a potentially higher - read performance and less file descriptors but at the expense of more - frequent consolidations and thus higher write load. - - The skew describes how much segment files vary in size. It is a number - between `0.0` and `1.0` and calculated by dividing the largest file size - of a set of segment files by the total size. + The skew describes how much segment files vary in file size. It is a number + between `0.0` and `1.0` and is calculated by dividing the largest file size + of a set of segment files by the total size. For example, the skew of a + 200 MiB, 300 MiB, and 500 MiB segment file is `0.5` (`500 / 1000`). 
+ + A large `maxSkewThreshold` value allows merging large segment files with + smaller ones, consolidation occurs more frequently, and there are fewer + segment files on disk at all times. While this may improve the + read performance and use fewer file descriptors, frequent consolidations + cause a higher write load and thus a higher write amplification. + + On the other hand, a small threshold value triggers the consolidation only + when there is a large number of segment files that don't vary much in size. + Consolidation occurs less frequently, reducing the write amplification, but + it can result in a greater number of segment files on disk. Multiple combinations of candidate segments are checked and the one with - the lowest skew value is selected for consolidation. This rather selects - many than few segments, but the new merged segment will be below the - configured `segmentsBytesMax`. The skew threshold prevents unnecessary - consolidation of e.g. a big segment file with a very small one, where the - cost of writing a merged segment is higher than the gain in read performance. + the lowest skew value is selected for consolidation. The selection process + picks the greatest number of segments that together have the lowest skew value + while ensuring that the size of the new consolidated segment remains under + the configured `segmentsBytesMax`. - **minDeletionRatio** (_optional_; type: `number`; default: `0.5`) - Clean up segments where the ratio of deleted documents is at least - this high. Decreasing the minimum ratio leads to earlier consolidation - of segments with many deleted documents and thus reclamation of - disk space but causes a higher write load. - - The deletion ratio is the percentage of deleted documents across one - or more segment files. It is a number between `0.0` and `1.0` and - calculated by dividing the number of deleted documents by the total - number of documents.
- - The segment files with the highest individual deletion ratio are - the candidates. As many as possible candidates are selected for - consolidation (in order of decreasing ratio), but the overall ratio - has to be at least `minDeletionRatio` and the new segment with the - active documents needs to be below the configured `segmentsBytesMax`. + The `minDeletionRatio` represents the minimum required deletion ratio + in one or more segments to perform a cleanup of those segments. + It is a number between `0.0` and `1.0`. + + The deletion ratio is the percentage of deleted documents across one or + more segment files and is calculated by dividing the number of deleted + documents by the total number of documents in a segment or a group of + segments. For example, if there is a segment with 1000 documents of which + 300 are deleted and another segment with 1000 documents of which 700 are + deleted, the deletion ratio is `0.5` (50%, calculated as `1000 / 2000`). + + The `minDeletionRatio` threshold must be carefully selected. A smaller + value leads to earlier cleanup of deleted documents from segments and + thus reclamation of disk space but it generates a higher write load. + A very large value lowers the write amplification but at the same time + the system can be left with a large number of segment files with a high + percentage of deleted documents that occupy disk space unnecessarily. + + During cleanup, the segment files are first arranged in decreasing + order of their individual deletion ratios. Then the largest subset of + segments whose collective deletion ratio is greater than or equal to + `minDeletionRatio` is picked. 
From ec122cd6b89afa89e7ef4153384eda425eaccaa7 Mon Sep 17 00:00:00 2001 From: Simran Spiller Date: Fri, 12 Dec 2025 08:33:27 +0100 Subject: [PATCH 4/4] Update consolidation policy description --- .../3.12/develop/http-api/indexes/inverted.md | 2 +- .../http-api/views/arangosearch-views.md | 40 +++++++++++-------- .../arangosearch-views-reference.md | 2 +- .../4.0/develop/http-api/indexes/inverted.md | 2 +- .../http-api/views/arangosearch-views.md | 40 +++++++++++-------- .../arangosearch-views-reference.md | 2 +- 6 files changed, 52 insertions(+), 36 deletions(-) diff --git a/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md b/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md index 1cffffd269..1120c9dd4a 100644 --- a/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md +++ b/site/content/arangodb/3.12/develop/http-api/indexes/inverted.md @@ -561,7 +561,7 @@ paths: upon several possible configurable formulas as defined by their types. The supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. type: string default: tier diff --git a/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md b/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md index d4d87167db..c8951af785 100644 --- a/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md +++ b/site/content/arangodb/3.12/develop/http-api/views/arangosearch-views.md @@ -307,7 +307,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. 
- If the `bytes_accum` type is used, then the `threshold` property is available. _Background:_ @@ -330,7 +331,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -611,7 +612,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -620,7 +622,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -1148,7 +1150,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. 
- If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -1157,7 +1160,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -1600,7 +1603,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. _Background:_ @@ -1623,7 +1627,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -1882,7 +1886,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. 
- If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -1891,7 +1896,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -2241,7 +2246,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. _Background:_ @@ -2264,7 +2270,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -2522,7 +2528,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. 
- If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -2531,7 +2538,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -2954,7 +2961,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -2963,7 +2971,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. 
- `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` diff --git a/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md b/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md index 60736a4cc6..7c483e2737 100644 --- a/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md +++ b/site/content/arangodb/3.12/indexes-and-search/arangosearch/arangosearch-views-reference.md @@ -462,7 +462,7 @@ is used by these writers (in terms of "writers pool") one can use - `"bytes_accum"`: Consolidation is performed based on current memory consumption of segments and `threshold` property value. - - `"tier"`: Consolidate based on segment byte size and live document count + - `"tier"`: Consolidate based on segment byte size skew and live document count as dictated by the customization attributes. {{< warning >}} diff --git a/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md b/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md index 51f5ce263b..77e860c288 100644 --- a/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md +++ b/site/content/arangodb/4.0/develop/http-api/indexes/inverted.md @@ -561,7 +561,7 @@ paths: upon several possible configurable formulas as defined by their types. The supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes.
type: string default: tier diff --git a/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md b/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md index e2da04f01d..5c3d863fcb 100644 --- a/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md +++ b/site/content/arangodb/4.0/develop/http-api/views/arangosearch-views.md @@ -307,7 +307,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. _Background:_ @@ -330,7 +331,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -576,7 +577,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -585,7 +587,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. 
The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -1082,7 +1084,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -1091,7 +1094,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -1503,7 +1506,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. _Background:_ @@ -1526,7 +1530,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. 
The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -1750,7 +1754,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -1759,7 +1764,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -2078,7 +2083,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. _Background:_ @@ -2101,7 +2107,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. 
The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -2324,7 +2330,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -2333,7 +2340,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` @@ -2725,7 +2732,8 @@ paths: description: | The consolidation policy to apply for selecting which segments should be merged. - - If the `tier` type is used, then the `segments*` and `minScore` properties are available. + - If the `tier` type is used, then the `maxSkewThreshold`, + `minDeletionRatio`, `segments*`, and `minScore` properties are available. - If the `bytes_accum` type is used, then the `threshold` property is available. type: object properties: @@ -2734,7 +2742,7 @@ paths: The segment candidates for the "consolidation" operation are selected based upon several possible configurable formulas as defined by their types. 
The currently supported types are: - - `"tier"`: consolidate based on segment byte size and live + - `"tier"`: consolidate based on segment byte size skew and live document count as dictated by the customization attributes. - `"bytes_accum"`: consolidate if and only if `{threshold} > (segment_bytes + sum_of_merge_candidate_segment_bytes) / all_segment_bytes` diff --git a/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md b/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md index 6395f7fbd3..e0c9ef42ed 100644 --- a/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md +++ b/site/content/arangodb/4.0/indexes-and-search/arangosearch/arangosearch-views-reference.md @@ -462,7 +462,7 @@ is used by these writers (in terms of "writers pool") one can use - `"bytes_accum"`: Consolidation is performed based on current memory consumption of segments and `threshold` property value. - - `"tier"`: Consolidate based on segment byte size and live document count + - `"tier"`: Consolidate based on segment byte size skew and live document count as dictated by the customization attributes. {{< warning >}}