diff --git a/backend/src/main/java/com/bakdata/conquery/models/forms/util/DateContext.java b/backend/src/main/java/com/bakdata/conquery/models/forms/util/DateContext.java index 96270441f3..e79f6eb93c 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/forms/util/DateContext.java +++ b/backend/src/main/java/com/bakdata/conquery/models/forms/util/DateContext.java @@ -64,7 +64,7 @@ public class DateContext { * The generation of the contexts happens for each resolution with their mapped alignment. * The returned list is primarily sorted in the order of the given resolutions and secondarily by the temporal * succession of the contexts, e.g.: with resolutions YEARS, QUARTERS given the list would first contain the - * ascending year ranges and than the quarter ranges. The alignment references always the lower bound of the + * ascending year ranges and then the quarter ranges. The alignment references always the lower bound of the * dateRangeMask. * @param dateRangeMask The mask in which the contexts are generated * @param resolutionAndAlignment The resolutions to produce and their alignment @@ -125,7 +125,7 @@ public static Function> getDateRangeSubdivider(Align if (alignedSubdivisionCount % alignedPerResolution != 1) { // The loop did not fullfill the resolution-sized subdivision it begun - result.add(alignRef.makeMergedRange(alignedSubdivisions.get(alignedSubdivisions.size() - 1), interestingDate)); + result.add(alignRef.makeMergedRange(alignedSubdivisions.getLast(), interestingDate)); } return alignRef.getAlignedIterationDirection(result); diff --git a/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java b/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java index f70370359c..b99322b7a0 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java @@ -124,18 +124,12 @@ public 
FrontendPreviewConfig getEntityPreviewFrontendConfig(DatasetId dataset) { * The user will upload a file and expect only well-corresponding resolutions. */ public ResolvedFilterValues resolveFilterValues(FilterId filterId, List searchTerms) { - SelectFilter filter = (SelectFilter) filterId.resolve(); - - + final SelectFilter filter = (SelectFilter) filterId.resolve(); final Namespace namespace = namespaces.get(filter.getDataset()); - SearchProcessor filterSearch = namespace.getFilterSearch(); - - final ExactFilterValueResult exactResult = filterSearch.findExact(filter, searchTerms); - - final ConnectorId connectorId = filter.getConnector().getId(); + final ExactFilterValueResult exactResult = namespace.getFilterSearch().findExact(filter, searchTerms); - return new ResolvedFilterValues(new ResolvedFilterResult(connectorId, filter.getId().toString(), exactResult.resolved), exactResult.unresolved); + return new ResolvedFilterValues(new ResolvedFilterResult(filterId.getConnector(), filter.getId().toString(), exactResult.resolved), exactResult.unresolved); } public AutoCompleteResult autocompleteTextFilter( diff --git a/backend/src/main/java/com/bakdata/conquery/util/search/SearchProcessor.java b/backend/src/main/java/com/bakdata/conquery/util/search/SearchProcessor.java index b9f8b036d2..d20bbeb0c8 100644 --- a/backend/src/main/java/com/bakdata/conquery/util/search/SearchProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/util/search/SearchProcessor.java @@ -78,7 +78,8 @@ static List extractKeywords(FrontendValue value) { /** - * Query for an exact matching {@link FrontendValue}. + * Query for an exact matching {@link FrontendValue}, per search term return only the first match by priority of search sources. + * * Matches {@link FrontendValue#getValue()} or {@link FrontendValue#getLabel()} but case-insensitive. 
* @param filter The filter to the resulting value must correspond to (domain of the {@link FrontendValue}) * @param searchTerms The exact terms to match diff --git a/backend/src/main/java/com/bakdata/conquery/util/search/internal/InternalFilterSearch.java b/backend/src/main/java/com/bakdata/conquery/util/search/internal/InternalFilterSearch.java index 706ce4a446..63537fef15 100644 --- a/backend/src/main/java/com/bakdata/conquery/util/search/internal/InternalFilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/util/search/internal/InternalFilterSearch.java @@ -292,13 +292,13 @@ public List findExact(SelectFilter filter, String searchTerm) return out; } + @Override public ConceptsProcessor.ExactFilterValueResult findExact(SelectFilter filter, List searchTerms) { final List out = new ArrayList<>(); // search in the full text engine final Set openSearchTerms = new HashSet<>(searchTerms); - for (final Iterator iterator = openSearchTerms.iterator(); iterator.hasNext(); ) { final String searchTerm = iterator.next(); @@ -309,7 +309,7 @@ public ConceptsProcessor.ExactFilterValueResult findExact(SelectFilter filter } iterator.remove(); - out.addAll(results); + out.add(results.getFirst()); } return new ConceptsProcessor.ExactFilterValueResult(out, openSearchTerms); diff --git a/backend/src/main/java/com/bakdata/conquery/util/search/solr/FilterValueSearch.java b/backend/src/main/java/com/bakdata/conquery/util/search/solr/FilterValueSearch.java index 18b1a15613..d066fff347 100644 --- a/backend/src/main/java/com/bakdata/conquery/util/search/solr/FilterValueSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/util/search/solr/FilterValueSearch.java @@ -200,6 +200,9 @@ We chunk the values for resolving here so that the request does not bust any URI return new ConceptsProcessor.ExactFilterValueResult(List.of(), terms); } + // We are matching on label and value. + // So if for some reason a value is present in multiple sources (map, template, ...) 
but has different labels + both can be found. String collect = Stream.of( SolrFrontendValue.Fields.value_s, SolrFrontendValue.Fields.label_t @@ -217,7 +220,8 @@ We chunk the values for resolving here so that the request does not bust any URI try { List resolvedValues = new ArrayList<>(); - SolrQuery solrQuery = buildSolrQuery(collect, 0, batchSize, false, false, false); + // We sort to return the value with the highest source priority and get the best description + SolrQuery solrQuery = buildSolrQuery(collect, 0, batchSize, true, false, false); String decodedQuery = URLDecoder.decode(String.valueOf(solrQuery), StandardCharsets.UTF_8); int queryHash = decodedQuery.hashCode(); diff --git a/backend/src/main/java/com/bakdata/conquery/util/search/solr/SolrProcessor.java b/backend/src/main/java/com/bakdata/conquery/util/search/solr/SolrProcessor.java index 87e7af8d4f..639e9c2edd 100644 --- a/backend/src/main/java/com/bakdata/conquery/util/search/solr/SolrProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/util/search/solr/SolrProcessor.java @@ -73,20 +73,32 @@ public class SolrProcessor implements SearchProcessor, Managed { private final FilterValueConfig filterValueConfig; private final Map indexers = new ConcurrentHashMap<>(); - - private SolrClient solrSearchClient; - - private SolrClient solrIndexClient; - /** * Single threaded runtime for the chunk submitter. * This is mainly used to decouple mina threads from the solr client in order to prevent blocking and to convert between {@link com.bakdata.conquery.models.messages.namespaces.specific.RegisterColumnValues}'s * and solr chunk sizes. 
*/ private final ExecutorService chunkDecoupleExecutor = Executors.newSingleThreadExecutor(new ThreadFactoryBuilder() - .setNameFormat("solr-submitter-%d") - .setDaemon(true) - .build()); + .setNameFormat("solr-submitter-%d") + .setDaemon(true) + .build()); + private SolrClient solrSearchClient; + private SolrClient solrIndexClient; + + /** + * Higher priority for lower number + * @param searchable The searchable entity that is mapped to a priority + * @return The priority + */ + static int getFilterValueSourcePriority(Searchable searchable) { + return switch (searchable) { + case SolrEmptySeachable ignore -> 0; + case LabelMap ignore -> 1; + case FilterTemplate ignore -> 2; + case Column ignore -> 3; + default -> Integer.MAX_VALUE; + }; + } @Override public void start() throws Exception { @@ -102,14 +114,16 @@ private synchronized void refreshClients() { if (solrSearchClient != null) { try { solrSearchClient.close(); - } catch (Exception e) { + } + catch (Exception e) { log.warn("Failed to close solr search client", e); } } if (solrIndexClient != null) { try { solrIndexClient.close(); - } catch (Exception e) { + } + catch (Exception e) { log.warn("Failed to close solr index client", e); } } @@ -134,7 +148,8 @@ public void clearSearch() { try (SolrClient solrClient = solrSearchClientFactory.get()) { log.info("Clearing collection: {}", solrClient.getDefaultCollection()); solrClient.deleteByQuery("*:*"); - } catch (SolrServerException | IOException e) { + } + catch (SolrServerException | IOException e) { throw new RuntimeException(e); } } @@ -180,21 +195,6 @@ private String getNameFromColumn(Column column) { return "shared_column_" + columnGroup; } - /** - * Higher priority for lower number - * @param searchable The searchable entity that is map to a priority - * @return The priority - */ - static int getFilterValueSourcePriority(Searchable searchable) { - return switch (searchable) { - case SolrEmptySeachable ignore -> 0; - case LabelMap ignore -> 1; - case 
FilterTemplate ignore -> 2; - case Column ignore -> 3; - default -> Integer.MAX_VALUE; - }; - } - /*package*/ FilterValueIndexer getIndexerFor(Searchable searchable) { String nameForSearchable = buildNameForSearchable(searchable); int sourcePriority = getFilterValueSourcePriority(searchable); @@ -231,7 +231,8 @@ public AutoCompleteResult topItems(SelectFilter filter, String text, Integer } @Override - public void indexManagerResidingSearches(Set managerSearchables, AtomicBoolean cancelledState, ProgressReporter progressReporter) throws InterruptedException { + public void indexManagerResidingSearches(Set managerSearchables, AtomicBoolean cancelledState, ProgressReporter progressReporter) + throws InterruptedException { // Index an empty result for all searches indexEmptyLabel(); @@ -239,7 +240,7 @@ public void indexManagerResidingSearches(Set managerSearchables, Ato progressReporter.setMax(managerSearchables.size()); progressReporter.start(); // Most computations are cheap but data intensive: we fork here to use as many cores as possible. 
- try(final ExecutorService service = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1)) { + try (final ExecutorService service = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1)) { final Map> searchCache = new ConcurrentHashMap<>(); for (Searchable searchable : managerSearchables) { @@ -357,10 +358,6 @@ private void indexFilterTemplate(FilterValueIndexer search, FilterTemplate temp) } } - public Collection findExact(SelectFilter filter, String searchTerm) { - - return findExact(filter, List.of(searchTerm)).resolved(); - } @Override public ConceptsProcessor.ExactFilterValueResult findExact(SelectFilter filter, List searchTerms) { @@ -371,7 +368,6 @@ public ConceptsProcessor.ExactFilterValueResult findExact(SelectFilter filter @Override public AutoCompleteResult query(SelectFilter filter, String maybeText, int itemsPerPage, int pageNumber) { - int start = itemsPerPage * pageNumber; return topItems(filter, maybeText, start, itemsPerPage); } @@ -406,7 +402,7 @@ public void execute() throws Exception { try { Stopwatch stopwatch = Stopwatch.createStarted(); - try(ExecutorService executorService = Executors.newFixedThreadPool(4)) { + try (ExecutorService executorService = Executors.newFixedThreadPool(4)) { for (ColumnId columnId : columns) { executorService.submit(() -> { solrProcessor.finalizeSearch(columnId.resolve()); @@ -417,7 +413,8 @@ public void execute() throws Exception { solrProcessor.explicitCommit(); getProgressReporter().report(1); log.info("Finished commit on collection {} in {}", solrProcessor.solrIndexClient.getDefaultCollection(), stopwatch); - } catch (Exception e) { + } + catch (Exception e) { log.error("Unable to issue explicit commit on collection {}", solrProcessor.solrIndexClient.getDefaultCollection(), e); } } diff --git a/backend/src/test/java/com/bakdata/conquery/util/search/SolrFilterValueTest.java b/backend/src/test/java/com/bakdata/conquery/util/search/SolrFilterValueTest.java index 
3b23112b12..f11205bf75 100644 --- a/backend/src/test/java/com/bakdata/conquery/util/search/SolrFilterValueTest.java +++ b/backend/src/test/java/com/bakdata/conquery/util/search/SolrFilterValueTest.java @@ -126,7 +126,8 @@ public List getSearchReferences() { LabelMap labelMap = new LabelMap(getId(), ImmutableBiMap.of( "a", "Map A", "map b", "Map B", - "map c", "Map C" + "map c", "Map C", + "e", "Map E" // exists in all sources ), 0, false); final FilterTemplate index = new FilterTemplate( @@ -150,6 +151,10 @@ public FilterId getId() { }; } + public static Collection findExact(SolrProcessor solrProcessor, SelectFilter filter, String searchTerm) { + return solrProcessor.findExact(filter, List.of(searchTerm)).resolved(); + } + @Test @Order(0) public void addData() throws InterruptedException, SolrServerException, IOException { @@ -163,6 +168,7 @@ public void addData() throws InterruptedException, SolrServerException, IOExcept "a", // should be shadowed by LabelMap "b", // should be shadowed by external csv map "column c", + "e", // exists in all sources "column ab", "column ba", "" // Empty string handling @@ -190,7 +196,7 @@ public void addData() throws InterruptedException, SolrServerException, IOExcept @Order(1) public void findExactColumn() { - Collection actual = searchProcessor.findExact(FILTER, "column c"); + Collection actual = findExact(searchProcessor, FILTER, "column c"); assertThat(actual).containsExactly(new FrontendValue("column c", "column c")); } @@ -198,12 +204,12 @@ public void findExactColumn() { @Test @Order(1) public void findExactMap() { - Collection actualLabel = searchProcessor.findExact(FILTER, "Map A"); + Collection actualLabel = findExact(searchProcessor, FILTER, "Map A"); assertThat(actualLabel).containsExactly(new FrontendValue("a", "Map A")); - Collection actualValue = searchProcessor.findExact(FILTER, "map a"); + Collection actualValue = findExact(searchProcessor, FILTER, "map a"); assertThat(actualValue).containsExactly(new 
FrontendValue("a", "Map A")); } @@ -217,11 +223,11 @@ public void findEmptyTermFirstPage() { assertThat(uut.values()).isEqualTo(List.of( new FrontendValue("", "No Value", null), new FrontendValue("a", "Map A", null), + new FrontendValue("e", "Map E", null), new FrontendValue("map b", "Map B", null), - new FrontendValue("map c", "Map C", null), - new FrontendValue("b", "Data b", "b") + new FrontendValue("map c", "Map C", null) )); - assertThat(uut.total()).isEqualTo(13); + assertThat(uut.total()).isEqualTo(14); } ); } @@ -233,13 +239,13 @@ public void findEmptyTermSecondPage() { assertThat(actual).satisfies(uut -> { assertThat(uut.values()).isEqualTo(List.of( + new FrontendValue("b", "Data b", "b"), new FrontendValue("data a", "data a", "data a"), new FrontendValue("data c", "Data C", "data c"), new FrontendValue("data d", "data d", "data d"), - new FrontendValue("external-null", "external-null", "external-null"), - new FrontendValue("","internal", null) + new FrontendValue("external-null", "external-null", "external-null") )); - assertThat(uut.total()).isEqualTo(13); + assertThat(uut.total()).isEqualTo(14); } ); } @@ -251,11 +257,12 @@ public void findEmptyTermThirdPage() { assertThat(actual).satisfies(uut -> { assertThat(uut.values()).isEqualTo(List.of( + new FrontendValue("","internal", null), new FrontendValue("column ab", "column ab", "null"), new FrontendValue("column ba", "column ba", "null"), new FrontendValue("column c", "column c", "null") )); - assertThat(uut.total()).isEqualTo(13); + assertThat(uut.total()).isEqualTo(14); } ); } @@ -272,6 +279,7 @@ public void findTerm1() { new FrontendValue("data a", "Data", "data a"), new FrontendValue("map b", "Map B", "null"), new FrontendValue("map c", "Map C", "null"), + new FrontendValue("e", "Map E", "null"), new FrontendValue("b", "Data b", "b"), new FrontendValue("data c", "Data C", "data c"), new FrontendValue("data d", "data d", "data d"), @@ -281,7 +289,7 @@ public void findTerm1() { new 
FrontendValue("column ba", "column ba", "null"), new FrontendValue("column c", "column c", "null") ), - 12 + 13 ) ); } @@ -299,13 +307,14 @@ public void findTerm2() { new FrontendValue("a", "Map A", null), new FrontendValue("map b", "Map B", null), new FrontendValue("map c", "Map C", null), + new FrontendValue("e", "Map E", "null"), new FrontendValue("data a", "Data", "data a"), new FrontendValue("b", "Data B", "b"), new FrontendValue("data c", "Data C", "data c"), new FrontendValue("data d", "data d", "data d"), new FrontendValue("column c", "column c", null) ), - 10 + 11 ) ); } @@ -363,7 +372,7 @@ public void findPhrase1LimitPage1() { @Order(3) public void findExactNothing() { - Collection actual = searchProcessor.findExact(FILTER, ""); + Collection actual = findExact(searchProcessor, FILTER, ""); assertThat(actual).isEmpty(); } @@ -372,7 +381,7 @@ public void findExactNothing() { @Order(3) public void findExactUnknown() { - Collection actual = searchProcessor.findExact(FILTER, "z"); + Collection actual = findExact(searchProcessor, FILTER, "z"); assertThat(actual).isEmpty(); } @@ -381,7 +390,7 @@ public void findExactUnknown() { @Order(3) public void findExactUppercase() { - Collection actual = searchProcessor.findExact(FILTER, "MAP A"); + Collection actual = findExact(searchProcessor, FILTER, "MAP A"); assertThat(actual).containsExactly(new FrontendValue("a", "Map A")); } @@ -390,9 +399,9 @@ public void findExactUppercase() { @Order(3) public void findExactMultiple() { - ConceptsProcessor.ExactFilterValueResult actual = searchProcessor.findExact(FILTER, List.of("MAP A", "z")); + ConceptsProcessor.ExactFilterValueResult actual = searchProcessor.findExact(FILTER, List.of("MAP A", "z", "Data e")); - assertThat(actual).usingRecursiveComparison().isEqualTo(new ConceptsProcessor.ExactFilterValueResult(List.of(new FrontendValue("a", "Map A")),Set.of("z"))); + assertThat(actual).usingRecursiveComparison().isEqualTo(new 
ConceptsProcessor.ExactFilterValueResult(List.of(new FrontendValue("a", "Map A"), new FrontendValue("e", "Data E", "e")),Set.of("z"))); } @Test diff --git a/backend/src/test/resources/shared/mapping.csv b/backend/src/test/resources/shared/mapping.csv index 6d1ae9b5ec..748ff71877 100644 --- a/backend/src/test/resources/shared/mapping.csv +++ b/backend/src/test/resources/shared/mapping.csv @@ -4,6 +4,7 @@ b,unused,Data B, data c,unused,Data C, data c,unused,Data C duplicate, data d,unused,,unused +e,unused,Data E,unused ,,internal,null external-null,,, ,internal-external-null,, \ No newline at end of file