From 0f46369cfc96e152badb0b09648551ca8ddc7e20 Mon Sep 17 00:00:00 2001 From: Federico Grilli Date: Fri, 30 May 2025 09:52:02 +0200 Subject: [PATCH] JCR-5151 Provide an interface for ConsistencyCheck * allow plugging in of custom consistency check implementations * make required methods and classes protected (where possible) or public * ConsistencyCheck class is deprecated and delegates to DefaultConsistencyCheck --- .../bundle/ConsistencyCheckerError.java | 2 +- .../core/query/lucene/ConsistencyCheck.java | 759 +---------------- .../query/lucene/ConsistencyCheckError.java | 6 +- .../lucene/ConsistencyCheckInterface.java | 58 ++ .../query/lucene/DefaultConsistencyCheck.java | 803 ++++++++++++++++++ .../core/query/lucene/MultiIndex.java | 36 +- .../core/query/lucene/SearchIndex.java | 56 +- .../apache/jackrabbit/core/TestHelper.java | 5 +- .../SearchIndexConsistencyCheckTest.java | 24 +- 9 files changed, 987 insertions(+), 762 deletions(-) create mode 100644 jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckInterface.java create mode 100644 jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultConsistencyCheck.java diff --git a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/bundle/ConsistencyCheckerError.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/bundle/ConsistencyCheckerError.java index ccc2ac7495a..a0f2b5bf36f 100644 --- a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/bundle/ConsistencyCheckerError.java +++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/bundle/ConsistencyCheckerError.java @@ -25,7 +25,7 @@ /** * Base class for errors reported by the {@link ConsistencyCheckerImpl} */ -abstract class ConsistencyCheckerError { +public abstract class ConsistencyCheckerError { protected final String message; protected final NodeId nodeId; diff --git 
a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java index 8192984366c..2a849223d0c 100644 --- a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java +++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheck.java @@ -16,761 +16,50 @@ */ package org.apache.jackrabbit.core.query.lucene; -import org.apache.commons.io.IOExceptionWithCause; -import org.apache.jackrabbit.core.HierarchyManager; -import org.apache.jackrabbit.core.RepositoryImpl; -import org.apache.jackrabbit.core.cluster.ClusterException; -import org.apache.jackrabbit.core.cluster.ClusterNode; -import org.apache.jackrabbit.core.persistence.IterablePersistenceManager; -import org.apache.jackrabbit.core.persistence.PersistenceManager; -import org.apache.jackrabbit.core.state.ItemState; -import org.apache.jackrabbit.core.state.ItemStateManager; -import org.apache.jackrabbit.core.state.NoSuchItemStateException; -import org.apache.jackrabbit.core.state.NodeState; -import org.apache.jackrabbit.core.state.ItemStateException; -import org.apache.jackrabbit.core.state.ChildNodeEntry; -import org.apache.jackrabbit.core.id.NodeId; -import org.apache.jackrabbit.spi.Path; -import org.apache.jackrabbit.spi.commons.conversion.MalformedPathException; -import org.apache.jackrabbit.spi.commons.name.NameConstants; -import org.apache.jackrabbit.spi.commons.name.PathBuilder; -import org.apache.lucene.document.Document; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.jcr.ItemNotFoundException; -import javax.jcr.RepositoryException; - import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; import java.util.List; -import java.util.ArrayList; -import java.util.Map; import java.util.Set; -import java.util.HashSet; + +import 
org.apache.jackrabbit.core.id.NodeId; /** - * Implements a consistency check on the search index. Currently the following - * checks are implemented: - * + * Consistency check implementation for search indexes delegating to {@link DefaultConsistencyCheck} for backward compatibility purposes. + * @deprecated Use {@link DefaultConsistencyCheck} directly or implement {@link ConsistencyCheckInterface} */ -public class ConsistencyCheck { +public class ConsistencyCheck implements ConsistencyCheckInterface { - /** - * Logger instance for this class - */ - private static final Logger log = LoggerFactory.getLogger(ConsistencyCheck.class); - - /** - * The number of nodes to fetch at once from the persistence manager. Defaults to 8kb - */ - private static final int NODESATONCE = Integer.getInteger("org.apache.jackrabbit.checker.nodesatonce", 1024 * 8); - - private final SearchIndex handler; - - /** - * The ItemStateManager of the workspace. - */ - private final ItemStateManager stateMgr; - - /** - * The PersistenceManager of the workspace. - */ - private IterablePersistenceManager pm; + private final ConsistencyCheckInterface delegate; /** - * The index to check. - */ - private final MultiIndex index; - - /** - * All the node ids and whether they were found in the index. - */ - private Map nodeIds; - - /** - * Paths of nodes that are not be indexed - */ - private Set excludedPaths; - - /** - * Paths of nodes that will be excluded from consistency check - */ - private final Set ignoredPaths = new HashSet(); - - /** - * List of all errors. - */ - private final List errors = - new ArrayList(); - - /** - * Private constructor. 
- */ - private ConsistencyCheck(MultiIndex index, SearchIndex handler, Set excludedIds) { - this.index = index; - this.handler = handler; - final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager(); - excludedPaths = new HashSet(excludedIds.size()); - for (NodeId excludedId : excludedIds) { - try { - final Path path = hierarchyManager.getPath(excludedId); - excludedPaths.add(path); - } catch (ItemNotFoundException e) { - log.warn("Excluded node does not exist"); - } catch (RepositoryException e) { - log.error("Failed to get excluded path", e); - } - } - - //JCR-3773: ignore the tree jcr:nodeTypes - PathBuilder pathBuilder = new PathBuilder(); - pathBuilder.addRoot(); - pathBuilder.addLast(NameConstants.JCR_NODETYPES); - try { - Path path = pathBuilder.getPath(); - log.info("consistency check will skip " + path); - ignoredPaths.add(path); - } catch (MalformedPathException e) { - //will never happen - log.error("Malformed path", e); - } - - this.stateMgr = handler.getContext().getItemStateManager(); - final PersistenceManager pm = handler.getContext().getPersistenceManager(); - if (pm instanceof IterablePersistenceManager) { - this.pm = (IterablePersistenceManager) pm; - } - } - - /** - * Runs the consistency check on index. - * + * Creates a new consistency check for the given index. * - * - * @param index the index to check. - * @param handler the QueryHandler to use. + * @param index the index to check + * @param handler the QueryHandler to use * @param excludedIds the set of node ids that are not indexed - * @return the consistency check with the results. - * @throws IOException if an error occurs while checking. 
+ * @deprecated Use {@link DefaultConsistencyCheck} directly or implement {@link ConsistencyCheckInterface} */ - static ConsistencyCheck run(MultiIndex index, SearchIndex handler, final Set excludedIds) - throws IOException { - ConsistencyCheck check = new ConsistencyCheck(index, handler, excludedIds); - check.run(); - return check; + @Deprecated + public ConsistencyCheck(MultiIndex index, SearchIndex handler, Set excludedIds) { + this.delegate = new DefaultConsistencyCheck(index, handler, excludedIds); } - /** - * Repairs detected errors during the consistency check. - * @param ignoreFailure if true repair failures are ignored, - * the repair continues without throwing an exception. If - * false the repair procedure is aborted on the first - * repair failure. - * @throws IOException if a repair failure occurs. - */ - public void repair(boolean ignoreFailure) throws IOException { - if (errors.size() == 0) { - log.info("No errors found."); - return; - } - int notRepairable = 0; - for (ConsistencyCheckError error : errors) { - try { - if (error.repairable()) { - error.repair(); - } else { - log.warn("Not repairable: " + error); - notRepairable++; - } - } catch (Exception e) { - if (ignoreFailure) { - log.warn("Exception while repairing: " + error, e); - } else if (e instanceof IOException) { - throw (IOException) e; - } else { - throw new IOExceptionWithCause(e); - } - } - } - log.info("Repaired " + (errors.size() - notRepairable) + " errors."); - if (notRepairable > 0) { - log.warn("" + notRepairable + " error(s) not repairable."); - } + @Override + public void run() throws IOException { + delegate.run(); } - /** - * Returns the errors detected by the consistency check. - * @return the errors detected by the consistency check. - */ - public List getErrors() { - return new ArrayList(errors); + @Override + public void repair(boolean ignoreFailure) throws IOException { + delegate.repair(ignoreFailure); } - /** - * Runs the consistency check. 
- * @throws IOException if an error occurs while running the check. - */ - private void run() throws IOException { - log.info("Checking index of workspace " + handler.getContext().getWorkspace()); - loadNodes(); - if (nodeIds != null) { - checkIndexConsistency(); - checkIndexCompleteness(); - } + @Override + public List getErrors() { + return delegate.getErrors(); } + @Override public void doubleCheckErrors() { - if (!errors.isEmpty()) { - log.info("Double checking errors"); - final ClusterNode clusterNode = handler.getContext().getClusterNode(); - if (clusterNode != null) { - try { - clusterNode.sync(); - } catch (ClusterException e) { - log.error("Could not sync cluster node for double checking errors"); - } - } - final Iterator iterator = errors.iterator(); - while (iterator.hasNext()) { - try { - final ConsistencyCheckError error = iterator.next(); - if (!error.doubleCheck(handler, stateMgr)) { - log.info("False positive: " + error.toString()); - iterator.remove(); - } - } catch (RepositoryException e) { - log.error("Failed to double check consistency error", e); - } catch (IOException e) { - log.error("Failed to double check consistency error", e); - } - } - } - } - - private void loadNodes() { - log.info("Loading nodes"); - try { - int count = 0; - Map nodeIds = new HashMap(); - List batch = pm.getAllNodeIds(null, NODESATONCE); - NodeId lastId = null; - while (!batch.isEmpty()) { - for (NodeId nodeId : batch) { - lastId = nodeId; - - count++; - if (count % 1000 == 0) { - log.info(pm + ": loaded " + count + " node ids..."); - } - - nodeIds.put(nodeId, Boolean.FALSE); - - } - batch = pm.getAllNodeIds(lastId, NODESATONCE); - } - if (pm.exists(lastId)) { - this.nodeIds = nodeIds; - } else { - log.info("Failed to read all nodes, starting over"); - loadNodes(); - } - } catch (ItemStateException e) { - log.error("Exception while loading items to check", e); - } catch (RepositoryException e) { - log.error("Exception while loading items to check", e); - } - } - - 
private void checkIndexConsistency() throws IOException { - log.info("Checking index consistency"); - // Ids of multiple nodes in the index - Set multipleEntries = new HashSet(); - CachingMultiIndexReader reader = index.getIndexReader(); - try { - for (int i = 0; i < reader.maxDoc(); i++) { - if (i > 10 && i % (reader.maxDoc() / 5) == 0) { - long progress = Math.round((100.0 * (float) i) / ((float) reader.maxDoc() * 2f)); - log.info("progress: " + progress + "%"); - } - if (reader.isDeleted(i)) { - continue; - } - Document d = reader.document(i, FieldSelectors.UUID); - NodeId id = new NodeId(d.get(FieldNames.UUID)); - if (!isIgnored(id)) { - boolean nodeExists = nodeIds.containsKey(id); - if (nodeExists) { - Boolean alreadyIndexed = nodeIds.put(id, Boolean.TRUE); - if (alreadyIndexed) { - multipleEntries.add(id); - } - } else { - errors.add(new NodeDeleted(id)); - } - } - } - } finally { - reader.release(); - } - - // create multiple entries errors - for (NodeId id : multipleEntries) { - errors.add(new MultipleEntries(id)); - } - - reader = index.getIndexReader(); - try { - // run through documents again and check parent - for (int i = 0; i < reader.maxDoc(); i++) { - if (i > 10 && i % (reader.maxDoc() / 5) == 0) { - long progress = Math.round((100.0 * (float) i) / ((float) reader.maxDoc() * 2f)); - log.info("progress: " + (progress + 50) + "%"); - } - if (reader.isDeleted(i)) { - continue; - } - Document d = reader.document(i, FieldSelectors.UUID_AND_PARENT); - NodeId id = new NodeId(d.get(FieldNames.UUID)); - if (!nodeIds.containsKey(id) || isIgnored(id)) { - // this node is ignored or was already marked for deletion - continue; - } - String parent = d.get(FieldNames.PARENT); - if (parent == null || parent.isEmpty()) { - continue; - } - final NodeId parentId = new NodeId(parent); - - boolean parentExists = nodeIds.containsKey(parentId); - boolean parentIndexed = parentExists && nodeIds.get(parentId); - if (parentIndexed) { - continue; - } else if 
(id.equals(RepositoryImpl.SYSTEM_ROOT_NODE_ID) - && parentId.equals(RepositoryImpl.ROOT_NODE_ID)) { - continue; // special case for the /jcr:system node - } - - // parent is missing from index - if (parentExists) { - errors.add(new MissingAncestor(id, parentId)); - } else { - try { - final ItemState itemState = stateMgr.getItemState(id); - if (parentId.equals(itemState.getParentId())) { - // orphaned node - errors.add(new UnknownParent(id, parentId)); - } else { - errors.add(new WrongParent(id, parentId, itemState.getParentId())); - } - } catch (ItemStateException ignored) { - } - } - } - } finally { - reader.release(); - } - - } - - private void checkIndexCompleteness() { - log.info("Checking index completeness"); - int i = 0; - int size = nodeIds.size(); - for (Map.Entry entry : nodeIds.entrySet()) { - // check whether all nodes in the repository are indexed - NodeId nodeId = entry.getKey(); - boolean indexed = entry.getValue(); - try { - if (++i > 10 && i % (size / 10) == 0) { - long progress = Math.round((100.0 * (float) i) / (float) size); - log.info("progress: " + progress + "%"); - } - if (!indexed && !isIgnored(nodeId) && !isExcluded(nodeId)) { - NodeState nodeState = getNodeState(nodeId); - if (nodeState != null && !isBrokenNode(nodeId, nodeState)) { - errors.add(new NodeAdded(nodeId)); - } - } - } catch (ItemStateException e) { - log.error("Failed to check node: " + nodeId, e); - } - } - } - - private boolean isExcluded(NodeId id) { - try { - final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager(); - final Path path = hierarchyManager.getPath(id); - for (Path excludedPath : excludedPaths) { - if (excludedPath.isEquivalentTo(path) || excludedPath.isAncestorOf(path)) { - return true; - } - } - } catch (RepositoryException ignored) { - } - return false; - } - - private boolean isIgnored(NodeId id) { - try { - final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager(); - final Path path = 
hierarchyManager.getPath(id); - for (Path excludedPath : ignoredPaths) { - if (excludedPath.isEquivalentTo(path) || excludedPath.isAncestorOf(path)) { - return true; - } - } - } catch (RepositoryException ignored) { - } - return false; - } - - private NodeState getNodeState(NodeId nodeId) throws ItemStateException { - try { - return (NodeState) stateMgr.getItemState(nodeId); - } catch (NoSuchItemStateException e) { - return null; - } - } - - private boolean isBrokenNode(final NodeId nodeId, final NodeState nodeState) throws ItemStateException { - final NodeId parentId = nodeState.getParentId(); - if (parentId != null) { - final NodeState parentState = getNodeState(parentId); - if (parentState == null) { - log.warn("Node missing from index is orphaned node: " + nodeId); - return true; - } - if (!parentState.hasChildNodeEntry(nodeId)) { - log.warn("Node missing from index is abandoned node: " + nodeId); - return true; - } - } - return false; - } - - /** - * Returns the path for node. If an error occurs this method - * returns the uuid of the node. - * - * @param node the node to retrieve the path from - * @return the path of the node or its uuid. - */ - private String getPath(NodeState node) { - // remember as fallback - String uuid = node.getNodeId().toString(); - StringBuilder path = new StringBuilder(); - List elements = new ArrayList(); - try { - while (node.getParentId() != null) { - NodeId parentId = node.getParentId(); - NodeState parent = (NodeState) stateMgr.getItemState(parentId); - ChildNodeEntry entry = parent.getChildNodeEntry(node.getNodeId()); - if (entry == null) { - log.warn("Failed to build path: abandoned child {} of node {}. 
" + - "Please run a repository consistency check", node.getNodeId(), parentId); - return uuid; - } - elements.add(entry); - node = parent; - } - for (int i = elements.size() - 1; i > -1; i--) { - ChildNodeEntry entry = elements.get(i); - path.append('/').append(entry.getName().getLocalName()); - if (entry.getIndex() > 1) { - path.append('[').append(entry.getIndex()).append(']'); - } - } - if (path.length() == 0) { - path.append('/'); - } - return path.toString(); - } catch (ItemStateException e) { - return uuid; - } - } - - //-------------------< ConsistencyCheckError classes >---------------------- - - /** - * One or more ancestors of an indexed node are not available in the index. - */ - private class MissingAncestor extends ConsistencyCheckError { - - private final NodeId parentId; - - private MissingAncestor(NodeId id, NodeId parentId) { - super("Parent of " + id + " missing in index. Parent: " + parentId, id); - this.parentId = parentId; - } - - /** - * Returns true. - * @return true. - */ - public boolean repairable() { - return true; - } - - /** - * Repairs the missing node by indexing the missing ancestors. - * @throws Exception if an error occurs while repairing. 
- */ - public void repair() throws Exception { - NodeId ancestorId = parentId; - while (ancestorId != null && nodeIds.containsKey(ancestorId) && nodeIds.get(ancestorId)) { - NodeState n = (NodeState) stateMgr.getItemState(ancestorId); - log.info("Repairing missing node " + getPath(n) + " (" + ancestorId + ")"); - Document d = index.createDocument(n); - index.addDocument(d); - nodeIds.put(n.getNodeId(), Boolean.TRUE); - ancestorId = n.getParentId(); - } - } - - @Override - boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) - throws RepositoryException, IOException { - final List documents = handler.getNodeDocuments(id); - for (Document document : documents) { - final String parent = document.get(FieldNames.PARENT); - if (parent != null && !parent.isEmpty()) { - final NodeId parentId = new NodeId(parent); - if (handler.getNodeDocuments(parentId).isEmpty()) { - return true; - } - } - } - return false; - - } - } - - /** - * The parent of a node is not in the repository - */ - private static class UnknownParent extends ConsistencyCheckError { - - private NodeId parentId; - - private UnknownParent(NodeId id, NodeId parentId) { - super("Node " + id + " has unknown parent: " + parentId, id); - this.parentId = parentId; - } - - /** - * Not reparable (yet). - * @return false. - */ - public boolean repairable() { - return false; - } - - /** - * No operation. 
- */ - public void repair() { - log.warn("Unknown parent for " + id + " cannot be repaired"); - } - - @Override - boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) - throws IOException, RepositoryException { - final List documents = handler.getNodeDocuments(id); - for (Document document : documents) { - final String parent = document.get(FieldNames.PARENT); - if (parent != null && !parent.isEmpty()) { - final NodeId parentId = new NodeId(parent); - if (parentId.equals(this.parentId) && !stateManager.hasItemState(parentId)) { - return true; - } - } - } - return false; - } - } - - /** - * The parent as indexed does not correspond with the actual parent in the repository - */ - private class WrongParent extends ConsistencyCheckError { - - private NodeId indexedParentId; - - private WrongParent(NodeId id, NodeId indexedParentId, NodeId actualParentId) { - super("Node " + id + " has wrong parent: " + indexedParentId + ", should be : " + actualParentId, id); - this.indexedParentId = indexedParentId; - } - - @Override - public boolean repairable() { - return true; - } - - /** - * Reindex node. 
- */ - @Override - void repair() throws Exception { - index.removeAllDocuments(id); - try { - NodeState node = (NodeState) stateMgr.getItemState(id); - log.info("Re-indexing node with wrong parent in index: " + getPath(node)); - Document d = index.createDocument(node); - index.addDocument(d); - nodeIds.put(node.getNodeId(), Boolean.TRUE); - } catch (NoSuchItemStateException e) { - log.info("Not re-indexing node with wrong parent because node no longer exists"); - } - } - - @Override - boolean doubleCheck(final SearchIndex handler, final ItemStateManager stateManager) - throws RepositoryException, IOException { - final List documents = handler.getNodeDocuments(id); - for (Document document : documents) { - final String parent = document.get(FieldNames.PARENT); - if (parent != null && !parent.isEmpty()) { - final NodeId parentId = new NodeId(parent); - if (parentId.equals(indexedParentId) && !stateManager.hasItemState(parentId)) { - return true; - } - } - } - return false; - } - - } - - /** - * A node is present multiple times in the index. - */ - private class MultipleEntries extends ConsistencyCheckError { - - MultipleEntries(NodeId id) { - super("Multiple entries found for node " + id, id); - } - - /** - * Returns true. - * @return true. - */ - public boolean repairable() { - return true; - } - - /** - * Removes the nodes with the identical uuids from the index and - * re-index the node. - * @throws IOException if an error occurs while repairing. 
- */ - public void repair() throws Exception { - // first remove all occurrences - index.removeAllDocuments(id); - // then re-index the node - try { - NodeState node = (NodeState) stateMgr.getItemState(id); - log.info("Re-indexing duplicate node occurrences in index: " + getPath(node)); - Document d = index.createDocument(node); - index.addDocument(d); - nodeIds.put(node.getNodeId(), Boolean.TRUE); - } catch (NoSuchItemStateException e) { - log.info("Not re-indexing node with multiple occurrences because node no longer exists"); - } - } - - @Override - boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) - throws RepositoryException, IOException { - return handler.getNodeDocuments(id).size() > 1; - } - } - - /** - * Indicates that a node has been deleted but is still in the index. - */ - private class NodeDeleted extends ConsistencyCheckError { - - NodeDeleted(NodeId id) { - super("Node " + id + " no longer exists.", id); - } - - /** - * Returns true. - * @return true. - */ - public boolean repairable() { - return true; - } - - /** - * Deletes the nodes from the index. - * @throws IOException if an error occurs while repairing. 
- */ - public void repair() throws IOException { - log.info("Removing deleted node from index: " + id); - index.removeDocument(id); - } - - @Override - boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) - throws RepositoryException, IOException { - final List documents = handler.getNodeDocuments(id); - if (!documents.isEmpty()) { - if (!stateManager.hasItemState(id)) { - return true; - } - } - return false; - } - } - - private class NodeAdded extends ConsistencyCheckError { - - NodeAdded(final NodeId id) { - super("Node " + id + " is missing.", id); - } - - @Override - public boolean repairable() { - return true; - } - - @Override - void repair() throws Exception { - try { - NodeState nodeState = (NodeState) stateMgr.getItemState(id); - log.info("Adding missing node to index: " + getPath(nodeState)); - final Iterator remove = Collections.emptyList().iterator(); - final Iterator add = Collections.singletonList(nodeState).iterator(); - handler.updateNodes(remove, add); - } catch (NoSuchItemStateException e) { - log.info("Not adding missing node because node no longer exists"); - } - } - - @Override - boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) - throws RepositoryException, IOException { - final List documents = handler.getNodeDocuments(id); - if (documents.isEmpty()) { - if (stateManager.hasItemState(id)) { - return true; - } - } - return false; - } - + delegate.doubleCheckErrors(); } } diff --git a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckError.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckError.java index 5281548f0a9..7119d89d3b2 100644 --- a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckError.java +++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckError.java @@ -38,7 +38,7 @@ public abstract class ConsistencyCheckError { */ protected final NodeId 
id; - ConsistencyCheckError(String message, NodeId id) { + protected ConsistencyCheckError(String message, NodeId id) { this.message = message; this.id = id; } @@ -61,7 +61,7 @@ public String toString() { * Executes the repair operation. * @throws Exception if an error occurs while repairing. */ - abstract void repair() throws Exception; + protected abstract void repair() throws Exception; /** * Double check the error. Used to rule out false positives in live environments. @@ -70,6 +70,6 @@ public String toString() { * @throws RepositoryException * @throws IOException */ - abstract boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) + protected abstract boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) throws RepositoryException, IOException; } diff --git a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckInterface.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckInterface.java new file mode 100644 index 00000000000..4c682c03902 --- /dev/null +++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/ConsistencyCheckInterface.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.lucene; + +import java.io.IOException; +import java.util.List; + +/** + * Interface for consistency checking of search indexes. + * + * @since Apache Jackrabbit 2.0 + */ +public interface ConsistencyCheckInterface { + + /** + * Runs the consistency check. + * + * @throws IOException if an error occurs while running the check. + */ + void run() throws IOException; + + /** + * Repairs detected errors during the consistency check. + * + * @param ignoreFailure if true repair failures are ignored, + * the repair continues without throwing an exception. If + * false the repair procedure is aborted on the first + * repair failure. + * @throws IOException if a repair failure occurs. + */ + void repair(boolean ignoreFailure) throws IOException; + + /** + * Returns the errors detected by the consistency check. + * + * @return the errors detected by the consistency check. + */ + List<ConsistencyCheckError> getErrors(); + + /** + * Performs a double check on the detected errors to verify they are still valid. + */ + void doubleCheckErrors(); +} diff --git a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultConsistencyCheck.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultConsistencyCheck.java new file mode 100644 index 00000000000..5c006cbf2dd --- /dev/null +++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultConsistencyCheck.java @@ -0,0 +1,803 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.core.query.lucene; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import javax.jcr.ItemNotFoundException; +import javax.jcr.RepositoryException; + +import org.apache.commons.io.IOExceptionWithCause; +import org.apache.jackrabbit.core.HierarchyManager; +import org.apache.jackrabbit.core.RepositoryImpl; +import org.apache.jackrabbit.core.cluster.ClusterException; +import org.apache.jackrabbit.core.cluster.ClusterNode; +import org.apache.jackrabbit.core.id.NodeId; +import org.apache.jackrabbit.core.persistence.IterablePersistenceManager; +import org.apache.jackrabbit.core.persistence.PersistenceManager; +import org.apache.jackrabbit.core.state.ChildNodeEntry; +import org.apache.jackrabbit.core.state.ItemState; +import org.apache.jackrabbit.core.state.ItemStateException; +import org.apache.jackrabbit.core.state.ItemStateManager; +import org.apache.jackrabbit.core.state.NoSuchItemStateException; +import org.apache.jackrabbit.core.state.NodeState; +import org.apache.jackrabbit.spi.Path; +import org.apache.jackrabbit.spi.commons.conversion.MalformedPathException; +import org.apache.jackrabbit.spi.commons.name.NameConstants; +import org.apache.jackrabbit.spi.commons.name.PathBuilder; +import org.apache.lucene.document.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implements a consistency check on 
the search index. Currently the following + * checks are implemented: + *
<ul>
+ * <li>Does the node exist in the ItemStateManager? If it does not exist + * anymore the node is deleted from the index.</li>
+ * <li>Is the parent of a node also present in the index? If it is not present it + * will be indexed.</li>
+ * <li>Is a node indexed multiple times? If that is the case, all occurrences + * in the index for such a node are removed, and the node is re-indexed.</li>
+ * <li>Is a node missing from the index? If so, it is added.</li>
+ * </ul>
+ */ +public class DefaultConsistencyCheck implements ConsistencyCheckInterface { + + /** + * Logger instance for this class + */ + private static final Logger log = LoggerFactory.getLogger(DefaultConsistencyCheck.class); + + /** + * The number of nodes to fetch at once from the persistence manager. Defaults to 8192 (1024 * 8) + */ + private static final int NODESATONCE = Integer.getInteger("org.apache.jackrabbit.checker.nodesatonce", 1024 * 8); + + private final SearchIndex handler; + + /** + * The ItemStateManager of the workspace. + */ + private final ItemStateManager stateMgr; + + /** + * The PersistenceManager of the workspace. + */ + private IterablePersistenceManager pm; + + /** + * The index to check. + */ + private final MultiIndex index; + + /** + * All the node ids and whether they were found in the index. + */ + private Map<NodeId, Boolean> nodeIds; + + /** + * Paths of nodes that are not to be indexed + */ + private Set<Path> excludedPaths; + + /** + * Paths of nodes that will be excluded from consistency check + */ + private final Set<Path> ignoredPaths = new HashSet<Path>(); + + /** + * List of all errors. + */ + private final List<ConsistencyCheckError> errors = + new ArrayList<ConsistencyCheckError>(); + + /** + * Creates a new consistency check for the given index. + * + * @param index the index to check + * @param handler the QueryHandler to use + * @param excludedIds the set of node ids that are not indexed + */ + /** + * Creates a new consistency check for the given index.
+ * + * @param index the index to check + * @param handler the QueryHandler to use + * @param excludedIds the set of node ids that are not indexed + */ + public DefaultConsistencyCheck(MultiIndex index, SearchIndex handler, Set excludedIds) { + this.index = index; + this.handler = handler; + final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager(); + excludedPaths = new HashSet(excludedIds.size()); + for (NodeId excludedId : excludedIds) { + try { + final Path path = hierarchyManager.getPath(excludedId); + excludedPaths.add(path); + } catch (ItemNotFoundException e) { + log.warn("Excluded node does not exist"); + } catch (RepositoryException e) { + log.error("Failed to get excluded path", e); + } + } + + //JCR-3773: ignore the tree jcr:nodeTypes + PathBuilder pathBuilder = new PathBuilder(); + pathBuilder.addRoot(); + pathBuilder.addLast(NameConstants.JCR_NODETYPES); + try { + Path path = pathBuilder.getPath(); + log.info("consistency check will skip " + path); + ignoredPaths.add(path); + } catch (MalformedPathException e) { + //will never happen + log.error("Malformed path", e); + } + + this.stateMgr = handler.getContext().getItemStateManager(); + final PersistenceManager pm = handler.getContext().getPersistenceManager(); + if (pm instanceof IterablePersistenceManager) { + this.pm = (IterablePersistenceManager) pm; + } + } + + + + /** + * Repairs detected errors during the consistency check. + * @param ignoreFailure if true repair failures are ignored, + * the repair continues without throwing an exception. If + * false the repair procedure is aborted on the first + * repair failure. + * @throws IOException if a repair failure occurs. 
+ */ + @Override + public void repair(boolean ignoreFailure) throws IOException { + if (errors.size() == 0) { + log.info("No errors found."); + return; + } + int notRepairable = 0; + for (ConsistencyCheckError error : errors) { + try { + if (error.repairable()) { + error.repair(); + } else { + log.warn("Not repairable: " + error); + notRepairable++; + } + } catch (Exception e) { + if (ignoreFailure) { + log.warn("Exception while repairing: " + error, e); + } else if (e instanceof IOException) { + throw (IOException) e; + } else { + throw new IOExceptionWithCause(e); + } + } + } + log.info("Repaired " + (errors.size() - notRepairable) + " errors."); + if (notRepairable > 0) { + log.warn("" + notRepairable + " error(s) not repairable."); + } + } + + /** + * Returns the errors detected by the consistency check. + * @return the errors detected by the consistency check. + */ + @Override + public List getErrors() { + return new ArrayList(errors); + } + + /** + * Runs the consistency check. + * @throws IOException if an error occurs while running the check. 
+ */ + @Override + public void run() throws IOException { + log.info("Checking index of workspace " + handler.getContext().getWorkspace()); + loadNodes(); + if (nodeIds != null) { + checkIndexConsistency(); + checkIndexCompleteness(); + } + } + + @Override + public void doubleCheckErrors() { + if (!errors.isEmpty()) { + log.info("Double checking errors"); + final ClusterNode clusterNode = handler.getContext().getClusterNode(); + if (clusterNode != null) { + try { + clusterNode.sync(); + } catch (ClusterException e) { + log.error("Could not sync cluster node for double checking errors"); + } + } + final Iterator iterator = errors.iterator(); + while (iterator.hasNext()) { + try { + final ConsistencyCheckError error = iterator.next(); + if (!error.doubleCheck(handler, stateMgr)) { + log.info("False positive: " + error.toString()); + iterator.remove(); + } + } catch (RepositoryException e) { + log.error("Failed to double check consistency error", e); + } catch (IOException e) { + log.error("Failed to double check consistency error", e); + } + } + } + } + + private void loadNodes() { + log.info("Loading nodes"); + try { + int count = 0; + Map nodeIds = new HashMap(); + List batch = pm.getAllNodeIds(null, NODESATONCE); + NodeId lastId = null; + while (!batch.isEmpty()) { + for (NodeId nodeId : batch) { + lastId = nodeId; + + count++; + if (count % 1000 == 0) { + log.info(pm + ": loaded " + count + " node ids..."); + } + + nodeIds.put(nodeId, Boolean.FALSE); + + } + batch = pm.getAllNodeIds(lastId, NODESATONCE); + } + if (pm.exists(lastId)) { + this.nodeIds = nodeIds; + } else { + log.info("Failed to read all nodes, starting over"); + loadNodes(); + } + } catch (ItemStateException e) { + log.error("Exception while loading items to check", e); + } catch (RepositoryException e) { + log.error("Exception while loading items to check", e); + } + } + + protected void checkIndexConsistency() throws IOException { + log.info("Checking index consistency"); + // Ids of multiple 
nodes in the index + Set multipleEntries = new HashSet(); + CachingMultiIndexReader reader = index.getIndexReader(); + try { + for (int i = 0; i < reader.maxDoc(); i++) { + if (i > 10 && i % (reader.maxDoc() / 5) == 0) { + long progress = Math.round((100.0 * (float) i) / ((float) reader.maxDoc() * 2f)); + log.info("progress: " + progress + "%"); + } + if (reader.isDeleted(i)) { + continue; + } + Document d = reader.document(i, FieldSelectors.UUID); + NodeId id = new NodeId(d.get(FieldNames.UUID)); + if (!isIgnored(id)) { + boolean nodeExists = nodeIds.containsKey(id); + if (nodeExists) { + Boolean alreadyIndexed = nodeIds.put(id, Boolean.TRUE); + if (alreadyIndexed) { + multipleEntries.add(id); + } + } else { + errors.add(new NodeDeleted(id)); + } + } + } + } finally { + reader.release(); + } + + // create multiple entries errors + for (NodeId id : multipleEntries) { + errors.add(new MultipleEntries(id)); + } + + reader = index.getIndexReader(); + try { + // run through documents again and check parent + for (int i = 0; i < reader.maxDoc(); i++) { + if (i > 10 && i % (reader.maxDoc() / 5) == 0) { + long progress = Math.round((100.0 * (float) i) / ((float) reader.maxDoc() * 2f)); + log.info("progress: " + (progress + 50) + "%"); + } + if (reader.isDeleted(i)) { + continue; + } + Document d = reader.document(i, FieldSelectors.UUID_AND_PARENT); + NodeId id = new NodeId(d.get(FieldNames.UUID)); + if (!nodeIds.containsKey(id) || isIgnored(id)) { + // this node is ignored or was already marked for deletion + continue; + } + String parent = d.get(FieldNames.PARENT); + if (parent == null || parent.isEmpty()) { + continue; + } + final NodeId parentId = new NodeId(parent); + + boolean parentExists = nodeIds.containsKey(parentId); + boolean parentIndexed = parentExists && nodeIds.get(parentId); + if (parentIndexed) { + continue; + } else if (id.equals(RepositoryImpl.SYSTEM_ROOT_NODE_ID) + && parentId.equals(RepositoryImpl.ROOT_NODE_ID)) { + continue; // special case for the 
/jcr:system node + } + + // parent is missing from index + if (parentExists) { + errors.add(new MissingAncestor(id, parentId)); + } else { + try { + final ItemState itemState = stateMgr.getItemState(id); + if (parentId.equals(itemState.getParentId())) { + // orphaned node + errors.add(new UnknownParent(id, parentId)); + } else { + errors.add(new WrongParent(id, parentId, itemState.getParentId())); + } + } catch (ItemStateException ignored) { + } + } + } + } finally { + reader.release(); + } + + } + + protected void checkIndexCompleteness() { + log.info("Checking index completeness"); + int i = 0; + int size = nodeIds.size(); + for (Map.Entry entry : nodeIds.entrySet()) { + // check whether all nodes in the repository are indexed + NodeId nodeId = entry.getKey(); + boolean indexed = entry.getValue(); + try { + if (++i > 10 && i % (size / 10) == 0) { + long progress = Math.round((100.0 * (float) i) / (float) size); + log.info("progress: " + progress + "%"); + } + if (!indexed && !isIgnored(nodeId) && !isExcluded(nodeId)) { + NodeState nodeState = getNodeState(nodeId); + if (nodeState != null && !isBrokenNode(nodeId, nodeState)) { + errors.add(new NodeAdded(nodeId)); + } + } + } catch (ItemStateException e) { + log.error("Failed to check node: " + nodeId, e); + } + } + } + + private boolean isExcluded(NodeId id) { + try { + final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager(); + final Path path = hierarchyManager.getPath(id); + for (Path excludedPath : excludedPaths) { + if (excludedPath.isEquivalentTo(path) || excludedPath.isAncestorOf(path)) { + return true; + } + } + } catch (RepositoryException ignored) { + } + return false; + } + + private boolean isIgnored(NodeId id) { + try { + final HierarchyManager hierarchyManager = handler.getContext().getHierarchyManager(); + final Path path = hierarchyManager.getPath(id); + for (Path excludedPath : ignoredPaths) { + if (excludedPath.isEquivalentTo(path) || excludedPath.isAncestorOf(path)) { 
+ return true; + } + } + } catch (RepositoryException ignored) { + } + return false; + } + + private NodeState getNodeState(NodeId nodeId) throws ItemStateException { + try { + return (NodeState) stateMgr.getItemState(nodeId); + } catch (NoSuchItemStateException e) { + return null; + } + } + + private boolean isBrokenNode(final NodeId nodeId, final NodeState nodeState) throws ItemStateException { + final NodeId parentId = nodeState.getParentId(); + if (parentId != null) { + final NodeState parentState = getNodeState(parentId); + if (parentState == null) { + log.warn("Node missing from index is orphaned node: " + nodeId); + return true; + } + if (!parentState.hasChildNodeEntry(nodeId)) { + log.warn("Node missing from index is abandoned node: " + nodeId); + return true; + } + } + return false; + } + + /** + * Returns the path for node. If an error occurs this method + * returns the uuid of the node. + * + * @param node the node to retrieve the path from + * @return the path of the node or its uuid. + */ + private String getPath(NodeState node) { + // remember as fallback + String uuid = node.getNodeId().toString(); + StringBuilder path = new StringBuilder(); + List elements = new ArrayList(); + try { + while (node.getParentId() != null) { + NodeId parentId = node.getParentId(); + NodeState parent = (NodeState) stateMgr.getItemState(parentId); + ChildNodeEntry entry = parent.getChildNodeEntry(node.getNodeId()); + if (entry == null) { + log.warn("Failed to build path: abandoned child {} of node {}. 
" + + "Please run a repository consistency check", node.getNodeId(), parentId); + return uuid; + } + elements.add(entry); + node = parent; + } + for (int i = elements.size() - 1; i > -1; i--) { + ChildNodeEntry entry = elements.get(i); + path.append('/').append(entry.getName().getLocalName()); + if (entry.getIndex() > 1) { + path.append('[').append(entry.getIndex()).append(']'); + } + } + if (path.length() == 0) { + path.append('/'); + } + return path.toString(); + } catch (ItemStateException e) { + return uuid; + } + } + + //-------------------< ConsistencyCheckError classes >---------------------- + + /** + * One or more ancestors of an indexed node are not available in the index. + */ + private class MissingAncestor extends ConsistencyCheckError { + + private final NodeId parentId; + + private MissingAncestor(NodeId id, NodeId parentId) { + super("Parent of " + id + " missing in index. Parent: " + parentId, id); + this.parentId = parentId; + } + + /** + * Returns true. + * @return true. + */ + public boolean repairable() { + return true; + } + + /** + * Repairs the missing node by indexing the missing ancestors. + * @throws Exception if an error occurs while repairing. 
+ */ + public void repair() throws Exception { + NodeId ancestorId = parentId; + while (ancestorId != null && nodeIds.containsKey(ancestorId) && nodeIds.get(ancestorId)) { + NodeState n = (NodeState) stateMgr.getItemState(ancestorId); + log.info("Repairing missing node " + getPath(n) + " (" + ancestorId + ")"); + Document d = index.createDocument(n); + index.addDocument(d); + nodeIds.put(n.getNodeId(), Boolean.TRUE); + ancestorId = n.getParentId(); + } + } + + @Override + protected boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) + throws RepositoryException, IOException { + final List documents = handler.getNodeDocuments(id); + for (Document document : documents) { + final String parent = document.get(FieldNames.PARENT); + if (parent != null && !parent.isEmpty()) { + final NodeId parentId = new NodeId(parent); + if (handler.getNodeDocuments(parentId).isEmpty()) { + return true; + } + } + } + return false; + + } + } + + /** + * The parent of a node is not in the repository + */ + private static class UnknownParent extends ConsistencyCheckError { + + private NodeId parentId; + + private UnknownParent(NodeId id, NodeId parentId) { + super("Node " + id + " has unknown parent: " + parentId, id); + this.parentId = parentId; + } + + /** + * Not reparable (yet). + * @return false. + */ + public boolean repairable() { + return false; + } + + /** + * No operation. 
+ */ + public void repair() { + log.warn("Unknown parent for " + id + " cannot be repaired"); + } + + @Override + protected boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) + throws IOException, RepositoryException { + final List documents = handler.getNodeDocuments(id); + for (Document document : documents) { + final String parent = document.get(FieldNames.PARENT); + if (parent != null && !parent.isEmpty()) { + final NodeId parentId = new NodeId(parent); + if (parentId.equals(this.parentId) && !stateManager.hasItemState(parentId)) { + return true; + } + } + } + return false; + } + } + + /** + * The parent as indexed does not correspond with the actual parent in the repository + */ + private class WrongParent extends ConsistencyCheckError { + + private NodeId indexedParentId; + + private WrongParent(NodeId id, NodeId indexedParentId, NodeId actualParentId) { + super("Node " + id + " has wrong parent: " + indexedParentId + ", should be : " + actualParentId, id); + this.indexedParentId = indexedParentId; + } + + @Override + public boolean repairable() { + return true; + } + + /** + * Reindex node. 
+ */ + @Override + protected void repair() throws Exception { + index.removeAllDocuments(id); + try { + NodeState node = (NodeState) stateMgr.getItemState(id); + log.info("Re-indexing node with wrong parent in index: " + getPath(node)); + Document d = index.createDocument(node); + index.addDocument(d); + nodeIds.put(node.getNodeId(), Boolean.TRUE); + } catch (NoSuchItemStateException e) { + log.info("Not re-indexing node with wrong parent because node no longer exists"); + } + } + + @Override + protected boolean doubleCheck(final SearchIndex handler, final ItemStateManager stateManager) + throws RepositoryException, IOException { + final List documents = handler.getNodeDocuments(id); + for (Document document : documents) { + final String parent = document.get(FieldNames.PARENT); + if (parent != null && !parent.isEmpty()) { + final NodeId parentId = new NodeId(parent); + if (parentId.equals(indexedParentId) && !stateManager.hasItemState(parentId)) { + return true; + } + } + } + return false; + } + + } + + /** + * A node is present multiple times in the index. + */ + private class MultipleEntries extends ConsistencyCheckError { + + MultipleEntries(NodeId id) { + super("Multiple entries found for node " + id, id); + } + + /** + * Returns true. + * @return true. + */ + public boolean repairable() { + return true; + } + + /** + * Removes the nodes with the identical uuids from the index and + * re-index the node. + * @throws IOException if an error occurs while repairing. 
+ */ + public void repair() throws Exception { + // first remove all occurrences + index.removeAllDocuments(id); + // then re-index the node + try { + NodeState node = (NodeState) stateMgr.getItemState(id); + log.info("Re-indexing duplicate node occurrences in index: " + getPath(node)); + Document d = index.createDocument(node); + index.addDocument(d); + nodeIds.put(node.getNodeId(), Boolean.TRUE); + } catch (NoSuchItemStateException e) { + log.info("Not re-indexing node with multiple occurrences because node no longer exists"); + } + } + + @Override + protected boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) + throws RepositoryException, IOException { + return handler.getNodeDocuments(id).size() > 1; + } + } + + /** + * Indicates that a node has been deleted but is still in the index. + */ + private class NodeDeleted extends ConsistencyCheckError { + + NodeDeleted(NodeId id) { + super("Node " + id + " no longer exists.", id); + } + + /** + * Returns true. + * @return true. + */ + public boolean repairable() { + return true; + } + + /** + * Deletes the nodes from the index. + * @throws IOException if an error occurs while repairing. 
+ */ + public void repair() throws IOException { + log.info("Removing deleted node from index: " + id); + index.removeDocument(id); + } + + @Override + protected boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) + throws RepositoryException, IOException { + final List documents = handler.getNodeDocuments(id); + if (!documents.isEmpty()) { + if (!stateManager.hasItemState(id)) { + return true; + } + } + return false; + } + } + + private class NodeAdded extends ConsistencyCheckError { + + NodeAdded(final NodeId id) { + super("Node " + id + " is missing.", id); + } + + @Override + public boolean repairable() { + return true; + } + + @Override + protected void repair() throws Exception { + try { + NodeState nodeState = (NodeState) stateMgr.getItemState(id); + log.info("Adding missing node to index: " + getPath(nodeState)); + final Iterator remove = Collections.emptyList().iterator(); + final Iterator add = Collections.singletonList(nodeState).iterator(); + handler.updateNodes(remove, add); + } catch (NoSuchItemStateException e) { + log.info("Not adding missing node because node no longer exists"); + } + } + + @Override + protected boolean doubleCheck(SearchIndex handler, ItemStateManager stateManager) + throws RepositoryException, IOException { + final List documents = handler.getNodeDocuments(id); + if (documents.isEmpty()) { + if (stateManager.hasItemState(id)) { + return true; + } + } + return false; + } + + } + + protected ItemStateManager getStateMgr() { + return stateMgr; + } + + protected MultiIndex getIndex() { + return index; + } + + protected Map getNodeIds() { + return nodeIds; + } + + protected SearchIndex getHandler() { + return handler; + } + + protected IterablePersistenceManager getPm() { + return pm; + } + + protected Set getExcludedPaths() { + return excludedPaths; + } + + protected Set getIgnoredPaths() { + return ignoredPaths; + } +} diff --git 
a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java index a7239466c25..12856a7895c 100644 --- a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java +++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/MultiIndex.java @@ -469,7 +469,7 @@ synchronized void update( * @throws IOException if an error occurs while adding the document to the * index. */ - void addDocument(Document doc) throws IOException { + public void addDocument(Document doc) throws IOException { Collection empty = Collections.emptyList(); update(empty, Collections.singleton(doc)); } @@ -492,7 +492,7 @@ void removeDocument(NodeId id) throws IOException { * @return the number of deleted documents. * @throws IOException if an error occurs while deleting documents. */ - synchronized int removeAllDocuments(NodeId id) throws IOException { + public synchronized int removeAllDocuments(NodeId id) throws IOException { synchronized (updateMonitor) { updateInProgress = true; } @@ -773,14 +773,40 @@ VolatileIndex getVolatileIndex() { return volatileIndex; } + /** + * @deprecated use {@link #doConsistencyCheck()} instead + */ + ConsistencyCheck runConsistencyCheck() throws IOException { + return (ConsistencyCheck) doConsistencyCheck(); + } + /** * Runs a consistency check on this multi index. * * @return the consistency check. * @throws IOException if an error occurs while running the check. */ - ConsistencyCheck runConsistencyCheck() throws IOException { - return ConsistencyCheck.run(this, handler, excludedIDs); + ConsistencyCheckInterface doConsistencyCheck() throws IOException { + ConsistencyCheckInterface check = createConsistencyCheck(); + check.run(); + return check; + } + + /** + * Creates a new ConsistencyCheckInterface instance. This method can be overridden by + * subclasses to provide a custom implementation. 
+ * + * @return a new ConsistencyCheckInterface instance + */ + protected ConsistencyCheckInterface createConsistencyCheck() { + try { + return handler.getConsistencyCheckClass() + .getConstructor(MultiIndex.class, SearchIndex.class, Set.class) + .newInstance(this, handler, excludedIDs); + } catch (Exception e) { + log.error("Failed to create ConsistencyCheckInterface instance, using default", e); + return new DefaultConsistencyCheck(this, handler, excludedIDs); + } } /** @@ -863,7 +889,7 @@ long getIndexGeneration() { * @throws RepositoryException if an error occurs while reading from the * workspace. */ - Document createDocument(NodeState node) throws RepositoryException { + public Document createDocument(NodeState node) throws RepositoryException { return handler.createDocument(node, nsMappings, version); } diff --git a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java index 8e4db22cc2e..ce9aa42fb5b 100644 --- a/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java +++ b/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/SearchIndex.java @@ -92,7 +92,6 @@ import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiReader; -import org.apache.lucene.index.Payload; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.search.IndexSearcher; @@ -429,6 +428,11 @@ public class SearchIndex extends AbstractQueryHandler { */ private Class synonymProviderClass; + /** + * The class that implements {@link ConsistencyCheckInterface}. + */ + private Class consistencyCheckClass = DefaultConsistencyCheck.class; + /** * The currently set synonym provider. 
*/ @@ -588,7 +592,7 @@ protected void doInit() throws IOException { && (index.getRedoLogApplied() || forceConsistencyCheck)) { log.info("Running consistency check..."); try { - ConsistencyCheck check = runConsistencyCheck(); + ConsistencyCheckInterface check = doConsistencyCheck(); if (autoRepair) { check.repair(true); } else { @@ -1141,14 +1145,22 @@ public RedoLogFactory getRedoLogFactory() { return redoLogFactory; } + /** + * @deprecated use {@link #doConsistencyCheck()} instead + */ + @Deprecated + public ConsistencyCheck runConsistencyCheck() throws IOException { + return (ConsistencyCheck) doConsistencyCheck(); + } + /** * Runs a consistency check on this search index. * * @return the result of the consistency check. * @throws IOException if an error occurs while running the check. */ - public ConsistencyCheck runConsistencyCheck() throws IOException { - return index.runConsistencyCheck(); + public ConsistencyCheckInterface doConsistencyCheck() throws IOException { + return index.doConsistencyCheck(); } /** @@ -2309,6 +2321,42 @@ public String getIndexingConfigurationClass() { return indexingConfigurationClass.getName(); } + /** + * Sets the class name of the {@link ConsistencyCheckInterface} implementation. + * + * @param className the class name of the consistency check implementation. + */ + public void setConsistencyCheckClass(String className) { + if (className != null && className.length() > 0) { + try { + @SuppressWarnings("unchecked") + Class clazz = + (Class) Class.forName(className); + consistencyCheckClass = clazz; + } catch (ClassCastException e) { + log.warn("Invalid value for consistencyCheckClass, {} " + + "does not implement ConsistencyCheckInterface interface", className); + } catch (ClassNotFoundException e) { + log.warn("Invalid value for consistencyCheckClass, class {} not found.", + className); + } + } + } + + /** + * @return the class name of the configured {@link ConsistencyCheckInterface} implementation. 
+ */ + public String getConsistencyCheckClassName() { + return consistencyCheckClass.getName(); + } + + /** + * @return the configured {@link ConsistencyCheckInterface} class. + */ + public Class getConsistencyCheckClass() { + return consistencyCheckClass; + } + /** * Sets the name of the class that implements {@link SynonymProvider}. The * default value is null (none set). diff --git a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java index 84aa1774751..4334e486a93 100644 --- a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java +++ b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/TestHelper.java @@ -28,6 +28,7 @@ import org.apache.jackrabbit.core.persistence.check.ConsistencyReport; import org.apache.jackrabbit.core.query.QueryHandler; import org.apache.jackrabbit.core.query.lucene.ConsistencyCheck; +import org.apache.jackrabbit.core.query.lucene.ConsistencyCheckInterface; import org.apache.jackrabbit.core.query.lucene.SearchIndex; import org.apache.jackrabbit.test.NotExecutableException; @@ -78,7 +79,7 @@ public static ConsistencyReport checkConsistency(Session session, boolean runFix } } - public static ConsistencyCheck checkIndexConsistency(Session session) throws RepositoryException, NotExecutableException, IOException { + public static ConsistencyCheckInterface checkIndexConsistency(Session session) throws RepositoryException, NotExecutableException, IOException { Repository r = session.getRepository(); if (!(r instanceof RepositoryImpl)) { throw new NotExecutableException(); @@ -90,7 +91,7 @@ public static ConsistencyCheck checkIndexConsistency(Session session) throws Rep throw new NotExecutableException("No search index"); } SearchIndex si = (SearchIndex) qh; - return si.runConsistencyCheck(); + return si.doConsistencyCheck(); } /** diff --git 
a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java index 426a81a3f02..1a5c3311e42 100644 --- a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java +++ b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/lucene/SearchIndexConsistencyCheckTest.java @@ -63,7 +63,7 @@ public void testIndexMissesNode() throws Exception { searchIndex.updateNodes(remove, add); - ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck(); + ConsistencyCheckInterface consistencyCheck = searchIndex.doConsistencyCheck(); List errors = consistencyCheck.getErrors(); assertEquals("Expected 1 index consistency error", 1, errors.size()); @@ -74,7 +74,7 @@ public void testIndexMissesNode() throws Exception { assertTrue("Index was not repaired properly", searchIndexContainsNode(searchIndex, fooId)); - assertTrue("Consistency check still reports errors", searchIndex.runConsistencyCheck().getErrors().isEmpty()); + assertTrue("Consistency check still reports errors", searchIndex.doConsistencyCheck().getErrors().isEmpty()); } public void testMissingNodeDoubleCheck() throws Exception { @@ -91,7 +91,7 @@ public void testMissingNodeDoubleCheck() throws Exception { searchIndex.updateNodes(remove, add); - ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck(); + ConsistencyCheckInterface consistencyCheck = searchIndex.doConsistencyCheck(); List errors = consistencyCheck.getErrors(); assertEquals("Expected 1 index consistency error", 1, errors.size()); @@ -121,7 +121,7 @@ public void testIndexContainsUnknownNode() throws Exception { searchIndex.updateNodes(remove, add); - ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck(); + ConsistencyCheckInterface consistencyCheck = searchIndex.doConsistencyCheck(); List errors = 
consistencyCheck.getErrors(); assertEquals("Expected 1 index consistency error", 1, errors.size()); @@ -132,7 +132,7 @@ public void testIndexContainsUnknownNode() throws Exception { assertFalse("Index was not repaired properly", searchIndexContainsNode(searchIndex, nodeId)); - assertTrue("Consistency check still reports errors", searchIndex.runConsistencyCheck().getErrors().isEmpty()); + assertTrue("Consistency check still reports errors", searchIndex.doConsistencyCheck().getErrors().isEmpty()); } public void testUnknownNodeDoubleCheck() throws Exception { @@ -149,7 +149,7 @@ public void testUnknownNodeDoubleCheck() throws Exception { searchIndex.updateNodes(remove, add); - ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck(); + ConsistencyCheckInterface consistencyCheck = searchIndex.doConsistencyCheck(); List errors = consistencyCheck.getErrors(); assertEquals("Expected 1 index consistency error", 1, errors.size()); @@ -180,7 +180,7 @@ public void testIndexMissesAncestor() throws Exception { searchIndex.updateNodes(remove, add); - ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck(); + ConsistencyCheckInterface consistencyCheck = searchIndex.doConsistencyCheck(); List errors = consistencyCheck.getErrors(); assertEquals("Expected 2 index consistency errors", 2, errors.size()); @@ -192,7 +192,7 @@ public void testIndexMissesAncestor() throws Exception { assertTrue("Index was not repaired properly", searchIndexContainsNode(searchIndex, fooId)); - assertTrue("Consistency check still reports errors", searchIndex.runConsistencyCheck().getErrors().isEmpty()); + assertTrue("Consistency check still reports errors", searchIndex.doConsistencyCheck().getErrors().isEmpty()); } public void testMissingAncestorDoubleCheck() throws Exception { @@ -211,7 +211,7 @@ public void testMissingAncestorDoubleCheck() throws Exception { searchIndex.updateNodes(remove, add); - ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck(); + 
ConsistencyCheckInterface consistencyCheck = searchIndex.doConsistencyCheck(); List errors = consistencyCheck.getErrors(); assertEquals("Expected 2 index consistency errors", 2, errors.size()); @@ -248,7 +248,7 @@ public void testIndexContainsMultipleEntries() throws Exception { searchIndex.updateNodes(remove, add); - ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck(); + ConsistencyCheckInterface consistencyCheck = searchIndex.doConsistencyCheck(); List errors = consistencyCheck.getErrors(); assertEquals("Expected 1 index consistency error", 1, errors.size()); @@ -268,7 +268,7 @@ public void testIndexContainsMultipleEntries() throws Exception { errors = consistencyCheck.getErrors(); assertTrue("Consistency double check of multiple entries failed", errors.isEmpty()); - assertTrue("Consistency check still finds errors", searchIndex.runConsistencyCheck().getErrors().isEmpty()); + assertTrue("Consistency check still finds errors", searchIndex.doConsistencyCheck().getErrors().isEmpty()); } @@ -302,7 +302,7 @@ public void run() { t.start(); Thread.sleep(100); for (int i = 100; i > 0; i--) { - final ConsistencyCheck consistencyCheck = searchIndex.runConsistencyCheck(); + final ConsistencyCheckInterface consistencyCheck = searchIndex.doConsistencyCheck(); consistencyCheck.doubleCheckErrors(); final List errors = consistencyCheck.getErrors(); assertTrue(errors.isEmpty());