Skip to content

Commit 85c310e

Browse files
vinayakphegdewchevreuil
authored andcommitted
HBASE-28465 Implementation of framework for time-based priority bucket-cache (#5793)
Signed-off-by: Wellington Chevreuil <[email protected]> Change-Id: If213337be959a392a9bc55aba63b4d033df8e729
1 parent c93793a commit 85c310e

File tree

5 files changed

+665
-0
lines changed

5 files changed

+665
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.regionserver;
19+
20+
import org.apache.yetus.audience.InterfaceAudience;
21+
22+
@InterfaceAudience.Private
23+
public class DataTieringException extends Exception {
24+
DataTieringException(String reason) {
25+
super(reason);
26+
}
27+
}
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.regionserver;
19+
20+
import java.util.HashSet;
21+
import java.util.Map;
22+
import java.util.OptionalLong;
23+
import java.util.Set;
24+
import org.apache.hadoop.conf.Configuration;
25+
import org.apache.hadoop.fs.Path;
26+
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
27+
import org.apache.hadoop.hbase.util.Bytes;
28+
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
29+
import org.apache.yetus.audience.InterfaceAudience;
30+
import org.slf4j.Logger;
31+
import org.slf4j.LoggerFactory;
32+
33+
/**
34+
* The DataTieringManager class categorizes data into hot data and cold data based on the specified
35+
* {@link DataTieringType} when DataTiering is enabled. DataTiering is disabled by default with
36+
* {@link DataTieringType} set to {@link DataTieringType#NONE}. The {@link DataTieringType}
37+
* determines the logic for distinguishing data into hot or cold. By default, all data is considered
38+
* as hot.
39+
*/
40+
@InterfaceAudience.Private
41+
public class DataTieringManager {
42+
private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class);
43+
public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type";
44+
public static final String DATATIERING_HOT_DATA_AGE_KEY =
45+
"hbase.hstore.datatiering.hot.age.millis";
46+
public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE;
47+
public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 Days
48+
private static DataTieringManager instance;
49+
private final Map<String, HRegion> onlineRegions;
50+
51+
private DataTieringManager(Map<String, HRegion> onlineRegions) {
52+
this.onlineRegions = onlineRegions;
53+
}
54+
55+
/**
56+
* Initializes the DataTieringManager instance with the provided map of online regions.
57+
* @param onlineRegions A map containing online regions.
58+
*/
59+
public static synchronized void instantiate(Map<String, HRegion> onlineRegions) {
60+
if (instance == null) {
61+
instance = new DataTieringManager(onlineRegions);
62+
LOG.info("DataTieringManager instantiated successfully.");
63+
} else {
64+
LOG.warn("DataTieringManager is already instantiated.");
65+
}
66+
}
67+
68+
/**
69+
* Retrieves the instance of DataTieringManager.
70+
* @return The instance of DataTieringManager.
71+
* @throws IllegalStateException if DataTieringManager has not been instantiated.
72+
*/
73+
public static synchronized DataTieringManager getInstance() {
74+
if (instance == null) {
75+
throw new IllegalStateException(
76+
"DataTieringManager has not been instantiated. Call instantiate() first.");
77+
}
78+
return instance;
79+
}
80+
81+
/**
82+
* Determines whether data tiering is enabled for the given block cache key.
83+
* @param key the block cache key
84+
* @return {@code true} if data tiering is enabled for the HFile associated with the key,
85+
* {@code false} otherwise
86+
* @throws DataTieringException if there is an error retrieving the HFile path or configuration
87+
*/
88+
public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
89+
Path hFilePath = key.getFilePath();
90+
if (hFilePath == null) {
91+
throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
92+
}
93+
return isDataTieringEnabled(hFilePath);
94+
}
95+
96+
/**
97+
* Determines whether data tiering is enabled for the given HFile path.
98+
* @param hFilePath the path to the HFile
99+
* @return {@code true} if data tiering is enabled, {@code false} otherwise
100+
* @throws DataTieringException if there is an error retrieving the configuration
101+
*/
102+
public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException {
103+
Configuration configuration = getConfiguration(hFilePath);
104+
DataTieringType dataTieringType = getDataTieringType(configuration);
105+
return !dataTieringType.equals(DataTieringType.NONE);
106+
}
107+
108+
/**
109+
* Determines whether the data associated with the given block cache key is considered hot.
110+
* @param key the block cache key
111+
* @return {@code true} if the data is hot, {@code false} otherwise
112+
* @throws DataTieringException if there is an error retrieving data tiering information or the
113+
* HFile maximum timestamp
114+
*/
115+
public boolean isHotData(BlockCacheKey key) throws DataTieringException {
116+
Path hFilePath = key.getFilePath();
117+
if (hFilePath == null) {
118+
throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
119+
}
120+
return isHotData(hFilePath);
121+
}
122+
123+
/**
124+
* Determines whether the data in the HFile at the given path is considered hot based on the
125+
* configured data tiering type and hot data age.
126+
* @param hFilePath the path to the HFile
127+
* @return {@code true} if the data is hot, {@code false} otherwise
128+
* @throws DataTieringException if there is an error retrieving data tiering information or the
129+
* HFile maximum timestamp
130+
*/
131+
public boolean isHotData(Path hFilePath) throws DataTieringException {
132+
Configuration configuration = getConfiguration(hFilePath);
133+
DataTieringType dataTieringType = getDataTieringType(configuration);
134+
135+
if (dataTieringType.equals(DataTieringType.TIME_RANGE)) {
136+
long hotDataAge = getDataTieringHotDataAge(configuration);
137+
138+
HStoreFile hStoreFile = getHStoreFile(hFilePath);
139+
if (hStoreFile == null) {
140+
LOG.error("HStoreFile corresponding to " + hFilePath + " doesn't exist");
141+
return false;
142+
}
143+
OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
144+
if (!maxTimestamp.isPresent()) {
145+
throw new DataTieringException("Maximum timestamp not present for " + hFilePath);
146+
}
147+
148+
long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
149+
long diff = currentTimestamp - maxTimestamp.getAsLong();
150+
return diff <= hotDataAge;
151+
}
152+
// DataTieringType.NONE or other types are considered hot by default
153+
return true;
154+
}
155+
156+
/**
157+
* Returns a set of cold data filenames from the given set of cached blocks. Cold data is
158+
* determined by the configured data tiering type and hot data age.
159+
* @param allCachedBlocks a set of all cached block cache keys
160+
* @return a set of cold data filenames
161+
* @throws DataTieringException if there is an error determining whether a block is hot
162+
*/
163+
public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks)
164+
throws DataTieringException {
165+
Set<String> coldHFiles = new HashSet<>();
166+
for (BlockCacheKey key : allCachedBlocks) {
167+
if (coldHFiles.contains(key.getHfileName())) {
168+
continue;
169+
}
170+
if (!isHotData(key)) {
171+
coldHFiles.add(key.getHfileName());
172+
}
173+
}
174+
return coldHFiles;
175+
}
176+
177+
private HRegion getHRegion(Path hFilePath) throws DataTieringException {
178+
if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) {
179+
throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
180+
}
181+
String regionId = hFilePath.getParent().getParent().getName();
182+
HRegion hRegion = this.onlineRegions.get(regionId);
183+
if (hRegion == null) {
184+
throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist");
185+
}
186+
return hRegion;
187+
}
188+
189+
private HStore getHStore(Path hFilePath) throws DataTieringException {
190+
HRegion hRegion = getHRegion(hFilePath);
191+
String columnFamily = hFilePath.getParent().getName();
192+
HStore hStore = hRegion.getStore(Bytes.toBytes(columnFamily));
193+
if (hStore == null) {
194+
throw new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist");
195+
}
196+
return hStore;
197+
}
198+
199+
private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
200+
HStore hStore = getHStore(hFilePath);
201+
for (HStoreFile file : hStore.getStorefiles()) {
202+
if (file.getPath().equals(hFilePath)) {
203+
return file;
204+
}
205+
}
206+
return null;
207+
}
208+
209+
private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
210+
HStore hStore = getHStore(hFilePath);
211+
return hStore.getReadOnlyConfiguration();
212+
}
213+
214+
private DataTieringType getDataTieringType(Configuration conf) {
215+
return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()));
216+
}
217+
218+
private long getDataTieringHotDataAge(Configuration conf) {
219+
return Long.parseLong(
220+
conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
221+
}
222+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.regionserver;
19+
20+
import org.apache.yetus.audience.InterfaceAudience;
21+
22+
@InterfaceAudience.Public
23+
public enum DataTieringType {
24+
NONE,
25+
TIME_RANGE
26+
}

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,7 @@ public HRegionServer(final Configuration conf) throws IOException {
700700
// no need to instantiate block cache and mob file cache when master not carry table
701701
if (!isMasterNotCarryTable) {
702702
blockCache = BlockCacheFactory.createBlockCache(conf);
703+
DataTieringManager.instantiate(onlineRegions);
703704
mobFileCache = new MobFileCache(conf);
704705
}
705706

0 commit comments

Comments
 (0)