diff --git a/hadoop-cloud-storage-project/hadoop-tos/pom.xml b/hadoop-cloud-storage-project/hadoop-tos/pom.xml
index 4d44c09bf3068..4bbaf74e0f892 100644
--- a/hadoop-cloud-storage-project/hadoop-tos/pom.xml
+++ b/hadoop-cloud-storage-project/hadoop-tos/pom.xml
@@ -109,6 +109,21 @@
       <type>test-jar</type>
     </dependency>
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-api</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-params</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-engine</artifactId>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.assertj</groupId>
       <artifactId>assertj-core</artifactId>
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/TosChecksum.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/TosChecksum.java
index f0bedc60c4bc6..9546a351f5ecd 100644
--- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/TosChecksum.java
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/TosChecksum.java
@@ -1,9 +1,11 @@
 /*
- * ByteDance Volcengine EMR, Copyright 2022.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitContext.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitContext.java
index 40d371bcf882f..a6b2468225d31 100644
--- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitContext.java
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitContext.java
@@ -1,9 +1,11 @@
 /*
- * ByteDance Volcengine EMR, Copyright 2022.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitUtils.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitUtils.java
index e592fc43d0b11..ad3f968430932 100644
--- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitUtils.java
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitUtils.java
@@ -1,9 +1,11 @@
 /*
- * ByteDance Volcengine EMR, Copyright 2022.
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/Committer.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/Committer.java index fbea4c59f40ab..c000e1c35a497 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/Committer.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/Committer.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitterFactory.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitterFactory.java index d85b1b5c9afd4..ca4ff93740038 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitterFactory.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/CommitterFactory.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/MagicOutputStream.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/MagicOutputStream.java index 1bb22efc3beaa..2bcc539d54872 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/MagicOutputStream.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/MagicOutputStream.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/Pending.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/Pending.java index 408b6fbe4811c..b24d089f9972c 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/Pending.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/Pending.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/PendingSet.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/PendingSet.java index c68db9718acda..8be0d73672833 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/PendingSet.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/PendingSet.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/SuccessData.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/SuccessData.java index c28dc1f9f4395..53adc0cb80808 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/SuccessData.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/SuccessData.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/mapred/Committer.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/mapred/Committer.java index ce380145e8118..5e6fc91d23be7 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/mapred/Committer.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/mapred/Committer.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/PendingOps.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/PendingOps.java index 7012b63babaed..21f537dd25bfa 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/PendingOps.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/PendingOps.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/PendingOpsFactory.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/PendingOpsFactory.java index 40411d02c1e96..5197995117810 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/PendingOpsFactory.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/PendingOpsFactory.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/RawPendingOps.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/RawPendingOps.java index fe8b453d759c5..e8210fb220fa3 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/RawPendingOps.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/commit/ops/RawPendingOps.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/common/Bytes.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/common/Bytes.java index 810551b4c4511..2228f68678118 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/common/Bytes.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/common/Bytes.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/ObjectOutputStream.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/ObjectOutputStream.java index 8f86321e2fabc..858dc50021618 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/ObjectOutputStream.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/ObjectOutputStream.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/FileStagingPart.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/FileStagingPart.java index fb39e949febf6..d30c18af91503 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/FileStagingPart.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/FileStagingPart.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/StagingPart.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/StagingPart.java index b4fa812397285..9c61882d13eb9 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/StagingPart.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/StagingPart.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/State.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/State.java index 418baa6d9b13a..dafa4ef3d6292 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/State.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/staging/State.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/TOS.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/TOS.java index aac7b933478bb..111d8f994d574 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/TOS.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/TOS.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/AbstractCredentialsProvider.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/AbstractCredentialsProvider.java index be7cf189b5476..fff688f5c3b7d 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/AbstractCredentialsProvider.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/AbstractCredentialsProvider.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/DefaultCredentialsProviderChain.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/DefaultCredentialsProviderChain.java index ba3446c5bec71..434dfd4dcef7e 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/DefaultCredentialsProviderChain.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/DefaultCredentialsProviderChain.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/EnvironmentCredentialsProvider.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/EnvironmentCredentialsProvider.java index 8355a72037a3d..2209195e91e0b 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/EnvironmentCredentialsProvider.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/EnvironmentCredentialsProvider.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/ExpireableCredential.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/ExpireableCredential.java index d97e2a98a8ab6..cd977c4fdeece 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/ExpireableCredential.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/ExpireableCredential.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/SimpleCredentialsProvider.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/SimpleCredentialsProvider.java index 391a35e9b38b5..3d8ef2a477351 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/SimpleCredentialsProvider.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/object/tos/auth/SimpleCredentialsProvider.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/FSUtils.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/FSUtils.java index 42ec266dc7bbc..32f8cd45279f4 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/FSUtils.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/FSUtils.java @@ -35,11 +35,15 @@ private FSUtils() { public static void checkReadParameters(byte[] buffer, int offset, int length) { Preconditions.checkArgument(buffer != null, "Null buffer"); - Preconditions.checkArgument(offset >= 0 && offset <= buffer.length, - "offset: %s is out of range [%s, %s]", offset, 0, buffer.length); + if (offset < 0 || offset > buffer.length) { + throw new IndexOutOfBoundsException( + String.format("offset: %s is out of range [%s, %s]", offset, 0, buffer.length)); + } Preconditions.checkArgument(length >= 0, "length: %s is negative", length); - Preconditions.checkArgument(buffer.length >= offset + length, - OVERFLOW_ERROR_HINT, length, offset, (buffer.length - offset)); + if (buffer.length < offset + length) { + throw new IndexOutOfBoundsException( + String.format(OVERFLOW_ERROR_HINT, length, offset, (buffer.length - offset))); + } } public static URI normalizeURI(URI fsUri, Configuration hadoopConfig) { diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/FuseUtils.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/FuseUtils.java index 27219f9081a4a..e201555ae2602 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/FuseUtils.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/FuseUtils.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/JsonCodec.java b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/JsonCodec.java index 5b2d7371c73d2..81dc0f4bc921f 100644 --- a/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/JsonCodec.java +++ b/hadoop-cloud-storage-project/hadoop-tos/src/main/java/org/apache/hadoop/fs/tosfs/util/JsonCodec.java @@ -1,9 +1,11 @@ /* - * ByteDance Volcengine EMR, Copyright 2022. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestEnv.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestEnv.java new file mode 100644 index 0000000000000..59f85576131b3 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestEnv.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs; + +import org.apache.hadoop.fs.tosfs.util.ParseUtils; + +public final class TestEnv { + public static final String ENV_TOS_UNIT_TEST_ENABLED = "TOS_UNIT_TEST_ENABLED"; + private static final boolean TOS_TEST_ENABLED; + + static { + TOS_TEST_ENABLED = ParseUtils.envAsBoolean(ENV_TOS_UNIT_TEST_ENABLED, false); + } + + private TestEnv() {} + + public static boolean checkTestEnabled() { + return TOS_TEST_ENABLED; + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestRawFSUtils.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestRawFSUtils.java new file mode 100644 index 0000000000000..511a13b84214a --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestRawFSUtils.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestRawFSUtils { + + @Test + public void testIsAncestor() { + assertTrue(RawFSUtils.inSubtree("/", "/")); + assertTrue(RawFSUtils.inSubtree("/", "/a")); + assertTrue(RawFSUtils.inSubtree("/a", "/a")); + assertFalse(RawFSUtils.inSubtree("/a", "/")); + assertTrue(RawFSUtils.inSubtree("/", "/a/b/c")); + assertFalse(RawFSUtils.inSubtree("/a/b/c", "/")); + assertTrue(RawFSUtils.inSubtree("/", "/a/b/c.txt")); + assertFalse(RawFSUtils.inSubtree("/a/b/c.txt", "/")); + assertTrue(RawFSUtils.inSubtree("/a/b/", "/a/b")); + assertTrue(RawFSUtils.inSubtree("/a/b/", "/a/b/c")); + assertFalse(RawFSUtils.inSubtree("/a/b/c", "/a/b")); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestRawFileSystem.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestRawFileSystem.java new file mode 100644 index 0000000000000..bdf6ccad04e55 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestRawFileSystem.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.util.TempFiles; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestRawFileSystem { + private static final String FILE_STORE_ROOT = TempFiles.newTempDir("TestTosChecksum"); + + @Test + public void testInitializeFileSystem() throws URISyntaxException, IOException { + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key("filestore"), FILE_STORE_ROOT); + try (RawFileSystem fs = new RawFileSystem()) { + fs.initialize(new URI("filestore://bucket_a/a/b/c"), conf); + assertEquals("bucket_a", fs.bucket()); + + fs.initialize(new URI("filestore://bucket-/a/b/c"), conf); + assertEquals("bucket-", fs.bucket()); + + fs.initialize(new URI("filestore://-bucket/a/b/c"), conf); + assertEquals("-bucket", fs.bucket()); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestTosChecksum.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestTosChecksum.java new file mode 100644 index 0000000000000..e24e0fcc6c0e5 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestTosChecksum.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.fs.tosfs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileChecksum;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.tosfs.conf.ConfKeys;
+import org.apache.hadoop.fs.tosfs.conf.FileStoreKeys;
+import org.apache.hadoop.fs.tosfs.conf.TosKeys;
+import org.apache.hadoop.fs.tosfs.object.ChecksumType;
+import org.apache.hadoop.fs.tosfs.object.ObjectStorage;
+import org.apache.hadoop.fs.tosfs.object.ObjectStorageFactory;
+import org.apache.hadoop.fs.tosfs.util.TempFiles;
+import org.apache.hadoop.fs.tosfs.util.TestUtility;
+import org.apache.hadoop.fs.tosfs.util.UUIDUtils;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+
+import static org.apache.hadoop.fs.tosfs.object.tos.TOS.TOS_SCHEME;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
+
+public class TestTosChecksum {
+  private static final String FILE_STORE_ROOT = TempFiles.newTempDir("TestTosChecksum");
+  private static final String ALGORITHM_NAME = "mock-algorithm";
+  private static final String PREFIX = UUIDUtils.random();
+
+  private ObjectStorage objectStorage;
+
+  @BeforeAll
+  public static void beforeClass() {
+    assumeTrue(TestEnv.checkTestEnabled());
+  }
+
+  public void setObjectStorage(ObjectStorage objectStorage) {
+    this.objectStorage = objectStorage;
+  }
+
+  static Stream<Arguments> provideArguments() throws URISyntaxException {
+    List<Arguments> values = new ArrayList<>();
+
+    // Case 1: file store.
+    Configuration fileStoreConf = new Configuration();
+    fileStoreConf.set(FileStoreKeys.FS_FILESTORE_CHECKSUM_ALGORITHM, ALGORITHM_NAME);
+    fileStoreConf.set(FileStoreKeys.FS_FILESTORE_CHECKSUM_TYPE, ChecksumType.MD5.name());
+    fileStoreConf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key("filestore"), FILE_STORE_ROOT);
+    URI uri0 = new URI("filestore://" + TestUtility.bucket() + "/");
+
+    values.add(Arguments.of(
+        ChecksumType.MD5,
+        fileStoreConf,
+        uri0,
+        ObjectStorageFactory.create(uri0.getScheme(), uri0.getAuthority(), fileStoreConf)
+    ));
+
+    // Case 2: tos.
+ Configuration tosConf = new Configuration(); + tosConf.set(TosKeys.FS_TOS_CHECKSUM_ALGORITHM, ALGORITHM_NAME); + tosConf.set(TosKeys.FS_TOS_CHECKSUM_TYPE, ChecksumType.CRC32C.name()); + URI uri1 = new URI(TOS_SCHEME + "://" + TestUtility.bucket() + "/"); + + values.add(Arguments.of( + ChecksumType.CRC32C, + tosConf, + uri1, + ObjectStorageFactory.create(uri1.getScheme(), uri1.getAuthority(), tosConf) + )); + + return values.stream(); + } + + @AfterEach + public void tearDown() { + objectStorage.deleteAll(PREFIX); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testChecksumInfo(ChecksumType type, Configuration conf, URI uri, + ObjectStorage objectStore) { + setObjectStorage(objectStore); + + assertEquals(ALGORITHM_NAME, objectStore.checksumInfo().algorithm()); + assertEquals(type, objectStore.checksumInfo().checksumType()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testFileChecksum(ChecksumType type, Configuration conf, URI uri, + ObjectStorage objectStore) throws Exception { + setObjectStorage(objectStore); + + try (RawFileSystem fs = new RawFileSystem()) { + fs.initialize(uri, conf); + Path file = new Path("/" + PREFIX, "testFileChecksum"); + fs.create(file).close(); + FileChecksum checksum = fs.getFileChecksum(file, Long.MAX_VALUE); + assertEquals(ALGORITHM_NAME, checksum.getAlgorithmName()); + + String key = file.toString().substring(1); + byte[] checksumData = objectStore.head(key).checksum(); + assertArrayEquals(checksumData, checksum.getBytes()); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestTosFileSystem.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestTosFileSystem.java new file mode 100644 index 0000000000000..8b70f261416b6 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/TestTosFileSystem.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public class TestTosFileSystem { + + @BeforeAll + public static void before() { + assumeTrue(TestEnv.checkTestEnabled()); + } + + @Test + public void testUriVerification() throws URISyntaxException, IOException { + Configuration conf = new Configuration(false); + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://cluster-0/"); + + TosFileSystem tfs = new TosFileSystem(); + assertThrows(IllegalArgumentException.class, + () -> tfs.initialize(new URI("hdfs://cluster/"), conf), "Expect invalid uri error."); + assertThrows(IllegalArgumentException.class, () -> tfs.initialize(new URI("/path"), conf), + "Expect invalid uri error."); + tfs.initialize(new URI(String.format("tos://%s/", TestUtility.bucket())), conf); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/BaseJobSuite.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/BaseJobSuite.java new file mode 100644 index 0000000000000..0d2403dac2dc5 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/BaseJobSuite.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.commit; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.object.MultipartUpload; +import org.apache.hadoop.fs.tosfs.object.ObjectInfo; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.object.ObjectUtils; +import org.apache.hadoop.fs.tosfs.util.ParseUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.thirdparty.com.google.common.collect.Iterables; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public abstract class BaseJobSuite { + private static final Logger LOG = LoggerFactory.getLogger(BaseJobSuite.class); + public static final int DEFAULT_APP_ATTEMPT_ID = 1; + protected static final Text KEY_1 = new Text("key1"); + protected static final Text KEY_2 = new Text("key2"); + protected static final Text VAL_1 = new Text("val1"); + protected static final Text VAL_2 = new Text("val2"); + + private Job job; + private String jobId; + private FileSystem fs; + private Path outputPath; + private ObjectStorage storage; + + private final boolean dumpObjectStorage = ParseUtils.envAsBoolean("DUMP_OBJECT_STORAGE", false); + + protected abstract Path magicPartPath(); + + protected abstract Path magicPendingSetPath(); + + protected abstract void assertSuccessMarker() throws IOException; + + protected abstract void assertSummaryReport(Path reportDir) throws IOException; + + protected abstract void assertNoTaskAttemptPath() throws IOException; + + protected void assertMagicPathExist(Path output) throws IOException { + Path magicPath = CommitUtils.magicPath(output); + assertTrue(fs.exists(magicPath), String.format("Magic path: %s should exist", magicPath)); + } + + protected void assertMagicPathNotExist(Path output) throws IOException { + Path magicPath = CommitUtils.magicPath(output); + assertFalse(fs.exists(magicPath), String.format("Magic path: %s should not exist", magicPath)); + } + + protected abstract boolean skipTests(); + + public Path magicPendingPath() { + Path magicPart = magicPartPath(); + return new Path(magicPart.getParent(), magicPart.getName() + ".pending"); + } + + public Path magicJobPath() { + return CommitUtils.magicPath(outputPath); + } + + public String magicPartKey() { + return ObjectUtils.pathToKey(magicPartPath()); + } + + public String destPartKey() { + return MagicOutputStream.toDestKey(magicPartPath()); + } + + public Job job() { + return job; + } + + public String jobId() { + return jobId; + } + + public FileSystem fs() { + return fs; + } + + public Path outputPath() { + return outputPath; + } + + public ObjectStorage storage() { + return storage; + } + + public void setJob(Job value) { + this.job = value; + } + + public void setJobId(String value) { + this.jobId = value; + } + + public void setFs(FileSystem value) { + this.fs = value; + } + + public void 
setOutputPath(Path value) {
+    this.outputPath = value;
+  }
+
+  public void setObjectStorage(ObjectStorage value) {
+    this.storage = value;
+  }
+
+  public void assertHasMagicKeys() {
+    Iterable<ObjectInfo> objects = storage.listAll(ObjectUtils.pathToKey(magicJobPath(), true), "");
+    assertTrue(
+        Iterables.any(objects, o -> o.key().contains(CommitUtils.MAGIC) && o.key().contains(jobId)),
+        "Should have some __magic object keys");
+  }
+
+  public void assertHasBaseKeys() {
+    Iterable<ObjectInfo> objects = storage.listAll(ObjectUtils.pathToKey(magicJobPath(), true), "");
+    assertTrue(
+        Iterables.any(objects, o -> o.key().contains(CommitUtils.BASE) && o.key().contains(jobId)),
+        "Should have some __base object keys");
+  }
+
+  public void assertNoMagicPendingFile() {
+    String magicPendingKey = String.format("%s.pending", magicPartKey());
+    assertNull(storage.head(magicPendingKey), "Magic pending key should not exist");
+  }
+
+  public void assertHasMagicPendingFile() {
+    String magicPendingKey = String.format("%s.pending", magicPartKey());
+    assertNotNull(storage.head(magicPendingKey), "Magic pending key should exist");
+  }
+
+  public void assertNoMagicMultipartUpload() {
+    Iterable<MultipartUpload> uploads =
+        storage.listUploads(ObjectUtils.pathToKey(magicJobPath(), true));
+    boolean anyMagicUploads = Iterables.any(uploads, u -> u.key().contains(CommitUtils.MAGIC));
+    assertFalse(anyMagicUploads, "Should have no magic multipart uploads");
+  }
+
+  public void assertNoMagicObjectKeys() {
+    Iterable<ObjectInfo> objects = storage.listAll(ObjectUtils.pathToKey(magicJobPath(), true), "");
+    boolean anyMagicKeys =
+        Iterables.any(objects, o -> o.key().contains(CommitUtils.MAGIC) && o.key().contains(jobId));
+    assertFalse(anyMagicKeys, "Should not have any magic keys");
+  }
+
+  public void assertHasPendingSet() {
+    Iterable<ObjectInfo> objects = storage.listAll(ObjectUtils.pathToKey(magicJobPath(), true), "");
+    boolean anyPendingSet = Iterables.any(objects,
+        o -> o.key().contains(CommitUtils.PENDINGSET_SUFFIX) && o.key().contains(jobId));
+    assertTrue(anyPendingSet, "Should have the expected .pendingset file");
+  }
+
+  public void assertPendingSetAtRightLocation() {
+    Iterable<ObjectInfo> objects = storage.listAll(ObjectUtils.pathToKey(magicJobPath(), true), "");
+    Path magicJobAttemptPath =
+        CommitUtils.magicJobAttemptPath(job().getJobID().toString(), DEFAULT_APP_ATTEMPT_ID,
+            outputPath);
+    String inQualifiedPath = magicJobAttemptPath.toUri().getPath().substring(1);
+    Iterable<ObjectInfo> filtered = Iterables.filter(objects,
+        o -> o.key().contains(CommitUtils.PENDINGSET_SUFFIX) && o.key().contains(jobId));
+    boolean pendingSetAtRightLocation = Iterables.any(filtered,
+        o -> o.key().startsWith(inQualifiedPath) && o.key().contains(jobId));
+    assertTrue(pendingSetAtRightLocation,
+        "The .pendingset file should be located under the job's magic output path.");
+  }
+
+  public void assertMultipartUpload(int expectedUploads) {
+    // Note: care should be taken in the concurrent case: tests need to check the same output path.
+    Iterable<MultipartUpload> uploads =
+        storage.listUploads(ObjectUtils.pathToKey(outputPath, true));
+    long actualUploads = StreamSupport.stream(uploads.spliterator(), false).count();
+    assertEquals(expectedUploads, actualUploads);
+  }
+
+  public void assertPartFiles(int num) throws IOException {
+    FileStatus[] files = fs.listStatus(outputPath,
+        f -> !MagicOutputStream.isMagic(new Path(f.toUri())) && f.toUri().toString()
+            .contains("part-"));
+    assertEquals(num, files.length);
+    Iterable<ObjectInfo> objects = storage.listAll(ObjectUtils.pathToKey(outputPath, true), "");
+    List<ObjectInfo> infos = Arrays.stream(Iterables.toArray(objects, ObjectInfo.class))
+        .filter(o -> o.key().contains("part-")).collect(Collectors.toList());
+    assertEquals(num, infos.size(),
+        String.format("Number of part files should be %d, but got %d", num, infos.size()));
+  }
+
+  public void assertNoPartFiles() throws IOException {
+    FileStatus[] files = fs.listStatus(outputPath,
+        f -> !MagicOutputStream.isMagic(new Path(f.toUri())) && f.toUri().toString()
+            .contains("part-"));
+    assertEquals(0, files.length);
+    Iterable<ObjectInfo> objects = storage.listAll(ObjectUtils.pathToKey(outputPath, true), "");
+    boolean anyPartFile = Iterables.any(objects, o -> o.key().contains("part-"));
+    assertFalse(anyPartFile, "Should have no part files");
+  }
+
+  public void dumpObjectStorage() {
+    if (dumpObjectStorage) {
+      LOG.info("===> Dump object storage - Start <===");
+      dumpObjectKeys();
+      dumpMultipartUploads();
+      LOG.info("===> Dump object storage - End <===");
+    }
+  }
+
+  public void dumpObjectKeys() {
+    String prefix = ObjectUtils.pathToKey(magicJobPath());
+    LOG.info("Dump object keys with prefix {}", prefix);
+    storage.listAll("", "").forEach(o -> LOG.info("Dump object keys - {}", o));
+  }
+
+  public void dumpMultipartUploads() {
+    String prefix = ObjectUtils.pathToKey(magicJobPath());
+    LOG.info("Dump multipart uploads with prefix {}", prefix);
+    storage.listUploads("")
+        .forEach(u -> LOG.info("Dump multipart uploads - {}", u));
+  }
+
+  public void verifyPartContent() throws IOException {
+    String partKey = destPartKey();
+    LOG.info("Part key to verify is: {}", partKey);
+    try (InputStream in = storage.get(partKey).stream()) {
+      byte[] data = IOUtils.toByteArray(in);
+      String expected = String.format("%s\t%s\n%s\t%s\n", KEY_1, VAL_1, KEY_2, VAL_2);
+      assertEquals(expected, new String(data, StandardCharsets.UTF_8));
+    }
+  }
+
+  public void assertSuccessMarkerNotExist() throws IOException {
+    Path succPath = CommitUtils.successMarker(outputPath);
+    assertFalse(fs.exists(succPath), String.format("%s should not exist", succPath));
+  }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/CommitterTestBase.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/CommitterTestBase.java
new file mode 100644
index 0000000000000..43b168593ce9a
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/CommitterTestBase.java
@@ -0,0 +1,435 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.util.CommonUtils; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeFalse; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public abstract class CommitterTestBase { + private Configuration conf; + private FileSystem fs; + private Path outputPath; + private TaskAttemptID job1Task0Attempt0; + private TaskAttemptID job2Task1Attempt0; + private Path reportDir; + + @BeforeEach + public void setup() throws IOException { + conf = newConf(); + fs = FileSystem.get(conf); + String uuid = UUIDUtils.random(); + outputPath = fs.makeQualified(new Path("/test/" + uuid)); + job1Task0Attempt0 = JobSuite.createTaskAttemptId(randomTrimmedJobId(), 0, 0); + job2Task1Attempt0 = JobSuite.createTaskAttemptId(randomTrimmedJobId(), 1, 0); + + reportDir = fs.makeQualified(new Path("/report/" + uuid)); + fs.mkdirs(reportDir); + conf.set(Committer.COMMITTER_SUMMARY_REPORT_DIR, reportDir.toUri().toString()); + } + + protected abstract Configuration newConf(); + + @AfterEach + public void teardown() { + CommonUtils.runQuietly(() -> fs.delete(outputPath, true)); + IOUtils.closeStream(fs); + } + + @BeforeAll + public static void before() { + assumeTrue(TestEnv.checkTestEnabled()); + } + + @AfterAll + public static void afterClass() { + List committerThreads = Thread.getAllStackTraces().keySet() + .stream() + .map(Thread::getName) + .filter(n -> n.startsWith(Committer.THREADS_PREFIX)) + .collect(Collectors.toList()); + assertTrue(committerThreads.isEmpty(), "Outstanding committer threads"); + } + + private static String randomTrimmedJobId() { + SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd"); + return String.format("%s%04d_%04d", formatter.format(new Date()), + (long) (Math.random() * 1000), + (long) (Math.random() * 1000)); + } + + private static String randomFormedJobId() 
{ + return String.format("job_%s", randomTrimmedJobId()); + } + + @Test + public void testSetupJob() throws IOException { + JobSuite suite = JobSuite.create(conf, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + // Setup job. + suite.setupJob(); + suite.dumpObjectStorage(); + suite.assertHasMagicKeys(); + } + + @Test + public void testSetupJobWithOrphanPaths() throws IOException, InterruptedException { + JobSuite suite = JobSuite.create(conf, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + // Orphan success marker. + Path successPath = CommitUtils.successMarker(outputPath); + CommitUtils.save(fs, successPath, new byte[]{}); + assertTrue(fs.exists(successPath)); + + // Orphan job path. + Path jobPath = CommitUtils.magicJobPath(suite.committer().jobId(), outputPath); + fs.mkdirs(jobPath); + assertTrue(fs.exists(jobPath), "The job path should exist"); + Path subPath = new Path(jobPath, "tmp.pending"); + CommitUtils.save(fs, subPath, new byte[]{}); + assertTrue(fs.exists(subPath), "The sub path under the job path should exist."); + FileStatus jobPathStatus = fs.getFileStatus(jobPath); + + Thread.sleep(1000L); + suite.setupJob(); + suite.dumpObjectStorage(); + suite.assertHasMagicKeys(); + + assertFalse(fs.exists(successPath), "Should have deleted the success path"); + assertTrue(fs.exists(jobPath), "Should have re-created the job path"); + assertFalse(fs.exists(subPath), "Should have deleted the sub path under the job path"); + } + + @Test + public void testSetupTask() throws IOException { + JobSuite suite = JobSuite.create(conf, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + // Leftover task attempt path. + Path taskAttemptBasePath = + CommitUtils.magicTaskAttemptBasePath(suite.taskAttemptContext(), outputPath); + Path subTaskAttemptPath = new Path(taskAttemptBasePath, "tmp.pending"); + CommitUtils.save(fs, subTaskAttemptPath, new byte[]{}); + assertTrue(fs.exists(taskAttemptBasePath)); + assertTrue(fs.exists(subTaskAttemptPath)); + + // Setup job. + suite.setupJob(); + suite.assertHasMagicKeys(); + // Setting up the job clears everything under the job path. + assertFalse(fs.exists(taskAttemptBasePath)); + assertFalse(fs.exists(subTaskAttemptPath)); + + // Leave some task paths behind. + CommitUtils.save(fs, subTaskAttemptPath, new byte[]{}); + assertTrue(fs.exists(taskAttemptBasePath)); + assertTrue(fs.exists(subTaskAttemptPath)); + + // Setup task. + suite.setupTask(); + assertFalse(fs.exists(subTaskAttemptPath)); + } + + @Test + public void testCommitTask() throws Exception { + JobSuite suite = JobSuite.create(conf, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + + // Setup job + suite.setupJob(); + suite.dumpObjectStorage(); + suite.assertHasMagicKeys(); + + // Setup task + suite.setupTask(); + + // Write records. + suite.assertNoMagicPendingFile(); + suite.assertMultipartUpload(0); + suite.writeOutput(); + suite.dumpObjectStorage(); + suite.assertHasMagicPendingFile(); + suite.assertNoMagicMultipartUpload(); + suite.assertMultipartUpload(1); + // Assert the pending file content. + Path pendingPath = suite.magicPendingPath(); + byte[] pendingData = CommitUtils.load(suite.fs(), pendingPath); + Pending pending = Pending.deserialize(pendingData); + assertEquals(suite.destPartKey(), pending.destKey()); + assertEquals(20, pending.length()); + assertEquals(1, pending.parts().size()); + + // Commit the task. + suite.commitTask(); + + // Verify the pending set file.
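+ // Committing the task should only persist a .pendingset file under the magic path; the multipart upload itself is only completed at job commit time (here it is completed manually below to verify the part content).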
+ suite.assertHasPendingSet(); + // Assert the pending set file content. + Path pendingSetPath = suite.magicPendingSetPath(); + byte[] pendingSetData = CommitUtils.load(suite.fs(), pendingSetPath); + PendingSet pendingSet = PendingSet.deserialize(pendingSetData); + assertEquals(suite.job().getJobID().toString(), pendingSet.jobId()); + assertEquals(1, pendingSet.commits().size()); + assertEquals(pending, pendingSet.commits().get(0)); + assertEquals(pendingSet.extraData(), ImmutableMap.of(CommitUtils.TASK_ATTEMPT_ID, + suite.taskAttemptContext().getTaskAttemptID().toString())); + + // Complete the multipart upload and verify the results. + ObjectStorage storage = suite.storage(); + storage.completeUpload(pending.destKey(), pending.uploadId(), pending.parts()); + suite.verifyPartContent(); + } + + @Test + public void testAbortTask() throws Exception { + JobSuite suite = JobSuite.create(conf, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + suite.setupJob(); + suite.setupTask(); + + // Pre-check before the output write. + suite.assertNoMagicPendingFile(); + suite.assertMultipartUpload(0); + + // Execute the output write. + suite.writeOutput(); + + // Post-check after the output write. + suite.assertHasMagicPendingFile(); + suite.assertNoMagicMultipartUpload(); + suite.assertMultipartUpload(1); + // Assert the pending file content. + Path pendingPath = suite.magicPendingPath(); + byte[] pendingData = CommitUtils.load(suite.fs(), pendingPath); + Pending pending = Pending.deserialize(pendingData); + assertEquals(suite.destPartKey(), pending.destKey()); + assertEquals(20, pending.length()); + assertEquals(1, pending.parts().size()); + + // Abort the task. + suite.abortTask(); + + // Verify the state after aborting task. + suite.assertNoMagicPendingFile(); + suite.assertNoMagicMultipartUpload(); + suite.assertMultipartUpload(0); + suite.assertNoTaskAttemptPath(); + } + + @Test + public void testCommitJob() throws Exception { + JobSuite suite = JobSuite.create(conf, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + suite.setupJob(); + suite.setupTask(); + suite.writeOutput(); + suite.commitTask(); + + // Commit the job. + suite.assertNoPartFiles(); + suite.commitJob(); + // Verify the output. + suite.assertNoMagicMultipartUpload(); + suite.assertNoMagicObjectKeys(); + suite.assertSuccessMarker(); + suite.assertSummaryReport(reportDir); + suite.verifyPartContent(); + } + + + @Test + public void testCommitJobFailed() throws Exception { + JobSuite suite = JobSuite.create(conf, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + suite.setupJob(); + suite.setupTask(); + suite.writeOutput(); + suite.commitTask(); + + // Commit the job. + suite.assertNoPartFiles(); + suite.commitJob(); + } + + @Test + public void testCommitJobSuccessMarkerFailed() throws Exception { + JobSuite suite = JobSuite.create(conf, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + suite.setupJob(); + suite.setupTask(); + suite.writeOutput(); + suite.commitTask(); + + CommitUtils.injectError("marker"); + // Commit the job. + suite.assertNoPartFiles(); + assertThrows(IOException.class, suite::commitJob, "Expect commit job error."); + CommitUtils.removeError("marker"); + + // Verify the output. 
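+ // A failure while writing the success marker must not leave partial results behind: no magic keys, no marker, and an empty output directory.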
+ suite.assertNoMagicMultipartUpload(); + suite.assertNoMagicObjectKeys(); + suite.assertSuccessMarkerNotExist(); + assertEquals(0, suite.fs().listStatus(suite.outputPath()).length); + } + + @Test + public void testTaskCommitAfterJobCommit() throws Exception { + JobSuite suite = JobSuite.create(conf, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + + suite.setupJob(); + suite.setupTask(); + suite.writeOutput(); + suite.commitTask(); + + // Commit the job + suite.assertNoPartFiles(); + suite.commitJob(); + // Verify the output. + suite.assertNoMagicMultipartUpload(); + suite.assertNoMagicObjectKeys(); + suite.assertSuccessMarker(); + suite.verifyPartContent(); + + // Commit the task again. + assertThrows(FileNotFoundException.class, suite::commitTask); + } + + @Test + public void testTaskCommitWithConsistentJobId() throws Exception { + Configuration config = newConf(); + String consistentJobId = randomFormedJobId(); + config.set(CommitUtils.SPARK_WRITE_UUID, consistentJobId); + JobSuite suite = JobSuite.create(config, job1Task0Attempt0, outputPath); + assumeFalse(suite.skipTests()); + + // By now, we have two "jobId"s, one is spark uuid, and the other is the jobId in taskAttempt. + // The job committer will adopt the former. + suite.setupJob(); + + // Next, we clear spark uuid, and set the jobId of taskAttempt to another value. In this case, + // the committer will take the jobId of taskAttempt as the final jobId, which is not consistent + // with the one that committer holds. + config.unset(CommitUtils.SPARK_WRITE_UUID); + String anotherJobId = randomTrimmedJobId(); + TaskAttemptID taskAttemptId1 = + JobSuite.createTaskAttemptId(anotherJobId, JobSuite.DEFAULT_APP_ATTEMPT_ID); + final TaskAttemptContext attemptContext1 = + JobSuite.createTaskAttemptContext(config, taskAttemptId1, JobSuite.DEFAULT_APP_ATTEMPT_ID); + + assertThrows(IllegalArgumentException.class, () -> suite.setupTask(attemptContext1), + "JobId set in the context"); + + // Even though we use another taskAttempt, as long as we ensure the spark uuid is consistent, + // the jobId in committer is consistent. + config.set(CommitUtils.SPARK_WRITE_UUID, consistentJobId); + config.set(FileOutputFormat.OUTDIR, outputPath.toString()); + anotherJobId = randomTrimmedJobId(); + TaskAttemptID taskAttemptId2 = + JobSuite.createTaskAttemptId(anotherJobId, JobSuite.DEFAULT_APP_ATTEMPT_ID); + TaskAttemptContext attemptContext2 = + JobSuite.createTaskAttemptContext(config, taskAttemptId2, JobSuite.DEFAULT_APP_ATTEMPT_ID); + + suite.setupTask(attemptContext2); + // Write output must use the same task context with setup task. + suite.writeOutput(attemptContext2); + // Commit task must use the same task context with setup task. + suite.commitTask(attemptContext2); + suite.assertPendingSetAtRightLocation(); + + // Commit the job + suite.assertNoPartFiles(); + suite.commitJob(); + + // Verify the output. + suite.assertNoMagicMultipartUpload(); + suite.assertNoMagicObjectKeys(); + suite.assertSuccessMarker(); + suite.verifyPartContent(); + } + + @Test + public void testConcurrentJobs() throws Exception { + JobSuite suite1 = JobSuite.create(conf, job1Task0Attempt0, outputPath); + JobSuite suite2 = JobSuite.create(conf, job2Task1Attempt0, outputPath); + assumeFalse(suite1.skipTests()); + assumeFalse(suite2.skipTests()); + suite1.setupJob(); + suite2.setupJob(); + suite1.setupTask(); + suite2.setupTask(); + suite1.writeOutput(); + suite2.writeOutput(); + suite1.commitTask(); + suite2.commitTask(); + + // Job2 commit the job. 
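+ // Job2 commits first: only its own part file is published, while job1's magic data must still exist (checked via assertMagicPathExist below).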
+ suite2.assertNoPartFiles(); + suite2.commitJob(); + suite2.assertPartFiles(1); + + suite2.assertNoMagicMultipartUpload(); + suite2.assertNoMagicObjectKeys(); + suite2.assertSuccessMarker(); + suite2.assertSummaryReport(reportDir); + suite2.verifyPartContent(); + suite2.assertMagicPathExist(outputPath); + + // Job1 commit the job. + suite1.commitJob(); + suite2.assertPartFiles(2); + + // Verify the output. + suite1.assertNoMagicMultipartUpload(); + suite1.assertNoMagicObjectKeys(); + suite1.assertSuccessMarker(); + suite1.assertSummaryReport(reportDir); + suite1.verifyPartContent(); + suite1.assertMagicPathNotExist(outputPath); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/JobSuite.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/JobSuite.java new file mode 100644 index 0000000000000..dd80bb7213b02 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/JobSuite.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.object.ObjectStorageFactory; +import org.apache.hadoop.fs.tosfs.object.ObjectUtils; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.JobID; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.TaskType; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.PathOutputCommitterFactory; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.mapreduce.task.JobContextImpl; +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; +import org.apache.hadoop.net.NetUtils; + +import java.io.IOException; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public final class JobSuite extends BaseJobSuite { + private static final CommitterFactory FACTORY = new CommitterFactory(); + private final JobContext jobContext; + private final TaskAttemptContext taskAttemptContext; + private final Committer committer; + + private JobSuite(FileSystem fs, Configuration conf, TaskAttemptID taskAttemptId, int appAttemptId, + Path outputPath) throws IOException { + setFs(fs); + // Initialize the job instance. + setJob(Job.getInstance(conf)); + job().setJobID(JobID.forName(CommitUtils.buildJobId(conf, taskAttemptId.getJobID()))); + this.jobContext = createJobContext(job().getConfiguration(), taskAttemptId); + setJobId(CommitUtils.buildJobId(jobContext)); + this.taskAttemptContext = + createTaskAttemptContext(job().getConfiguration(), taskAttemptId, appAttemptId); + + // Set job output directory. + FileOutputFormat.setOutputPath(job(), outputPath); + setOutputPath(outputPath); + setObjectStorage(ObjectStorageFactory.create(outputPath.toUri().getScheme(), + outputPath.toUri().getAuthority(), conf)); + + // Initialize committer. 
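+ // CommitterFactory#createOutputCommitter is expected to return the magic Committer for this output path, hence the cast below.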
+ this.committer = (Committer) FACTORY.createOutputCommitter(outputPath, taskAttemptContext); + } + + public static JobSuite create(Configuration conf, TaskAttemptID taskAttemptId, Path outDir) + throws IOException { + FileSystem fs = outDir.getFileSystem(conf); + return new JobSuite(fs, conf, taskAttemptId, DEFAULT_APP_ATTEMPT_ID, outDir); + } + + public static TaskAttemptID createTaskAttemptId(String trimmedJobId, int attemptId) { + String attempt = String.format("attempt_%s_m_000000_%d", trimmedJobId, attemptId); + return TaskAttemptID.forName(attempt); + } + + public static TaskAttemptID createTaskAttemptId(String trimmedJobId, int taskId, int attemptId) { + String[] parts = trimmedJobId.split("_"); + return new TaskAttemptID(parts[0], Integer.parseInt(parts[1]), TaskType.MAP, taskId, attemptId); + } + + public static JobContext createJobContext(Configuration jobConf, TaskAttemptID taskAttemptId) { + return new JobContextImpl(jobConf, taskAttemptId.getJobID()); + } + + public static TaskAttemptContext createTaskAttemptContext( + Configuration jobConf, TaskAttemptID taskAttemptId, int appAttemptId) throws IOException { + // Set the key values for job configuration. + jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString()); + jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, appAttemptId); + jobConf.set(PathOutputCommitterFactory.COMMITTER_FACTORY_CLASS, + CommitterFactory.class.getName()); + return new TaskAttemptContextImpl(jobConf, taskAttemptId); + } + + public void setupJob() throws IOException { + committer.setupJob(jobContext); + } + + public void setupTask() throws IOException { + committer.setupTask(taskAttemptContext); + } + + // This method simulates the scenario that the job may set up task with a different + // taskAttemptContext, e.g., for a spark job. + public void setupTask(TaskAttemptContext taskAttemptCxt) throws IOException { + committer.setupTask(taskAttemptCxt); + } + + public void writeOutput() throws Exception { + writeOutput(taskAttemptContext); + } + + // This method simulates the scenario that the job may set up task with a different + // taskAttemptContext, e.g., for a spark job. + public void writeOutput(TaskAttemptContext taskAttemptCxt) throws Exception { + RecordWriter writer = new TextOutputFormat<>().getRecordWriter(taskAttemptCxt); + NullWritable nullKey = NullWritable.get(); + NullWritable nullVal = NullWritable.get(); + Object[] keys = new Object[]{KEY_1, nullKey, null, nullKey, null, KEY_2}; + Object[] vals = new Object[]{VAL_1, nullVal, null, null, nullVal, VAL_2}; + try { + assertEquals(keys.length, vals.length); + for (int i = 0; i < keys.length; i++) { + writer.write(keys[i], vals[i]); + } + } finally { + writer.close(taskAttemptCxt); + } + } + + public boolean needsTaskCommit() { + return committer.needsTaskCommit(taskAttemptContext); + } + + public void commitTask() throws IOException { + committer.commitTask(taskAttemptContext); + } + + // This method simulates the scenario that the job may set up task with a different + // taskAttemptContext, e.g., for a spark job. 
+ public void commitTask(TaskAttemptContext taskAttemptCxt) throws IOException { + committer.commitTask(taskAttemptCxt); + } + + public void abortTask() throws IOException { + committer.abortTask(taskAttemptContext); + } + + public void commitJob() throws IOException { + committer.commitJob(jobContext); + } + + @Override + public Path magicPartPath() { + return new Path(committer.getWorkPath(), + FileOutputFormat.getUniqueFile(taskAttemptContext, "part", "")); + } + + @Override + public Path magicPendingSetPath() { + return CommitUtils.magicTaskPendingSetPath(taskAttemptContext, outputPath()); + } + + public TaskAttemptContext taskAttemptContext() { + return taskAttemptContext; + } + + public Committer committer() { + return committer; + } + + @Override + public void assertNoTaskAttemptPath() throws IOException { + Path path = CommitUtils.magicTaskAttemptBasePath(taskAttemptContext, outputPath()); + assertFalse(fs().exists(path), "Task attempt path should not exist"); + String pathToKey = ObjectUtils.pathToKey(path); + assertNull(storage().head(pathToKey), "Should have no task attempt path key"); + } + + @Override + protected boolean skipTests() { + return storage().bucket().isDirectory(); + } + + @Override + public void assertSuccessMarker() throws IOException { + Path succPath = CommitUtils.successMarker(outputPath()); + assertTrue(fs().exists(succPath), String.format("%s should exist", succPath)); + SuccessData successData = SuccessData.deserialize(CommitUtils.load(fs(), succPath)); + assertEquals(SuccessData.class.getName(), successData.name()); + assertTrue(successData.success()); + assertEquals(NetUtils.getHostname(), successData.hostname()); + assertEquals(CommitUtils.COMMITTER_NAME, successData.committer()); + assertEquals( + String.format("Task committer %s", taskAttemptContext.getTaskAttemptID()), + successData.description()); + assertEquals(job().getJobID().toString(), successData.jobId()); + assertEquals(1, successData.filenames().size()); + assertEquals(destPartKey(), successData.filenames().get(0)); + } + + @Override + public void assertSummaryReport(Path reportDir) throws IOException { + Path reportPath = CommitUtils.summaryReport(reportDir, job().getJobID().toString()); + assertTrue(fs().exists(reportPath), String.format("%s should exist", reportPath)); + SuccessData reportData = SuccessData.deserialize(CommitUtils.load(fs(), reportPath)); + assertEquals(SuccessData.class.getName(), reportData.name()); + assertTrue(reportData.success()); + assertEquals(NetUtils.getHostname(), reportData.hostname()); + assertEquals(CommitUtils.COMMITTER_NAME, reportData.committer()); + assertEquals( + String.format("Task committer %s", taskAttemptContext.getTaskAttemptID()), + reportData.description()); + assertEquals(job().getJobID().toString(), reportData.jobId()); + assertEquals(1, reportData.filenames().size()); + assertEquals(destPartKey(), reportData.filenames().get(0)); + assertEquals("clean", reportData.diagnostics().get("stage")); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/MRJobTestBase.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/MRJobTestBase.java new file mode 100644 index 0000000000000..f57c50e51e494 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/MRJobTestBase.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.commit; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.examples.terasort.TeraGen; +import org.apache.hadoop.examples.terasort.TeraSort; +import org.apache.hadoop.examples.terasort.TeraSortConfigKeys; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.object.ObjectInfo; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.object.ObjectStorageFactory; +import org.apache.hadoop.fs.tosfs.object.ObjectUtils; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.WordCount; +import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public abstract class MRJobTestBase { + private static final Logger LOG = LoggerFactory.getLogger(MRJobTestBase.class); + + private static Configuration conf = new Configuration(); + private static MiniMRYarnCluster yarnCluster; + + private static FileSystem fs; + + private static Path testDataPath; + + public static void setConf(Configuration newConf) { + conf = newConf; + } + + @BeforeAll + public static void beforeClass() throws IOException { + assumeTrue(TestEnv.checkTestEnabled()); + + conf.setBoolean(JHAdminConfig.MR_HISTORY_CLEANER_ENABLE, false); + conf.setBoolean(YarnConfiguration.NM_DISK_HEALTH_CHECK_ENABLE, false); + conf.setInt(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, 100); + + conf.set("mapreduce.outputcommitter.factory.scheme.tos", + CommitterFactory.class.getName()); // 3x newApiCommitter=true. + conf.set("mapred.output.committer.class", + Committer.class.getName()); // 2x and 3x newApiCommitter=false. + conf.set("mapreduce.outputcommitter.class", + org.apache.hadoop.fs.tosfs.commit.Committer.class.getName()); // 2x newApiCommitter=true. + + // Start the yarn cluster. 
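+ // A two-NodeManager MiniMRYarnCluster is enough for the small TeraGen/TeraSort/WordCount jobs used in these tests.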
+ yarnCluster = new MiniMRYarnCluster("yarn-" + System.currentTimeMillis(), 2); + LOG.info("Default filesystem: {}", conf.get("fs.defaultFS")); + LOG.info("Default filesystem implementation: {}", conf.get("fs.AbstractFileSystem.tos.impl")); + + yarnCluster.init(conf); + yarnCluster.start(); + + fs = FileSystem.get(conf); + testDataPath = new Path("/mr-test-" + UUIDUtils.random()) + .makeQualified(fs.getUri(), fs.getWorkingDirectory()); + } + + @AfterAll + public static void afterClass() throws IOException { + if (!TestEnv.checkTestEnabled()) { + return; + } + + fs.delete(testDataPath, true); + if (yarnCluster != null) { + yarnCluster.stop(); + } + } + + @AfterEach + public void after() throws IOException { + } + + @Test + public void testTeraGen() throws Exception { + Path teraGenPath = + new Path(testDataPath, "teraGen").makeQualified(fs.getUri(), fs.getWorkingDirectory()); + Path output = new Path(teraGenPath, "output"); + JobConf jobConf = new JobConf(yarnCluster.getConfig()); + jobConf.addResource(conf); + jobConf.setInt(TeraSortConfigKeys.SAMPLE_SIZE.key(), 1000); + jobConf.setInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), 10); + jobConf.setBoolean(TeraSortConfigKeys.USE_SIMPLE_PARTITIONER.key(), false); + + String[] args = new String[]{Integer.toString(1000), output.toString()}; + int result = ToolRunner.run(jobConf, new TeraGen(), args); + assertEquals(0, result, String.format("teragen %s", StringUtils.join(" ", args))); + + // Verify the success data. + ObjectStorage storage = ObjectStorageFactory.create( + output.toUri().getScheme(), output.toUri().getAuthority(), conf); + int byteSizes = 0; + + Path success = new Path(output, CommitUtils.SUCCESS); + byte[] serializedData = CommitUtils.load(fs, success); + SuccessData successData = SuccessData.deserialize(serializedData); + assertTrue(successData.success(), "Should execute successfully"); + // Assert the destination paths. + assertEquals(2, successData.filenames().size()); + successData.filenames().sort(String::compareTo); + assertEquals(ObjectUtils.pathToKey(new Path(output, "part-m-00000")), + successData.filenames().get(0)); + assertEquals(ObjectUtils.pathToKey(new Path(output, "part-m-00001")), + successData.filenames().get(1)); + + for (String partFileKey : successData.filenames()) { + ObjectInfo objectInfo = storage.head(partFileKey); + assertNotNull(objectInfo, "Output file should be existing"); + byteSizes += objectInfo.size(); + } + + assertEquals(byteSizes, 100 /* Each row 100 bytes */ * 1000 /* total 1000 rows */); + } + + @Test + public void testTeraSort() throws Exception { + Path teraGenPath = + new Path(testDataPath, "teraGen").makeQualified(fs.getUri(), fs.getWorkingDirectory()); + Path inputPath = new Path(teraGenPath, "output"); + Path outputPath = new Path(teraGenPath, "sortOutput"); + JobConf jobConf = new JobConf(yarnCluster.getConfig()); + jobConf.addResource(conf); + jobConf.setInt(TeraSortConfigKeys.SAMPLE_SIZE.key(), 1000); + jobConf.setInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), 10); + jobConf.setBoolean(TeraSortConfigKeys.USE_SIMPLE_PARTITIONER.key(), false); + String[] args = new String[]{inputPath.toString(), outputPath.toString()}; + int result = ToolRunner.run(jobConf, new TeraSort(), args); + assertEquals(0, result, String.format("terasort %s", StringUtils.join(" ", args))); + + // Verify the success data. 
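+ // The success marker written by the committer lists every part file of the job, so it is used here to locate the outputs and verify their total size.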
+ ObjectStorage storage = ObjectStorageFactory + .create(outputPath.toUri().getScheme(), outputPath.toUri().getAuthority(), conf); + int byteSizes = 0; + + Path success = new Path(outputPath, CommitUtils.SUCCESS); + byte[] serializedData = CommitUtils.load(fs, success); + SuccessData successData = SuccessData.deserialize(serializedData); + assertTrue(successData.success(), "Should execute successfully"); + // Assert the destination paths. + assertEquals(1, successData.filenames().size()); + successData.filenames().sort(String::compareTo); + assertEquals(ObjectUtils.pathToKey(new Path(outputPath, "part-r-00000")), + successData.filenames().get(0)); + + for (String partFileKey : successData.filenames()) { + ObjectInfo objectInfo = storage.head(partFileKey); + assertNotNull(objectInfo, "Output file should exist"); + byteSizes += objectInfo.size(); + } + + assertEquals(byteSizes, 100 /* Each row 100 bytes */ * 1000 /* total 1000 rows */); + } + + @Disabled + @Test + public void testWordCount() throws Exception { + Path wordCountPath = + new Path(testDataPath, "wc").makeQualified(fs.getUri(), fs.getWorkingDirectory()); + Path output = new Path(wordCountPath, "output"); + Path input = new Path(wordCountPath, "input"); + JobConf jobConf = new JobConf(yarnCluster.getConfig()); + jobConf.addResource(conf); + + if (!fs.mkdirs(input)) { + throw new IOException("Mkdirs failed to create " + input.toString()); + } + + DataOutputStream file = fs.create(new Path(input, "part-0")); + file.writeBytes("a a b c"); + file.close(); + + String[] args = new String[]{input.toString(), output.toString()}; + int result = ToolRunner.run(jobConf, new WordCount(), args); + assertEquals(0, result, String.format("WordCount %s", StringUtils.join(" ", args))); + + // Verify the success path. + assertTrue(fs.exists(new Path(output, CommitUtils.SUCCESS))); + assertTrue(fs.exists(new Path(output, "part-00000"))); + + Path success = new Path(output, CommitUtils.SUCCESS); + assertTrue(CommitUtils.load(fs, success).length != 0, "Success file must not be empty"); + + byte[] serializedData = CommitUtils.load(fs, new Path(output, "part-00000")); + String outputAsStr = new String(serializedData); + Map resAsMap = getResultAsMap(outputAsStr); + assertEquals(2, (int) resAsMap.get("a")); + assertEquals(1, (int) resAsMap.get("b")); + assertEquals(1, (int) resAsMap.get("c")); + } + + private Map getResultAsMap(String outputAsStr) { + Map result = new HashMap<>(); + for (String line : outputAsStr.split("\n")) { + String[] tokens = line.split("\t"); + assertTrue(tokens.length > 1, + String.format("Not enough tokens in string %s from output %s", line, outputAsStr)); + result.put(tokens[0], Integer.parseInt(tokens[1])); + } + return result; + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/TestCommitter.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/TestCommitter.java new file mode 100644 index 0000000000000..705e968aaa84b --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/TestCommitter.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.util.TestUtility; + +public class TestCommitter extends CommitterTestBase { + @Override + protected Configuration newConf() { + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", String.format("tos://%s", TestUtility.bucket())); + return conf; + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/TestMRJob.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/TestMRJob.java new file mode 100644 index 0000000000000..aa64730a1a9c5 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/TestMRJob.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.commit; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.conf.TosKeys; +import org.apache.hadoop.fs.tosfs.object.tos.TOS; +import org.apache.hadoop.fs.tosfs.util.ParseUtils; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.junit.jupiter.api.BeforeAll; + +import java.io.IOException; + +public class TestMRJob extends MRJobTestBase { + + @BeforeAll + public static void beforeClass() throws IOException { + // Create the new configuration and set it to the IT Case. + Configuration newConf = new Configuration(); + newConf.set("fs.defaultFS", String.format("tos://%s", TestUtility.bucket())); + // Application in yarn cluster cannot read the environment variables from user bash, so here we + // set it into the config manually. + newConf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key("tos"), + ParseUtils.envAsString(TOS.ENV_TOS_ENDPOINT, false)); + newConf.set(TosKeys.FS_TOS_ACCESS_KEY_ID, + ParseUtils.envAsString(TOS.ENV_TOS_ACCESS_KEY_ID, false)); + newConf.set(TosKeys.FS_TOS_SECRET_ACCESS_KEY, + ParseUtils.envAsString(TOS.ENV_TOS_SECRET_ACCESS_KEY, false)); + + MRJobTestBase.setConf(newConf); + // Continue to prepare the IT Case environments. 
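+ // MRJobTestBase.beforeClass() skips the whole suite unless the TOS test environment is enabled, and then boots the mini YARN cluster.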
+ MRJobTestBase.beforeClass(); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/TestMagicOutputStream.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/TestMagicOutputStream.java new file mode 100644 index 0000000000000..0844b130bebbb --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/TestMagicOutputStream.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.commit; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.common.ThreadPools; +import org.apache.hadoop.fs.tosfs.object.MultipartUpload; +import org.apache.hadoop.fs.tosfs.object.ObjectStorageTestBase; +import org.apache.hadoop.fs.tosfs.object.ObjectUtils; +import org.apache.hadoop.fs.tosfs.object.Part; +import org.apache.hadoop.fs.tosfs.object.staging.StagingPart; +import org.apache.hadoop.fs.tosfs.object.staging.State; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.concurrent.ExecutorService; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +public class TestMagicOutputStream extends ObjectStorageTestBase { + + private static ExecutorService threadPool; + + @BeforeAll + public static void beforeClass() { + threadPool = ThreadPools.newWorkerPool("TestMagicOutputStream-pool"); + } + + @AfterAll + public static void afterClass() { + if (!threadPool.isShutdown()) { + threadPool.shutdown(); + } + } + + private static Path path(String p) { + return new Path(p); + } + + private static Path path(Path parent, String child) { + return new Path(parent, child); + } + + @Test + public void testCreateDestKey() { + Object[][] testCases = new Object[][]{ + new Object[]{path("tos://bucket/__magic/a.txt"), "a.txt"}, + new Object[] {path("tos://bucket/output/__magic/job-1/tasks/tasks-attempt-0/a.txt"), + "output/a.txt"}, + new Object[]{path("tos://bucket/__magic/job0/task0/__base/a.txt"), "a.txt"}, + new Object[] {path("tos://bucket/output/__magic/job0/task0/__base/part/part-m-1000"), + "output/part/part-m-1000"}, + new Object[]{path("tos://bucket/a/b/c/__magic/__base/d/e/f"), "a/b/c/d/e/f"}, + new 
Object[]{path("tos://bucket/a/b/c/__magic/d/e/f"), "a/b/c/f"}, + }; + + for (Object[] input : testCases) { + String actualDestKey = MagicOutputStream.toDestKey((Path) input[0]); + assertEquals(actualDestKey, input[1], "Unexpected destination key."); + } + } + + @Test + public void testNonMagicPath() { + try (MagicOutputStream ignored = new TestingMagicOutputStream(path(testDir(), "non-magic"))) { + fail("Cannot create magic output stream for non-magic path"); + } catch (Exception ignored) { + } + } + + @Test + public void testWriteZeroByte() throws IOException { + Path magic = path(path(testDir(), CommitUtils.MAGIC), "zero-byte.txt"); + MagicOutputStream out = new TestingMagicOutputStream(magic); + // write zero-byte and close. + out.close(); + assertStagingFiles(0, out.stagingParts()); + + // Read and validate the .pending contents + try (InputStream in = getStorage().get(out.pendingKey()).stream()) { + byte[] data = IOUtils.toByteArray(in); + Pending commit = Pending.deserialize(data); + assertEquals(getStorage().bucket().name(), commit.bucket()); + assertEquals(out.destKey(), commit.destKey()); + assertTrue(StringUtils.isNoneEmpty(commit.uploadId())); + assertTrue(commit.createdTimestamp() > 0); + assertEquals(1, commit.parts().size()); + assertEquals(0, commit.length()); + assertEquals(out.upload().uploadId(), commit.uploadId()); + } + } + + public void testWrite(int len) throws IOException { + Path magic = path(path(testDir(), CommitUtils.MAGIC), len + ".txt"); + int uploadPartSize = 8 << 20; + int partNum = (len - 1) / (8 << 20) + 1; + + MagicOutputStream out = new TestingMagicOutputStream(magic); + byte[] data = TestUtility.rand(len); + out.write(data); + out.close(); + + assertStagingFiles(partNum, out.stagingParts()); + assertEquals(ObjectUtils.pathToKey(magic) + CommitUtils.PENDING_SUFFIX, out.pendingKey()); + + Pending commit; + try (InputStream in = getStorage().get(out.pendingKey()).stream()) { + byte[] serializedData = IOUtils.toByteArray(in); + commit = Pending.deserialize(serializedData); + assertEquals(getStorage().bucket().name(), commit.bucket()); + assertEquals(out.destKey(), commit.destKey()); + assertTrue(commit.createdTimestamp() > 0); + assertEquals(len, commit.length()); + assertEquals(out.upload().uploadId(), commit.uploadId()); + // Verify the upload part list. + assertEquals(partNum, commit.parts().size()); + if (!commit.parts().isEmpty()) { + for (int i = 0; i < partNum - 1; i += 1) { + assertEquals(uploadPartSize, commit.parts().get(i).size()); + } + Part lastPart = commit.parts().get(partNum - 1); + assertTrue(lastPart.size() > 0 && lastPart.size() <= uploadPartSize); + } + } + + // List multipart uploads + int uploadsNum = 0; + for (MultipartUpload upload : getStorage().listUploads(out.destKey())) { + uploadsNum += 1; + assertEquals(out.upload(), upload); + } + assertEquals(1L, uploadsNum); + + // The target object is still not visible for object storage. + assertNull(getStorage().head(out.destKey())); + + // Complete the upload and validate the content. 
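+ // Completing the multipart upload with the parts recorded in the .pending file should make the destination object visible with exactly the bytes that were written.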
+ getStorage().completeUpload(out.destKey(), out.upload().uploadId(), commit.parts()); + try (InputStream in = getStorage().get(out.destKey()).stream()) { + assertArrayEquals(data, IOUtils.toByteArray(in)); + } + } + + @Test + public void testWrite1MB() throws IOException { + testWrite(1 << 20); + } + + @Test + public void testWrite24MB() throws IOException { + testWrite(24 << 20); + } + + @Test + public void testWrite100MB() throws IOException { + testWrite(100 << 20); + } + + private static void assertStagingFiles(int expectedNum, List stagings) { + assertEquals(expectedNum, stagings.size()); + for (StagingPart staging : stagings) { + assertEquals(State.CLEANED, staging.state()); + } + } + + private class TestingMagicOutputStream extends MagicOutputStream { + + TestingMagicOutputStream(Path magic) { + super(fs(), getStorage(), threadPool, tosConf(), magic); + } + + protected void persist(Path p, byte[] data) { + getStorage().put(ObjectUtils.pathToKey(p), data); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/mapred/CommitterTestBase.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/mapred/CommitterTestBase.java new file mode 100644 index 0000000000000..e9f6921445f6b --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/mapred/CommitterTestBase.java @@ -0,0 +1,382 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.commit.mapred; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.commit.CommitUtils; +import org.apache.hadoop.fs.tosfs.commit.Pending; +import org.apache.hadoop.fs.tosfs.commit.PendingSet; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.util.CommonUtils; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.TaskAttemptContext; +import org.apache.hadoop.mapred.TaskAttemptID; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public abstract class CommitterTestBase { + private Configuration conf; + private FileSystem fs; + private Path outputPath; + private TaskAttemptID taskAttempt0; + private Path reportDir; + + @BeforeEach + public void setup() throws IOException { + conf = newConf(); + fs = FileSystem.get(conf); + String uuid = UUIDUtils.random(); + outputPath = fs.makeQualified(new Path("/test/" + uuid)); + taskAttempt0 = JobSuite.createTaskAttemptId(randomTrimmedJobId(), 0); + + reportDir = fs.makeQualified(new Path("/report/" + uuid)); + fs.mkdirs(reportDir); + conf.set(org.apache.hadoop.fs.tosfs.commit.Committer.COMMITTER_SUMMARY_REPORT_DIR, + reportDir.toUri().toString()); + } + + protected abstract Configuration newConf(); + + @AfterEach + public void teardown() { + CommonUtils.runQuietly(() -> fs.delete(outputPath, true)); + IOUtils.closeStream(fs); + } + + @BeforeAll + public static void beforeClass() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @AfterAll + public static void afterClass() { + if (!TestEnv.checkTestEnabled()) { + return; + } + + List committerThreads = Thread.getAllStackTraces().keySet() + .stream() + .map(Thread::getName) + .filter(n -> n.startsWith(org.apache.hadoop.fs.tosfs.commit.Committer.THREADS_PREFIX)) + .collect(Collectors.toList()); + assertTrue(committerThreads.isEmpty(), "Outstanding committer threads"); + } + + private static String randomTrimmedJobId() { + SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd"); + return String.format("%s%04d_%04d", formatter.format(new Date()), + (long) (Math.random() * 1000), + (long) (Math.random() * 1000)); + } + + private static String randomFormedJobId() { + return String.format("job_%s", randomTrimmedJobId()); + } + + @Test + public void testSetupJob() throws IOException { + JobSuite suite = JobSuite.create(conf, taskAttempt0, outputPath); + Assumptions.assumeFalse(suite.skipTests()); + + // Setup job. 
+ suite.setupJob(); + suite.dumpObjectStorage(); + suite.assertHasMagicKeys(); + } + + @Test + public void testSetupJobWithOrphanPaths() throws IOException, InterruptedException { + JobSuite suite = JobSuite.create(conf, taskAttempt0, outputPath); + Assumptions.assumeFalse(suite.skipTests()); + + // Orphan success marker. + Path successPath = CommitUtils.successMarker(outputPath); + CommitUtils.save(fs, successPath, new byte[]{}); + assertTrue(fs.exists(successPath), "The success file should exist."); + + // Orphan job path. + Path jobPath = CommitUtils.magicJobPath(suite.committer().jobId(), outputPath); + fs.mkdirs(jobPath); + assertTrue(fs.exists(jobPath), "The job path should exist."); + Path subPath = new Path(jobPath, "tmp.pending"); + CommitUtils.save(fs, subPath, new byte[]{}); + assertTrue(fs.exists(subPath), "The sub path under job path should be existing."); + FileStatus jobPathStatus = fs.getFileStatus(jobPath); + + Thread.sleep(1000L); + suite.setupJob(); + suite.dumpObjectStorage(); + suite.assertHasMagicKeys(); + + assertFalse(fs.exists(successPath), "Should have deleted the success path"); + assertTrue(fs.exists(jobPath), "Should have re-created the job path"); + assertFalse(fs.exists(subPath), "Should have deleted the sub path under the job path"); + } + + @Test + public void testSetupTask() throws IOException { + JobSuite suite = JobSuite.create(conf, taskAttempt0, outputPath); + Assumptions.assumeFalse(suite.skipTests()); + + // Remaining attempt task path. + Path taskAttemptBasePath = + CommitUtils.magicTaskAttemptBasePath(suite.taskAttemptContext(), outputPath); + Path subTaskAttemptPath = new Path(taskAttemptBasePath, "tmp.pending"); + CommitUtils.save(fs, subTaskAttemptPath, new byte[]{}); + assertTrue(fs.exists(taskAttemptBasePath)); + assertTrue(fs.exists(subTaskAttemptPath)); + + // Setup job. + suite.setupJob(); + suite.assertHasMagicKeys(); + // It will clear all the job path once we've set up the job. + assertFalse(fs.exists(taskAttemptBasePath)); + assertFalse(fs.exists(subTaskAttemptPath)); + + // Left some the task paths. + CommitUtils.save(fs, subTaskAttemptPath, new byte[]{}); + assertTrue(fs.exists(taskAttemptBasePath)); + assertTrue(fs.exists(subTaskAttemptPath)); + + // Setup task. + suite.setupTask(); + assertFalse(fs.exists(subTaskAttemptPath)); + } + + @Test + public void testCommitTask() throws Exception { + JobSuite suite = JobSuite.create(conf, taskAttempt0, outputPath); + Assumptions.assumeFalse(suite.skipTests()); + // Setup job + suite.setupJob(); + suite.dumpObjectStorage(); + suite.assertHasMagicKeys(); + + // Setup task + suite.setupTask(); + + // Write records. + suite.assertNoMagicPendingFile(); + suite.assertMultipartUpload(0); + suite.writeOutput(); + suite.dumpObjectStorage(); + suite.assertHasMagicPendingFile(); + suite.assertNoMagicMultipartUpload(); + suite.assertMultipartUpload(1); + // Assert the pending file content. + Path pendingPath = suite.magicPendingPath(); + byte[] pendingData = CommitUtils.load(suite.fs(), pendingPath); + Pending pending = Pending.deserialize(pendingData); + assertEquals(suite.destPartKey(), pending.destKey()); + assertEquals(20, pending.length()); + assertEquals(1, pending.parts().size()); + + // Commit the task. + suite.commitTask(); + + // Verify the pending set file. + suite.assertHasPendingSet(); + // Assert the pending set file content. 
+ Path pendingSetPath = suite.magicPendingSetPath(); + byte[] pendingSetData = CommitUtils.load(suite.fs(), pendingSetPath); + PendingSet pendingSet = PendingSet.deserialize(pendingSetData); + assertEquals(suite.job().getJobID().toString(), pendingSet.jobId()); + assertEquals(1, pendingSet.commits().size()); + assertEquals(pending, pendingSet.commits().get(0)); + assertEquals(pendingSet.extraData(), ImmutableMap.of(CommitUtils.TASK_ATTEMPT_ID, + suite.taskAttemptContext().getTaskAttemptID().toString())); + + // Complete the multipart upload and verify the results. + ObjectStorage storage = suite.storage(); + storage.completeUpload(pending.destKey(), pending.uploadId(), pending.parts()); + suite.verifyPartContent(); + } + + @Test + public void testAbortTask() throws Exception { + JobSuite suite = JobSuite.create(conf, taskAttempt0, outputPath); + Assumptions.assumeFalse(suite.skipTests()); + suite.setupJob(); + suite.setupTask(); + + // Pre-check before the output write. + suite.assertNoMagicPendingFile(); + suite.assertMultipartUpload(0); + + // Execute the output write. + suite.writeOutput(); + + // Post-check after the output write. + suite.assertHasMagicPendingFile(); + suite.assertNoMagicMultipartUpload(); + suite.assertMultipartUpload(1); + // Assert the pending file content. + Path pendingPath = suite.magicPendingPath(); + byte[] pendingData = CommitUtils.load(suite.fs(), pendingPath); + Pending pending = Pending.deserialize(pendingData); + assertEquals(suite.destPartKey(), pending.destKey()); + assertEquals(20, pending.length()); + assertEquals(1, pending.parts().size()); + + // Abort the task. + suite.abortTask(); + + // Verify the state after aborting task. + suite.assertNoMagicPendingFile(); + suite.assertNoMagicMultipartUpload(); + suite.assertMultipartUpload(0); + suite.assertNoTaskAttemptPath(); + } + + @Test + public void testCommitJob() throws Exception { + JobSuite suite = JobSuite.create(conf, taskAttempt0, outputPath); + Assumptions.assumeFalse(suite.skipTests()); + suite.setupJob(); + suite.setupTask(); + suite.writeOutput(); + suite.commitTask(); + + // Commit the job. + suite.assertNoPartFiles(); + suite.commitJob(); + // Verify the output. + suite.assertNoMagicMultipartUpload(); + suite.assertNoMagicObjectKeys(); + suite.assertSuccessMarker(); + suite.assertSummaryReport(reportDir); + suite.verifyPartContent(); + } + + + @Test + public void testCommitJobFailed() throws Exception { + JobSuite suite = JobSuite.create(conf, taskAttempt0, outputPath); + Assumptions.assumeFalse(suite.skipTests()); + suite.setupJob(); + suite.setupTask(); + suite.writeOutput(); + suite.commitTask(); + + // Commit the job. + suite.assertNoPartFiles(); + suite.commitJob(); + } + + @Test + public void testTaskCommitAfterJobCommit() throws Exception { + JobSuite suite = JobSuite.create(conf, taskAttempt0, outputPath); + Assumptions.assumeFalse(suite.skipTests()); + suite.setupJob(); + suite.setupTask(); + suite.writeOutput(); + suite.commitTask(); + + // Commit the job + suite.assertNoPartFiles(); + suite.commitJob(); + // Verify the output. + suite.assertNoMagicMultipartUpload(); + suite.assertNoMagicObjectKeys(); + suite.assertSuccessMarker(); + suite.verifyPartContent(); + + // Commit the task again. 
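+ // The job commit has already cleaned up the magic task attempt data, so committing the task again should fail with FileNotFoundException.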
+ assertThrows(FileNotFoundException.class, suite::commitTask); + } + + @Test + public void testTaskCommitWithConsistentJobId() throws Exception { + Configuration config = newConf(); + String consistentJobId = randomFormedJobId(); + config.set(CommitUtils.SPARK_WRITE_UUID, consistentJobId); + JobSuite suite = JobSuite.create(config, taskAttempt0, outputPath); + Assumptions.assumeFalse(suite.skipTests()); + + // By now, we have two "jobId"s, one is spark uuid, and the other is the jobId in taskAttempt. + // The job committer will adopt the former. + suite.setupJob(); + + // Next, we clear spark uuid, and set the jobId of taskAttempt to another value. In this case, + // the committer will take the jobId of taskAttempt as the final jobId, which is not consistent + // with the one that committer holds. + config.unset(CommitUtils.SPARK_WRITE_UUID); + JobConf jobConf = new JobConf(config); + String anotherJobId = randomTrimmedJobId(); + TaskAttemptID taskAttemptId1 = + JobSuite.createTaskAttemptId(anotherJobId, JobSuite.DEFAULT_APP_ATTEMPT_ID); + final TaskAttemptContext attemptContext1 = + JobSuite.createTaskAttemptContext(jobConf, taskAttemptId1, JobSuite.DEFAULT_APP_ATTEMPT_ID); + + assertThrows(IllegalArgumentException.class, () -> suite.setupTask(attemptContext1), + "JobId set in the context"); + + // Even though we use another taskAttempt, as long as we ensure the spark uuid is consistent, + // the jobId in committer is consistent. + config.set(CommitUtils.SPARK_WRITE_UUID, consistentJobId); + config.set(FileOutputFormat.OUTDIR, outputPath.toString()); + jobConf = new JobConf(config); + anotherJobId = randomTrimmedJobId(); + TaskAttemptID taskAttemptId2 = + JobSuite.createTaskAttemptId(anotherJobId, JobSuite.DEFAULT_APP_ATTEMPT_ID); + TaskAttemptContext attemptContext2 = + JobSuite.createTaskAttemptContext(jobConf, taskAttemptId2, JobSuite.DEFAULT_APP_ATTEMPT_ID); + + suite.setupTask(attemptContext2); + // Write output must use the same task context with setup task. + suite.writeOutput(attemptContext2); + // Commit task must use the same task context with setup task. + suite.commitTask(attemptContext2); + suite.assertPendingSetAtRightLocation(); + + // Commit the job + suite.assertNoPartFiles(); + suite.commitJob(); + + // Verify the output. + suite.assertNoMagicMultipartUpload(); + suite.assertNoMagicObjectKeys(); + suite.assertSuccessMarker(); + suite.verifyPartContent(); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/mapred/JobSuite.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/mapred/JobSuite.java new file mode 100644 index 0000000000000..40984e966e9ac --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/mapred/JobSuite.java @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.commit.mapred; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.commit.BaseJobSuite; +import org.apache.hadoop.fs.tosfs.commit.CommitUtils; +import org.apache.hadoop.fs.tosfs.commit.SuccessData; +import org.apache.hadoop.fs.tosfs.object.ObjectStorageFactory; +import org.apache.hadoop.fs.tosfs.object.ObjectUtils; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapred.JobContextImpl; +import org.apache.hadoop.mapred.JobID; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TaskAttemptContext; +import org.apache.hadoop.mapred.TaskAttemptContextImpl; +import org.apache.hadoop.mapred.TaskAttemptID; +import org.apache.hadoop.mapred.TextOutputFormat; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.net.NetUtils; + +import java.io.IOException; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public final class JobSuite extends BaseJobSuite { + private final JobContext jobContext; + private final TaskAttemptContext taskAttemptContext; + private final Committer committer; + + private JobSuite(FileSystem fs, JobConf conf, + TaskAttemptID taskAttemptId, int appAttemptId, Path outputPath) + throws IOException { + setFs(fs); + // Initialize the job instance. + setJob(Job.getInstance(conf)); + job().setJobID(JobID.forName(CommitUtils.buildJobId(conf, taskAttemptId.getJobID()))); + this.jobContext = createJobContext(conf, taskAttemptId); + this.taskAttemptContext = createTaskAttemptContext(conf, taskAttemptId, appAttemptId); + setJobId(CommitUtils.buildJobId(jobContext)); + + // Set job output directory. + FileOutputFormat.setOutputPath(conf, outputPath); + setOutputPath(outputPath); + setObjectStorage(ObjectStorageFactory.create(outputPath.toUri().getScheme(), + outputPath.toUri().getAuthority(), conf)); + + // Initialize committer. 
+ this.committer = new Committer(); + this.committer.setupTask(taskAttemptContext); + } + + public static JobSuite create(Configuration conf, TaskAttemptID taskAttemptId, Path outDir) + throws IOException { + FileSystem fs = outDir.getFileSystem(conf); + return new JobSuite(fs, new JobConf(conf), taskAttemptId, DEFAULT_APP_ATTEMPT_ID, outDir); + } + + public static TaskAttemptID createTaskAttemptId(String trimmedJobId, int attemptId) { + String attempt = String.format("attempt_%s_m_000000_%d", trimmedJobId, attemptId); + return TaskAttemptID.forName(attempt); + } + + public static JobContext createJobContext(JobConf jobConf, TaskAttemptID taskAttemptId) { + return new JobContextImpl(jobConf, taskAttemptId.getJobID()); + } + + public static TaskAttemptContext createTaskAttemptContext( + JobConf jobConf, TaskAttemptID taskAttemptId, int appAttemptId) throws IOException { + // Set the key values for job configuration. + jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString()); + jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, appAttemptId); + jobConf.set("mapred.output.committer.class", + Committer.class.getName()); // 2x and 3x newApiCommitter=false. + return new TaskAttemptContextImpl(jobConf, taskAttemptId); + } + + public void setupJob() throws IOException { + committer.setupJob(jobContext); + } + + public void setupTask() throws IOException { + committer.setupTask(taskAttemptContext); + } + + // This method simulates the scenario that the job may set up task with a different + // taskAttemptContext, e.g., for a spark job. + public void setupTask(TaskAttemptContext taskAttemptCxt) throws IOException { + committer.setupTask(taskAttemptCxt); + } + + public void writeOutput() throws Exception { + writeOutput(taskAttemptContext); + } + + // This method simulates the scenario that the job may set up task with a different + // taskAttemptContext, e.g., for a spark job. + public void writeOutput(TaskAttemptContext taskAttemptCxt) throws Exception { + RecordWriter writer = new TextOutputFormat<>().getRecordWriter(fs(), + taskAttemptCxt.getJobConf(), + CommitUtils.buildJobId(taskAttemptCxt), + taskAttemptCxt.getProgressible()); + NullWritable nullKey = NullWritable.get(); + NullWritable nullVal = NullWritable.get(); + Object[] keys = new Object[]{KEY_1, nullKey, null, nullKey, null, KEY_2}; + Object[] vals = new Object[]{VAL_1, nullVal, null, null, nullVal, VAL_2}; + try { + assertEquals(keys.length, vals.length); + for (int i = 0; i < keys.length; i++) { + writer.write(keys[i], vals[i]); + } + } finally { + writer.close(Reporter.NULL); + } + } + + public boolean needsTaskCommit() throws IOException { + return committer.needsTaskCommit(taskAttemptContext); + } + + public void commitTask() throws IOException { + committer.commitTask(taskAttemptContext); + } + + // This method simulates the scenario that the job may set up task with a different + // taskAttemptContext, e.g., for a spark job. 
+ public void commitTask(TaskAttemptContext taskAttemptCxt) throws IOException {
+ committer.commitTask(taskAttemptCxt);
+ }
+
+ public void abortTask() throws IOException {
+ committer.abortTask(taskAttemptContext);
+ }
+
+ public void commitJob() throws IOException {
+ committer.commitJob(jobContext);
+ }
+
+ @Override
+ public Path magicPartPath() {
+ return new Path(committer.getWorkPath(), committer.jobId());
+ }
+
+ @Override
+ public Path magicPendingSetPath() {
+ return CommitUtils.magicTaskPendingSetPath(taskAttemptContext, outputPath());
+ }
+
+ public TaskAttemptContext taskAttemptContext() {
+ return taskAttemptContext;
+ }
+
+ public Committer committer() {
+ return committer;
+ }
+
+ @Override
+ public void assertNoTaskAttemptPath() throws IOException {
+ Path path = CommitUtils.magicTaskAttemptBasePath(taskAttemptContext, outputPath());
+ assertFalse(fs().exists(path), "Task attempt path should not exist");
+ String pathToKey = ObjectUtils.pathToKey(path);
+ assertNull(storage().head(pathToKey), "Should have no task attempt path key");
+ }
+
+ @Override
+ protected boolean skipTests() {
+ return storage().bucket().isDirectory();
+ }
+
+ @Override
+ public void assertSuccessMarker() throws IOException {
+ Path succPath = CommitUtils.successMarker(outputPath());
+ assertTrue(fs().exists(succPath), String.format("%s should exist", succPath));
+ SuccessData successData = SuccessData.deserialize(CommitUtils.load(fs(), succPath));
+ assertEquals(SuccessData.class.getName(), successData.name());
+ assertTrue(successData.success());
+ assertEquals(NetUtils.getHostname(), successData.hostname());
+ assertEquals(CommitUtils.COMMITTER_NAME, successData.committer());
+ assertEquals(
+ String.format("Task committer %s", taskAttemptContext.getTaskAttemptID()),
+ successData.description());
+ assertEquals(job().getJobID().toString(), successData.jobId());
+ assertEquals(1, successData.filenames().size());
+ assertEquals(destPartKey(), successData.filenames().get(0));
+ }
+
+ @Override
+ public void assertSummaryReport(Path reportDir) throws IOException {
+ Path reportPath = CommitUtils.summaryReport(reportDir, job().getJobID().toString());
+ assertTrue(fs().exists(reportPath), String.format("%s should exist", reportPath));
+ SuccessData reportData = SuccessData.deserialize(CommitUtils.load(fs(), reportPath));
+ assertEquals(SuccessData.class.getName(), reportData.name());
+ assertTrue(reportData.success());
+ assertEquals(NetUtils.getHostname(), reportData.hostname());
+ assertEquals(CommitUtils.COMMITTER_NAME, reportData.committer());
+ assertEquals(String.format("Task committer %s", taskAttemptContext.getTaskAttemptID()),
+ reportData.description());
+ assertEquals(job().getJobID().toString(), reportData.jobId());
+ assertEquals(1, reportData.filenames().size());
+ assertEquals(destPartKey(), reportData.filenames().get(0));
+ assertEquals("clean", reportData.diagnostics().get("stage"));
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/mapred/TestCommitter.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/mapred/TestCommitter.java
new file mode 100644
index 0000000000000..6192c99d68e48
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/commit/mapred/TestCommitter.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.commit.mapred; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.util.TestUtility; + +public class TestCommitter extends CommitterTestBase { + @Override + protected Configuration newConf() { + Configuration conf = new Configuration(); + conf.set("fs.defaultFS", String.format("tos://%s", TestUtility.bucket())); + return conf; + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestChecksum.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestChecksum.java new file mode 100644 index 0000000000000..8f82dc92077ea --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestChecksum.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileChecksum; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.AbstractFSContractTestBase; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.tosfs.RawFileSystem; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.object.ObjectUtils; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; + +public class TestChecksum extends AbstractFSContractTestBase { + @BeforeAll + public static void before() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new TosContract(conf); + } + + private Path testCreateNewFile(String fileName, byte[] data, boolean useBuilder) + throws IOException { + describe("Foundational 'create a file' test, using builder API=" + useBuilder); + Path path = path(fileName, useBuilder); + + writeDataset(getFileSystem(), path, data, data.length, 1024 * 1024, false, useBuilder); + ContractTestUtils.verifyFileContents(getFileSystem(), path, data); + + return path; + } + + private Path path(String filepath, boolean useBuilder) throws IOException { + return super.path(filepath + (useBuilder ? 
"" : "-builder")); + } + + @Test + public void testCheckSumWithSimplePut() throws IOException { + byte[] data = dataset(256, 'a', 'z'); + Path path1 = testCreateNewFile("file1", data, true); + Path path2 = testCreateNewFile("file2", data, true); + Path path3 = testCreateNewFile("file3", dataset(512, 'a', 'z'), true); + + FileChecksum expected = getFileSystem().getFileChecksum(path1); + assertEquals(expected, getFileSystem().getFileChecksum(path2), + "Checksum value should be same among objects with same content"); + assertEquals(expected, getFileSystem().getFileChecksum(path1), + "Checksum value should be same among multiple call for same object"); + assertNotEquals(expected, getFileSystem().getFileChecksum(path3), + "Checksum value should be different for different objects with different content"); + + Path renamed = path("renamed"); + getFileSystem().rename(path1, renamed); + assertEquals(expected, getFileSystem().getFileChecksum(renamed), + "Checksum value should not change after rename"); + } + + @Test + public void testCheckSumShouldSameViaPutAndMPU() throws IOException { + byte[] data = TestUtility.rand(11 << 20); + + // simple put + Path singleFile = path("singleFile"); + RawFileSystem fs = (RawFileSystem) getFileSystem(); + fs.storage().put(ObjectUtils.pathToKey(singleFile), data); + + // MPU upload data, the default threshold is 10MB + Path mpuFile = testCreateNewFile("mpuFile", data, true); + + assertEquals(fs.getFileChecksum(singleFile), fs.getFileChecksum(mpuFile)); + } + + @Test + public void testDisableCheckSum() throws IOException { + Path path1 = testCreateNewFile("file1", dataset(256, 'a', 'z'), true); + Path path2 = testCreateNewFile("file2", dataset(512, 'a', 'z'), true); + assertNotEquals(getFileSystem().getFileChecksum(path1), getFileSystem().getFileChecksum(path2)); + + // disable checksum + Configuration newConf = new Configuration(getFileSystem().getConf()); + newConf.setBoolean(ConfKeys.FS_CHECKSUM_ENABLED.key("tos"), false); + FileSystem newFS = FileSystem.get(newConf); + + assertEquals(newFS.getFileChecksum(path1), newFS.getFileChecksum(path2)); + } + + @Test + public void testGetDirChecksum() throws IOException { + FileSystem fs = getFileSystem(); + + Path dir1 = path("dir1", true); + Path dir2 = path("dir2", true); + assertPathDoesNotExist("directory already exists", dir1); + assertPathDoesNotExist("directory already exists", dir2); + fs.mkdirs(dir1); + + assertThrows(FileNotFoundException.class, () -> getFileSystem().getFileChecksum(dir1), + "Path is not a file"); + assertThrows(FileNotFoundException.class, () -> getFileSystem().getFileChecksum(dir2), + "No such file or directory"); + + assertDeleted(dir1, false); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestCreate.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestCreate.java new file mode 100644 index 0000000000000..a9b4f0833980f --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestCreate.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractCreateTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.junit.jupiter.api.BeforeAll; + +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public class TestCreate extends AbstractContractCreateTest { + + @BeforeAll + public static void before() { + assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new TosContract(conf); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestDelete.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestDelete.java new file mode 100644 index 0000000000000..299167b3dd2ce --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestDelete.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.fs.tosfs.contract;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.tosfs.TestEnv;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+import static org.apache.hadoop.fs.tosfs.object.ObjectTestUtils.assertDirExist;
+import static org.apache.hadoop.fs.tosfs.object.ObjectTestUtils.assertObjectNotExist;
+
+public class TestDelete extends AbstractContractDeleteTest {
+
+ @BeforeAll
+ public static void before() {
+ Assumptions.assumeTrue(TestEnv.checkTestEnabled());
+ }
+
+ @Override
+ protected AbstractFSContract createContract(Configuration conf) {
+ return new TosContract(conf);
+ }
+
+ @Test
+ public void testParentDirCreatedAfterDeleteSubChildren() throws IOException {
+ Path path = path("testParentDirCreatedAfterDeleteSubChildren/");
+ Path file1 = new Path(path, "f1");
+ Path file2 = new Path(path, "f2");
+ ContractTestUtils.writeTextFile(getFileSystem(), file1,
+ "the first file", true);
+ ContractTestUtils.writeTextFile(getFileSystem(), file2,
+ "the second file", true);
+ assertPathExists("file1 not created", file1);
+ assertPathExists("file2 not created", file2);
+
+ assertObjectNotExist(path, false);
+ assertObjectNotExist(path, true);
+
+ assertDeleted(file1, false);
+ assertPathExists("parent path should exist", path);
+
+ assertObjectNotExist(path, false);
+ assertDirExist(path);
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestDistCp.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestDistCp.java
new file mode 100644
index 0000000000000..a87ac6e690b93
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestDistCp.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public class TestDistCp extends AbstractContractDistCpTest { + + @BeforeAll + public static void before() { + assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new TosContract(conf); + } + + // ignore this test case as there is intermittent IllegalStateException issue + @Test + public void testDistCpWithIterator() { + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestGetFileStatus.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestGetFileStatus.java new file mode 100644 index 0000000000000..40c8027ada24d --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestGetFileStatus.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.tosfs.RawFileStatus; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.conf.TosKeys; +import org.apache.hadoop.fs.tosfs.object.Constants; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; + +public abstract class TestGetFileStatus extends AbstractContractGetFileStatusTest { + + private final boolean getFileStatusEnabled; + + public TestGetFileStatus(boolean getFileStatusEnabled) { + this.getFileStatusEnabled = getFileStatusEnabled; + } + + @BeforeAll + public static void before() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + conf.setBoolean(TosKeys.FS_TOS_GET_FILE_STATUS_ENABLED, getFileStatusEnabled); + conf.setBoolean(ConfKeys.FS_ASYNC_CREATE_MISSED_PARENT.key("tos"), false); + return new TosContract(conf); + } + + @Test + public void testDirModificationTimeShouldNotBeZero() throws IOException { + FileSystem fs = getFileSystem(); + Path path = getContract().getTestPath(); + fs.delete(path, true); + + Path subfolder = path.suffix('/' + this.methodName.getMethodName() + "-" + UUIDUtils.random()); + mkdirs(subfolder); + + FileStatus fileStatus = fs.getFileStatus(path); + assertTrue(fileStatus.getModificationTime() > 0); + } + + @Test + public void testThrowExceptionWhenListStatusForNonExistPath() { + FileSystem fs = getFileSystem(); + Path path = getContract().getTestPath(); + + assertThrows(FileNotFoundException.class, + () -> fs.listStatusIterator(new Path(path, "testListStatusForNonExistPath")), + "Path doesn't exist"); + } + + @Test + public void testPathStatNonexistentFile() { + FileSystem fs = getFileSystem(); + // working dir does not exist. 
+ Path file = new Path(getContract().getTestPath(), this.methodName.getMethodName());
+ assertThrows(FileNotFoundException.class, () -> fs.getFileStatus(file), "Path doesn't exist");
+ }
+
+ @Test
+ public void testPathStatExistentFile() throws IOException {
+ FileSystem fs = getFileSystem();
+ Path file = new Path(getContract().getTestPath(), this.methodName.getMethodName());
+
+ int size = 1 << 20;
+ byte[] data = dataset(size, 'a', 'z');
+ createFile(fs, file, true, data);
+ FileStatus status = fs.getFileStatus(file);
+ assertTrue(status.isFile());
+ assertTrue(status.getModificationTime() > 0);
+ assertEquals(size, status.getLen());
+ }
+
+ @Test
+ public void testPathStatEmptyDirectory() throws IOException {
+ FileSystem fs = getFileSystem();
+ Path workingPath = new Path(getContract().getTestPath(), this.methodName.getMethodName());
+ mkdirs(workingPath);
+
+ FileStatus dirStatus = fs.getFileStatus(workingPath);
+ assertTrue(dirStatus.isDirectory());
+ assertTrue(dirStatus.getModificationTime() > 0);
+ if (dirStatus instanceof RawFileStatus) {
+ assertArrayEquals(Constants.MAGIC_CHECKSUM, ((RawFileStatus) dirStatus).checksum());
+ }
+ }
+
+ @Test
+ public void testPathStatWhenCreateSubDir() throws IOException {
+ FileSystem fs = getFileSystem();
+ Path workingPath = new Path(getContract().getTestPath(), this.methodName.getMethodName());
+ // create sub directory directly.
+ Path subDir = new Path(workingPath, UUIDUtils.random());
+ mkdirs(subDir);
+ assertTrue(fs.getFileStatus(subDir).isDirectory());
+
+ // can get FileStatus of working dir.
+ assertTrue(fs.getFileStatus(workingPath).isDirectory());
+ // delete sub directory.
+ fs.delete(subDir, true);
+ // still can get FileStatus of working dir.
+ assertTrue(fs.getFileStatus(workingPath).isDirectory());
+ }
+
+ @Test
+ public void testPathStatDirNotExistButSubFileExist() throws IOException {
+ FileSystem fs = getFileSystem();
+ // working dir does not exist.
+ Path workingPath = new Path(getContract().getTestPath(), this.methodName.getMethodName());
+ assertThrows(FileNotFoundException.class, () -> fs.getFileStatus(workingPath),
+ "Path doesn't exist");
+
+ // create sub file in working dir directly.
+ Path file = workingPath.suffix('/' + UUIDUtils.random());
+ touch(fs, file);
+
+ // can get FileStatus of working dir.
+ assertTrue(fs.getFileStatus(workingPath).isDirectory());
+
+ // delete sub file, will create parent directory.
+ fs.delete(file, false);
+ assertTrue(fs.getFileStatus(workingPath).isDirectory());
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestGetFileStatusDisabled.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestGetFileStatusDisabled.java
new file mode 100644
index 0000000000000..0e537c1eac6f2
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestGetFileStatusDisabled.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.contract; + +public class TestGetFileStatusDisabled extends TestGetFileStatus { + public TestGetFileStatusDisabled() { + super(false); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestGetFileStatusEnabled.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestGetFileStatusEnabled.java new file mode 100644 index 0000000000000..3746a0ecc1ef2 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestGetFileStatusEnabled.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.contract; + +public class TestGetFileStatusEnabled extends TestGetFileStatus { + public TestGetFileStatusEnabled() { + super(true); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestMkdir.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestMkdir.java new file mode 100644 index 0000000000000..a303788a7b6a5 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestMkdir.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; + +public class TestMkdir extends AbstractContractMkdirTest { + + @BeforeAll + public static void before() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new TosContract(conf); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestOpen.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestOpen.java new file mode 100644 index 0000000000000..baa5646c72e1a --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestOpen.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractContractOpenTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.tosfs.RawFileStatus; +import org.apache.hadoop.fs.tosfs.RawFileSystem; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.object.exceptions.ChecksumMismatchException; +import org.apache.hadoop.fs.tosfs.util.Range; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; + +public class TestOpen extends AbstractContractOpenTest { + + @BeforeAll + public static void before() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new TosContract(conf); + } + + @Test + public void testOpenAExpiredFile() throws IOException { + Path file = path("testOpenAOutageFile"); + FileSystem fs = getFileSystem(); + byte[] data = dataset(256, 'a', 'z'); + writeDataset(fs, file, data, data.length, 1024 * 1024, true); + + FileStatus fileStatus = fs.getFileStatus(file); + if (fs instanceof RawFileSystem) { + byte[] expectChecksum = ((RawFileStatus) fileStatus).checksum(); + FSDataInputStream fsDataInputStream = + ((RawFileSystem) fs).open(file, expectChecksum, Range.of(0, Long.MAX_VALUE)); + fsDataInputStream.close(); + + // update the file + data = dataset(512, 'a', 'z'); + writeDataset(fs, file, data, data.length, 1024 * 1024, true); + + FSDataInputStream newStream = + ((RawFileSystem) fs).open(file, expectChecksum, Range.of(0, Long.MAX_VALUE)); + assertThrows(ChecksumMismatchException.class, () -> newStream.read(), "the file is expired"); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestRename.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestRename.java new file mode 100644 index 0000000000000..adddd5cd2b37c --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestRename.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractContractRenameTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; + +public class TestRename extends AbstractContractRenameTest { + + @BeforeAll + public static void before() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + // Add follow two keys into hadoop configuration. + String defaultScheme = FileSystem.getDefaultUri(conf).getScheme(); + Configuration newConf = new Configuration(conf); + newConf.setLong(ConfKeys.FS_MULTIPART_SIZE.key(defaultScheme), + ConfKeys.FS_MULTIPART_SIZE_DEFAULT); + newConf.setLong(ConfKeys.FS_MULTIPART_THRESHOLD.key(defaultScheme), + ConfKeys.FS_MULTIPART_THRESHOLD_DEFAULT); + + return new TosContract(newConf); + } + + @Test + public void testSucceedRenameFile() throws IOException { + describe("check if source file and dest file exists when succeed to rename"); + Path renameSrc = path("renameSrc"); + Path renameDest = path("renameDst"); + FileSystem fs = getFileSystem(); + byte[] data = dataset(256, 'a', 'z'); + writeDataset(fs, renameSrc, data, data.length, 1024 * 1024, true); + boolean renamed = rename(renameSrc, renameDest); + assertTrue(renamed); + assertPathExists("dest file should exist when succeed to rename", renameDest); + assertPathDoesNotExist("source file should not exist when succeed to rename", renameSrc); + } + + @Test + public void testSucceedRenameDir() throws IOException { + describe("check if source dir and dest dir exists when succeed to rename"); + Path renameSrc = path("renameSrc"); + Path renameDest = path("renameDst"); + int fileNums = 10; + int byteSize = 10 << 20; // trigger multipart upload + FileSystem fs = getFileSystem(); + for (int i = 0; i < fileNums; i++) { + byte[] data = dataset(byteSize >> i, 'a', 'z'); + writeDataset(fs, new Path(renameSrc, String.format("src%02d", i)), data, data.length, + 1024 * 1024, true); + } + boolean renamed = rename(renameSrc, renameDest); + assertTrue(renamed); + for (int i = 0; i < fileNums; i++) { + Path srcFilePath = new Path(renameSrc, String.format("src%02d", i)); + Path dstFilePath = new Path(renameDest, String.format("src%02d", i)); + byte[] data = dataset(byteSize >> i, 'a', 'z'); + assertPathExists("dest file should exist when succeed to rename", dstFilePath); + assertPathDoesNotExist("source file should not exist when succeed to rename", srcFilePath); + try (InputStream is = fs.open(dstFilePath)) { + assertArrayEquals(data, IOUtils.toByteArray(is)); + } + } + } + + @Test + public void testFailedRename() throws IOException { + describe("check if source file and dest file exists when failed to rename"); + Path renameSrc = path("src/renameSrc"); + Path renameDest = path("src/renameSrc/renameDst"); + FileSystem fs = 
getFileSystem(); + byte[] data = dataset(256, 'a', 'z'); + writeDataset(fs, renameSrc, data, data.length, 1024 * 1024, true); + boolean renamed; + try { + renamed = rename(renameSrc, renameDest); + } catch (IOException e) { + renamed = false; + } + assertFalse(renamed); + assertPathExists("source file should exist when failed to rename", renameSrc); + assertPathDoesNotExist("dest file should not exist when failed to rename", renameDest); + } + + @Test + public void testRenameSmallFile() throws IOException { + testRenameFileByPut(1 << 20); + testRenameFileByPut(3 << 20); + } + + @Test + public void testRenameLargeFile() throws IOException { + testRenameFileByUploadParts(16 << 20); + testRenameFileByUploadParts(10 << 20); + } + + @Test + public void testRenameDirWithSubFileAndSubDir() throws IOException { + FileSystem fs = getFileSystem(); + + Path renameSrc = path("dir/renameSrc"); + Path renameDest = path("dir/renameDst"); + int size = 1024; + byte[] data = dataset(size, 'a', 'z'); + String fileName = "file.txt"; + writeDataset(fs, new Path(renameSrc, fileName), data, data.length, 1024, true); + + String dirName = "dir"; + Path dirPath = new Path(renameSrc, dirName); + mkdirs(dirPath); + assertPathExists("source dir should exist", dirPath); + + boolean renamed = fs.rename(renameSrc, renameDest); + + assertTrue(renamed); + Path srcFilePath = new Path(renameSrc, fileName); + Path dstFilePath = new Path(renameDest, fileName); + assertPathExists("dest file should exist when succeed to rename", dstFilePath); + assertPathDoesNotExist("source file should not exist when succeed to rename", srcFilePath); + + assertPathExists("dest dir should exist when succeed to rename", new Path(renameDest, dirName)); + assertPathDoesNotExist("source dir should not exist when succeed to rename", + new Path(renameSrc, dirName)); + + ContractTestUtils.cleanup("TEARDOWN", fs, getContract().getTestPath()); + } + + public void testRenameFileByPut(int size) throws IOException { + describe("check if use put method when rename file"); + Path renameSrc = path("renameSrc"); + Path renameDest = path("renameDst"); + FileSystem fs = getFileSystem(); + + byte[] data = dataset(size, 'a', 'z'); + String fileName = String.format("%sMB.txt", size >> 20); + writeDataset(fs, new Path(renameSrc, fileName), data, data.length, 1024 * 1024, true); + boolean renamed = fs.rename(renameSrc, renameDest); + + assertTrue(renamed); + Path srcFilePath = new Path(renameSrc, fileName); + Path dstFilePath = new Path(renameDest, fileName); + assertPathExists("dest file should exist when succeed to rename", dstFilePath); + assertPathDoesNotExist("source file should not exist when succeed to rename", srcFilePath); + + assertPathExists("dest src should exist when succeed to rename", renameDest); + assertPathDoesNotExist("source src should not exist when succeed to rename", renameSrc); + + try (InputStream is = fs.open(dstFilePath)) { + assertArrayEquals(data, IOUtils.toByteArray(is)); + } + ContractTestUtils.cleanup("TEARDOWN", fs, getContract().getTestPath()); + } + + @Test + public void testCreateParentDirAfterRenameSubFile() throws IOException { + FileSystem fs = getFileSystem(); + + Path srcDir = path("srcDir"); + Path destDir = path("destDir"); + + assertPathDoesNotExist("Src dir should not exist", srcDir); + assertPathDoesNotExist("Dest dir should not exist", destDir); + int size = 1 << 20; + byte[] data = dataset(size, 'a', 'z'); + String fileName = String.format("%sMB.txt", size >> 20); + Path srcFile = new Path(srcDir, fileName); + Path 
destFile = new Path(destDir, fileName);
+ writeDataset(fs, srcFile, data, data.length, 1024 * 1024, true);
+
+ assertPathExists("Src file should exist", srcFile);
+ assertPathExists("Src dir should exist", srcDir);
+
+ mkdirs(destDir);
+ assertPathExists("Dest dir should exist", destDir);
+
+ boolean renamed = fs.rename(srcFile, destFile);
+ assertTrue(renamed);
+
+ assertPathExists("Dest file should exist", destFile);
+ assertPathExists("Dest dir should exist", destDir);
+ assertPathDoesNotExist("Src file should not exist", srcFile);
+ assertPathExists("Src dir should exist", srcDir);
+ }
+
+ @Test
+ public void testCreateParentDirAfterRenameSubDir() throws IOException {
+ FileSystem fs = getFileSystem();
+
+ Path srcDir = path("srcDir");
+ Path destDir = path("destDir");
+
+ assertPathDoesNotExist("Src dir should not exist", srcDir);
+ assertPathDoesNotExist("Dest dir should not exist", destDir);
+
+ String subDirName = "subDir";
+ Path srcSubDir = new Path(srcDir, subDirName);
+ Path destSubDir = new Path(destDir, subDirName);
+ mkdirs(srcSubDir);
+
+ assertPathExists("Src sub dir should exist", srcSubDir);
+ assertPathExists("Src dir should exist", srcDir);
+
+ mkdirs(destDir);
+ assertPathExists("Dest dir should exist", destDir);
+
+ boolean renamed = fs.rename(srcSubDir, destSubDir);
+ assertTrue(renamed);
+
+ assertPathExists("Dest sub dir should exist", destSubDir);
+ assertPathExists("Dest dir should exist", destDir);
+ assertPathDoesNotExist("Src sub dir should not exist", srcSubDir);
+ assertPathExists("Src dir should exist", srcDir);
+ }
+
+ public void testRenameFileByUploadParts(int size) throws IOException {
+ describe("check if use upload parts method when rename file");
+ Path renameSrc = path("renameSrc");
+ Path renameDest = path("renameDst");
+ FileSystem fs = getFileSystem();
+
+ byte[] data = dataset(size, 'a', 'z');
+ String fileName = String.format("%sMB.txt", size >> 20);
+ writeDataset(fs, new Path(renameSrc, fileName), data, data.length, 1024 * 1024, true);
+ boolean renamed = fs.rename(renameSrc, renameDest);
+
+ assertTrue(renamed);
+ Path srcFilePath = new Path(renameSrc, fileName);
+ Path dstFilePath = new Path(renameDest, fileName);
+ assertPathExists("dest file should exist when succeed to rename", dstFilePath);
+ assertPathDoesNotExist("source file should not exist when succeed to rename", srcFilePath);
+
+ try (InputStream is = fs.open(dstFilePath)) {
+ assertArrayEquals(data, IOUtils.toByteArray(is));
+ }
+ ContractTestUtils.cleanup("TEARDOWN", fs, getContract().getTestPath());
+ }
+
+ @Disabled
+ @Test
+ public void testRenameFileUnderFileSubdir() {
+ }
+
+ @Disabled
+ @Test
+ public void testRenameFileUnderFile() {
+ }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestRootDir.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestRootDir.java
new file mode 100644
index 0000000000000..047f2687ae874
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestRootDir.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; + +public class TestRootDir extends AbstractContractRootDirectoryTest { + + @BeforeAll + public static void before() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new TosContract(conf); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestSeek.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestSeek.java new file mode 100644 index 0000000000000..72f8a6cadf0ac --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestSeek.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractSeekTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; + +public class TestSeek extends AbstractContractSeekTest { + + @BeforeAll + public static void before() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new TosContract(conf); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestUnbuffer.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestUnbuffer.java new file mode 100644 index 0000000000000..517f154077d05 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestUnbuffer.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractUnbufferTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.junit.jupiter.api.BeforeAll; + +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public class TestUnbuffer extends AbstractContractUnbufferTest { + + @BeforeAll + public static void before() { + assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new TosContract(conf); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestXAttr.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestXAttr.java new file mode 100644 index 0000000000000..b4b11e422bab5 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TestXAttr.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.AbstractFSContractTestBase; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.common.Bytes; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; + +public class TestXAttr extends AbstractFSContractTestBase { + private static final String XATTR_NAME = "xAttrName"; + private static final byte[] XATTR_VALUE = "xAttrValue".getBytes(); + + @BeforeAll + public static void before() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new TosContract(conf); + } + + @Test + public void testGetNonExistedXAttr() throws Exception { + FileSystem fs = getFileSystem(); + Path path = path("testSetAndGet/file"); + fs.create(path).close(); + + fs.setXAttr(path, XATTR_NAME, XATTR_VALUE); + assertThrows(IOException.class, () -> fs.getXAttr(path, "non-exist"), "Not found."); + assertThrows(IOException.class, () -> fs.getXAttrs(path, Arrays.asList("non-exist")), + "Not found."); + assertThrows(IOException.class, + () -> fs.getXAttrs(path, Arrays.asList("non-exist", XATTR_NAME)), "Not found."); + } + + @Test + public void testSetAndGetWhenPathNotExist() throws Exception { + FileSystem fs = getFileSystem(); + Path path = path("testXAttrWhenPathNotExist/file"); + fs.delete(path); + + assertThrows(FileNotFoundException.class, () -> fs.setXAttr(path, XATTR_NAME, XATTR_VALUE), + "No such file"); + assertThrows(FileNotFoundException.class, () -> fs.getXAttrs(path), "No such file"); + assertThrows(FileNotFoundException.class, () -> fs.removeXAttr(path, "name"), "No such file"); + } + + @Test + public void testSetAndGet() throws Exception { + FileSystem fs = getFileSystem(); + Path path = path("testSetAndGet/file"); + fs.create(path).close(); + + fs.setXAttr(path, XATTR_NAME, XATTR_VALUE); + assertArrayEquals(XATTR_VALUE, fs.getXAttr(path, XATTR_NAME)); + } + + @Test + public void testSetAndGetNonExistedObject() throws Exception { + FileSystem fs = getFileSystem(); + Path path = path("testSetAndGetOnNonExistedObject/dir-0/dir-1/file"); + fs.create(path).close(); + + Path nonExistedPath = path.getParent().getParent(); + fs.setXAttr(nonExistedPath, XATTR_NAME, XATTR_VALUE); + assertThrows(IOException.class, () -> fs.getXAttr(nonExistedPath, XATTR_NAME), "Not found."); + } + + @Test + public void testSetAndGetOnExistedObjectDir() throws Exception { + FileSystem fs = getFileSystem(); + Path path = path("testSetAndGetOnDir/dir-0/dir-1"); + fs.mkdirs(path); + + fs.setXAttr(path, XATTR_NAME, XATTR_VALUE); + assertThrows(IOException.class, () -> fs.getXAttr(path, XATTR_NAME), "Not found."); + } + + @Test + public void testGetAndListAll() throws Exception { + FileSystem fs = getFileSystem(); + Path path = path("testGetAndListAll/file"); + fs.create(path).close(); + + int size = 10; + for (int i = 0; i < size; i++) { + fs.setXAttr(path, XATTR_NAME + i, Bytes.toBytes("VALUE" + i)); + } + + Map<String, byte[]> result = fs.getXAttrs(path); + assertEquals(size, result.size()); + for 
(int i = 0; i < size; i++) { + assertEquals("VALUE" + i, Bytes.toString(result.get(XATTR_NAME + i))); + } + + List names = fs.listXAttrs(path); + assertEquals(size, names.size()); + for (int i = 0; i < size; i++) { + assertTrue(names.contains(XATTR_NAME + i)); + } + } + + @Test + public void testRemove() throws Exception { + FileSystem fs = getFileSystem(); + Path path = path("testRemove/file"); + fs.create(path).close(); + + int size = 10; + for (int i = 0; i < size; i++) { + fs.setXAttr(path, XATTR_NAME + i, Bytes.toBytes("VALUE" + i)); + } + + for (int i = 0; i < size; i++) { + fs.removeXAttr(path, XATTR_NAME + i); + String name = XATTR_NAME + i; + assertThrows(IOException.class, () -> fs.getXAttr(path, name), "Not found."); + assertEquals(size - 1 - i, fs.listXAttrs(path).size()); + } + } + + @Test + public void testXAttrFlag() throws Exception { + FileSystem fs = getFileSystem(); + Path path = path("testXAttrFlag/file"); + fs.create(path).close(); + + String key = XATTR_NAME; + byte[] value = XATTR_VALUE; + assertThrows(IOException.class, + () -> fs.setXAttr(path, key, value, EnumSet.of(XAttrSetFlag.REPLACE)), + "The CREATE flag must be specified"); + fs.setXAttr(path, key, value, EnumSet.of(XAttrSetFlag.CREATE)); + assertArrayEquals(value, fs.getXAttr(path, key)); + + byte[] newValue = Bytes.toBytes("new value"); + assertThrows(IOException.class, + () -> fs.setXAttr(path, key, newValue, EnumSet.of(XAttrSetFlag.CREATE)), + "The REPLACE flag must be specified"); + fs.setXAttr(path, key, newValue, EnumSet.of(XAttrSetFlag.REPLACE)); + assertArrayEquals(newValue, fs.getXAttr(path, key)); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TosContract.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TosContract.java new file mode 100644 index 0000000000000..1250ac039a1f6 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/contract/TosContract.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.contract; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractBondedFSContract; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TosContract extends AbstractBondedFSContract { + private static final Logger LOG = LoggerFactory.getLogger(TosContract.class); + private final String testDir; + + public TosContract(Configuration conf) { + super(conf); + addConfResource("contract/tos.xml"); + // Set the correct contract test path if there is a provided bucket name from environment. + if (StringUtils.isNoneEmpty(TestUtility.bucket())) { + conf.set("fs.contract.test.fs.tos", String.format("tos://%s/", TestUtility.bucket())); + } + + testDir = "/test-" + UUIDUtils.random(); + } + + @BeforeAll + public static void before() { + Assumptions.assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + public String getScheme() { + return "tos"; + } + + @Override + public Path getTestPath() { + LOG.info("the test dir is: {}", testDir); + return new Path(testDir); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/ObjectStorageTestBase.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/ObjectStorageTestBase.java new file mode 100644 index 0000000000000..484a414389ba9 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/ObjectStorageTestBase.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.object; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.util.CommonUtils; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +public class ObjectStorageTestBase { + private static final Logger LOG = LoggerFactory.getLogger(ObjectStorageTestBase.class); + private Configuration conf; + private Configuration tosConf; + private Path testDir; + private FileSystem fs; + private String scheme; + private ObjectStorage storage; + + @TempDir + private java.nio.file.Path tempDir; + + @BeforeEach + public void setUp() throws IOException { + LOG.info("The test temporary folder is {}", tempDir); + + String tempDirPath = tempDir.toAbsolutePath().toString(); + conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key("filestore"), tempDirPath); + conf.set("fs.filestore.impl", LocalFileSystem.class.getName()); + tosConf = new Configuration(conf); + // Set the environment variable for ObjectTestUtils#assertObject + TestUtility.setSystemEnv(FileStore.ENV_FILE_STORAGE_ROOT, tempDirPath); + + testDir = new Path("filestore://" + FileStore.DEFAULT_BUCKET + "/", UUIDUtils.random()); + fs = testDir.getFileSystem(conf); + scheme = testDir.toUri().getScheme(); + storage = ObjectStorageFactory.create(scheme, testDir.toUri().getAuthority(), tosConf); + } + + @AfterEach + public void tearDown() throws IOException { + if (storage != null) { + // List all keys with test dir prefix and delete them. + String prefix = ObjectUtils.pathToKey(testDir); + CommonUtils.runQuietly(() -> storage.deleteAll(prefix)); + // List all multipart uploads and abort them. + CommonUtils.runQuietly(() -> { + for (MultipartUpload upload : storage.listUploads(prefix)) { + LOG.info("Abort the multipart upload {}", upload); + storage.abortMultipartUpload(upload.key(), upload.uploadId()); + } + }); + + storage.close(); + } + } + + public Configuration conf() { + return conf; + } + + public Configuration tosConf() { + return tosConf; + } + + public Path testDir() { + return testDir; + } + + public FileSystem fs() { + return fs; + } + + public String scheme() { + return scheme; + } + + public ObjectStorage getStorage() { + return storage; + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/ObjectTestUtils.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/ObjectTestUtils.java new file mode 100644 index 0000000000000..d8c23552c5e37 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/ObjectTestUtils.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.object; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.Objects; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public final class ObjectTestUtils { + + public static final byte[] EMPTY_BYTES = new byte[] {}; + + private ObjectTestUtils() { + } + + /** + * Assert that all the parent directories should be existing. + * + * @param path to validate, can be directory or file. + */ + public static void assertParentDirExist(Path path) throws IOException { + for (Path p = path.getParent(); p != null && p.getParent() != null; p = p.getParent()) { + assertObject(p, EMPTY_BYTES, true); + } + } + + /** + * Assert that all the parent directories and current directory should be existing. + * + * @param path to validate, must be a directory. + */ + public static void assertDirExist(Path path) throws IOException { + // All parent directories exist. + assertParentDirExist(path); + // The current directory exist. + assertObject(path, EMPTY_BYTES, true); + } + + public static void assertObjectNotExist(Path path) throws IOException { + assertObjectNotExist(path, false); + } + + public static void assertObjectNotExist(Path path, boolean isDir) throws IOException { + ObjectStorage store = + ObjectStorageFactory.create(path.toUri().getScheme(), path.toUri().getHost(), + new Configuration()); + String objectKey = ObjectUtils.pathToKey(path, isDir); + ObjectInfo info = store.head(objectKey); + assertNull(info, String.format("Object key %s shouldn't exist in backend storage.", objectKey)); + + store.close(); + } + + public static void assertObject(Path path, byte[] data) throws IOException { + assertObject(path, data, false); + } + + public static void assertObject(Path path, byte[] data, boolean isDir) throws IOException { + ObjectStorage store = + ObjectStorageFactory.create(path.toUri().getScheme(), path.toUri().getHost(), + new Configuration()); + String objectKey = ObjectUtils.pathToKey(path, isDir); + // Verify the existence of object. + ObjectInfo info = store.head(objectKey); + assertNotNull(info, String.format("there should be an key %s in object storage", objectKey)); + assertEquals(info.key(), objectKey); + assertEquals(data.length, info.size()); + // Verify the data content. 
+ try (InputStream in = store.get(objectKey, 0, -1).stream()) { + byte[] actual = IOUtils.toByteArray(in); + assertArrayEquals(data, actual, "Unexpected binary"); + } + + store.close(); + } + + public static void assertMultipartUploadExist(Path path, String uploadId) throws IOException { + ObjectStorage store = + ObjectStorageFactory.create(path.toUri().getScheme(), path.toUri().getHost(), + new Configuration()); + String objectKey = ObjectUtils.pathToKey(path, false); + + Iterator<MultipartUpload> uploadIterator = store.listUploads(objectKey).iterator(); + assertTrue(uploadIterator.hasNext()); + assertMultipartUploadIdExist(uploadIterator, uploadId); + + store.close(); + } + + private static void assertMultipartUploadIdExist(Iterator<MultipartUpload> uploadIterator, + String uploadId) { + boolean exist = false; + while (uploadIterator.hasNext()) { + if (Objects.equals(uploadIterator.next().uploadId(), uploadId)) { + exist = true; + } + } + assertTrue(exist); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestDirectoryStorage.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestDirectoryStorage.java new file mode 100644 index 0000000000000..034b91ce16bc1 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestDirectoryStorage.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.object; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.conf.TosKeys; +import org.apache.hadoop.fs.tosfs.util.CommonUtils; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import static org.apache.hadoop.fs.tosfs.util.TestUtility.scheme; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public class TestDirectoryStorage { + private final ObjectStorage storage; + + public TestDirectoryStorage() { + Configuration conf = new Configuration(); + storage = + ObjectStorageFactory.createWithPrefix(String.format("%s-%s/", scheme(), UUIDUtils.random()), + scheme(), TestUtility.bucket(), conf); + } + + @BeforeAll + public static void before() { + assumeTrue(TestEnv.checkTestEnabled()); + } + + @AfterEach + public void tearDown() { + CommonUtils.runQuietly(() -> storage.deleteAll("")); + for (MultipartUpload upload : storage.listUploads("")) { + storage.abortMultipartUpload(upload.key(), upload.uploadId()); + } + } + + @Test + public void testListEmptyDir() { + String key = "testListEmptyDir/"; + mkdir(key); + assertNotNull(directoryStorage().head(key)); + + assertFalse(directoryStorage().listDir(key, false).iterator().hasNext()); + assertFalse(directoryStorage().listDir(key, false).iterator().hasNext()); + assertTrue(directoryStorage().isEmptyDir(key)); + } + + @Test + public void testListNonExistDir() { + String key = "testListNonExistDir/"; + assertNull(directoryStorage().head(key)); + + assertFalse(directoryStorage().listDir(key, false).iterator().hasNext()); + assertFalse(directoryStorage().listDir(key, false).iterator().hasNext()); + assertTrue(directoryStorage().isEmptyDir(key)); + } + + @Test + public void testRecursiveList() { + String root = "root/"; + String file1 = "root/file1"; + String file2 = "root/afile2"; + String dir1 = "root/dir1/"; + String file3 = "root/dir1/file3"; + + mkdir(root); + mkdir(dir1); + touchFile(file1, TestUtility.rand(8)); + touchFile(file2, TestUtility.rand(8)); + touchFile(file3, TestUtility.rand(8)); + + assertThat(directoryStorage().listDir(root, false)) + .hasSize(3) + .extracting(ObjectInfo::key) + .contains(dir1, file1, file2); + + assertThat(directoryStorage().listDir(root, true)) + .hasSize(4) + .extracting(ObjectInfo::key) + .contains(dir1, file1, file2, file3); + } + + @Test + public void testRecursiveListWithSmallBatch() { + Configuration conf = new Configuration(directoryStorage().conf()); + conf.setInt(TosKeys.FS_TOS_LIST_OBJECTS_COUNT, 5); + directoryStorage().initialize(conf, directoryStorage().bucket().name()); + + String root = "root/"; + mkdir(root); + + // Create 2 files start with 'a', 2 sub dirs start with 'b', 2 files start with 'c' + for (int i = 1; i <= 2; i++) { + touchFile("root/a-file-" + i, TestUtility.rand(8)); + mkdir("root/b-dir-" + i + "/"); + touchFile("root/c-file-" + i, TestUtility.rand(8)); + } + + // Create two files under 
each sub dirs. + for (int j = 1; j <= 2; j++) { + touchFile(String.format("root/b-dir-%d/file1", j), TestUtility.rand(8)); + touchFile(String.format("root/b-dir-%d/file2", j), TestUtility.rand(8)); + } + + assertThat(directoryStorage().listDir(root, false)) + .hasSize(6) + .extracting(ObjectInfo::key) + .contains( + "root/a-file-1", "root/a-file-2", + "root/b-dir-1/", "root/b-dir-2/", + "root/c-file-1", "root/c-file-2"); + + assertThat(directoryStorage().listDir(root, true)) + .hasSize(10) + .extracting(ObjectInfo::key) + .contains( + "root/a-file-1", "root/a-file-2", + "root/b-dir-1/", "root/b-dir-1/file1", "root/b-dir-1/file2", + "root/b-dir-2/", "root/b-dir-2/file1", "root/b-dir-2/file2", + "root/c-file-1", "root/c-file-2"); + } + + @Test + public void testRecursiveListRoot() { + String root = "root/"; + String dir1 = "root/dir1/"; + mkdir(root); + mkdir(dir1); + + assertThat(directoryStorage().listDir("", true)) + .hasSize(2) + .extracting(ObjectInfo::key) + .contains("root/", "root/dir1/"); + } + + @Test + public void testDeleteEmptyDir() { + String dir = "a/b/"; + mkdir(dir); + + directoryStorage().deleteDir(dir, false); + assertNull(directoryStorage().head(dir)); + } + + @Test + public void testDeleteNonEmptyDir() { + String dir = "a/b/"; + String subDir = "a/b/c/"; + String file = "a/b/file.txt"; + mkdir(dir); + mkdir(subDir); + touchFile(file, new byte[10]); + + assertThrows(RuntimeException.class, () -> directoryStorage().deleteDir(dir, false)); + assertNotNull(directoryStorage().head(dir)); + assertNotNull(directoryStorage().head(subDir)); + assertNotNull(directoryStorage().head(file)); + + directoryStorage().deleteDir(dir, true); + assertNull(directoryStorage().head(dir)); + assertNull(directoryStorage().head(subDir)); + assertNull(directoryStorage().head(file)); + } + + @Test + public void testRecursiveDeleteDirViaTosSDK() { + Configuration conf = new Configuration(directoryStorage().conf()); + conf.setBoolean(TosKeys.FS_TOS_RMR_CLIENT_ENABLE, true); + directoryStorage().initialize(conf, directoryStorage().bucket().name()); + + testDeleteNonEmptyDir(); + } + + // TOS doesn't enable recursive delete in server side currently. + @Disabled + @Test + public void testAtomicDeleteDir() { + Configuration conf = new Configuration(directoryStorage().conf()); + conf.setBoolean(TosKeys.FS_TOS_RMR_SERVER_ENABLED, true); + directoryStorage().initialize(conf, directoryStorage().bucket().name()); + + testDeleteNonEmptyDir(); + } + + private void touchFile(String key, byte[] data) { + directoryStorage().put(key, data); + } + + private void mkdir(String key) { + directoryStorage().put(key, new byte[0]); + } + + private DirectoryStorage directoryStorage() { + assumeTrue(storage.bucket().isDirectory()); + return (DirectoryStorage) storage; + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectMultiRangeInputStream.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectMultiRangeInputStream.java new file mode 100644 index 0000000000000..ee73e9db6d1e8 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectMultiRangeInputStream.java @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.object; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.common.Bytes; +import org.apache.hadoop.fs.tosfs.common.ThreadPools; +import org.apache.hadoop.fs.tosfs.object.exceptions.ChecksumMismatchException; +import org.apache.hadoop.fs.tosfs.util.Range; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestObjectMultiRangeInputStream extends ObjectStorageTestBase { + private static ExecutorService threadPool; + + @BeforeAll + public static void beforeClass() { + threadPool = ThreadPools.newWorkerPool("TestObjectInputStream-pool"); + } + + @AfterAll + public static void afterClass() { + if (!threadPool.isShutdown()) { + threadPool.shutdown(); + } + } + + @Test + public void testSequentialAndRandomRead() throws IOException { + Path outPath = new Path(testDir(), "testSequentialAndRandomRead.txt"); + String key = ObjectUtils.pathToKey(outPath); + byte[] rawData = TestUtility.rand(5 << 20); + getStorage().put(key, rawData); + + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + try (ObjectMultiRangeInputStream in = new ObjectMultiRangeInputStream(threadPool, getStorage(), + ObjectUtils.pathToKey(outPath), rawData.length, Long.MAX_VALUE, content.checksum())) { + // sequential read + assertEquals(0, in.getPos()); + assertEquals(0, in.nextExpectPos()); + + byte[] b = new byte[1024]; + int readCnt = in.read(b); + assertEquals(readCnt, b.length); + assertArrayEquals(Arrays.copyOfRange(rawData, 0, 1024), b); + assertEquals(1024, in.getPos()); + assertEquals(1024, in.nextExpectPos()); + + readCnt = in.read(b); + assertEquals(readCnt, b.length); + assertArrayEquals(Arrays.copyOfRange(rawData, 1024, 2048), b); + assertEquals(2048, in.getPos()); + assertEquals(2048, in.nextExpectPos()); + + // random read forward + in.seek(4 << 20); + assertEquals(4 << 20, in.getPos()); + assertEquals(2048, in.nextExpectPos()); + + readCnt = in.read(b); + assertEquals(readCnt, b.length); + 
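+ // After seeking forward to the 4 MB offset, the next 1 KB read should match that slice of the raw data.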
assertArrayEquals(Arrays.copyOfRange(rawData, 4 << 20, 1024 + (4 << 20)), b); + assertEquals((4 << 20) + 1024, in.getPos()); + assertEquals((4 << 20) + 1024, in.nextExpectPos()); + + // random read back + in.seek(2 << 20); + assertEquals(2 << 20, in.getPos()); + assertEquals((4 << 20) + 1024, in.nextExpectPos()); + + readCnt = in.read(b); + assertEquals(readCnt, b.length); + assertArrayEquals(Arrays.copyOfRange(rawData, 2 << 20, 1024 + (2 << 20)), b); + assertEquals((2 << 20) + 1024, in.getPos()); + assertEquals((2 << 20) + 1024, in.nextExpectPos()); + } + } + + private InputStream getStream(String key) { + return getStorage().get(key).stream(); + } + + @Test + public void testReadSingleByte() throws IOException { + int len = 10; + Path outPath = new Path(testDir(), "testReadSingleByte.txt"); + byte[] data = TestUtility.rand(len); + String key = ObjectUtils.pathToKey(outPath); + byte[] checksum = getStorage().put(key, data); + + try (InputStream in = new ObjectMultiRangeInputStream(threadPool, getStorage(), key, + data.length, Long.MAX_VALUE, checksum)) { + for (int i = 0; i < data.length; i++) { + assertTrue(in.read() >= 0); + } + assertEquals(-1, in.read()); + } + } + + @Test + public void testReadStreamButTheFileChangedDuringReading() throws IOException { + int len = 2048; + Path outPath = new Path(testDir(), "testReadStreamButTheFileChangedDuringReading.txt"); + byte[] data = TestUtility.rand(len); + String key = ObjectUtils.pathToKey(outPath); + byte[] checksum = getStorage().put(key, data); + + try (InputStream in = new ObjectMultiRangeInputStream(threadPool, getStorage(), key, + data.length, 1024, checksum)) { + byte[] read = new byte[1024]; + int n = in.read(read); + assertEquals(1024, n); + + getStorage().put(key, TestUtility.rand(1024)); + assertThrows(ChecksumMismatchException.class, () -> in.read(read), "The file is staled"); + } + } + + @Test + public void testRead100M() throws IOException { + testSequentialReadData(100 << 20, 6 << 20); + testSequentialReadData(100 << 20, 5 << 20); + } + + @Test + public void testRead10M() throws IOException { + testSequentialReadData(10 << 20, 4 << 20); + testSequentialReadData(10 << 20, 5 << 20); + } + + @Test + public void testParallelRead10M() throws IOException, ExecutionException, InterruptedException { + testParallelRandomRead(10 << 20, 4 << 20); + testParallelRandomRead(10 << 20, 5 << 20); + } + + @Test + public void testRead100b() throws IOException { + testSequentialReadData(100, 40); + testSequentialReadData(100, 50); + testSequentialReadData(100, 100); + testSequentialReadData(100, 101); + } + + private void testSequentialReadData(int dataSize, int partSize) throws IOException { + Path outPath = new Path(testDir(), String.format("%d-%d.txt", dataSize, partSize)); + String key = ObjectUtils.pathToKey(outPath); + byte[] rawData = TestUtility.rand(dataSize); + getStorage().put(key, rawData); + + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + int batchSize = (dataSize - 1) / partSize + 1; + try (InputStream in = new ObjectMultiRangeInputStream(threadPool, getStorage(), + ObjectUtils.pathToKey(outPath), rawData.length, Long.MAX_VALUE, content.checksum())) { + for (int i = 0; i < batchSize; i++) { + int start = i * partSize; + int end = Math.min(dataSize, start + partSize); + byte[] expectArr = Arrays.copyOfRange(rawData, start, end); + + byte[] b = new byte[end - start]; + int ret = in.read(b, 0, b.length); + + assertEquals(b.length, ret); + 
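+ // Each sequential batch must equal the matching slice of the original data.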
assertArrayEquals(expectArr, b, String.format("the read bytes mismatched at batch: %d", i)); + } + assertEquals(-1, in.read()); + } + } + + private void testParallelRandomRead(int dataSize, int partSize) + throws IOException, ExecutionException, InterruptedException { + + Path outPath = new Path(testDir(), String.format("%d-%d.txt", dataSize, partSize)); + String key = ObjectUtils.pathToKey(outPath); + byte[] rawData = TestUtility.rand(dataSize); + getStorage().put(key, rawData); + + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + Random random = new Random(); + List<Future<Boolean>> tasks = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + int position = random.nextInt(rawData.length); + tasks.add(threadPool.submit( + () -> testReadDataFromSpecificPosition(rawData, outPath, position, partSize, + content.checksum()))); + } + + for (Future<Boolean> task : tasks) { + assertTrue(task.get()); + } + } + + private boolean testReadDataFromSpecificPosition( + final byte[] rawData, + final Path objPath, + final int startPosition, + final int partSize, + byte[] checksum) { + int rawDataSize = rawData.length; + int batchSize = (rawDataSize - startPosition - 1) / partSize + 1; + try (ObjectMultiRangeInputStream in = new ObjectMultiRangeInputStream(threadPool, getStorage(), + ObjectUtils.pathToKey(objPath), rawDataSize, Long.MAX_VALUE, checksum)) { + in.seek(startPosition); + + for (int i = 0; i < batchSize; i++) { + int start = startPosition + i * partSize; + int end = Math.min(rawDataSize, start + partSize); + byte[] expectArr = Arrays.copyOfRange(rawData, start, end); + + byte[] b = new byte[end - start]; + int ret = in.read(b, 0, b.length); + + assertEquals(b.length, ret); + assertArrayEquals(expectArr, b, String.format("the read bytes mismatched at batch: %d", i)); + } + assertEquals(-1, in.read()); + return true; + } catch (IOException e) { + return false; + } + } + + @Test + public void testParallelReadFromOneInputStream() throws IOException, ExecutionException, + InterruptedException { + testParallelReadFromOneInputStreamImpl(10 << 20, 512, 10); + testParallelReadFromOneInputStreamImpl(10 << 20, 64, 100); + testParallelReadFromOneInputStreamImpl(1 << 20, 2 << 20, 5); + } + + public void testParallelReadFromOneInputStreamImpl(int dataSize, int batchSize, int parallel) + throws IOException, ExecutionException, InterruptedException { + + Path outPath = new Path(testDir(), + String.format("%d-%d-testParallelReadFromOneInputStreamImpl.txt", dataSize, batchSize)); + String key = ObjectUtils.pathToKey(outPath); + byte[] rawData = TestUtility.rand(dataSize); + getStorage().put(key, rawData); + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + AtomicInteger sum = new AtomicInteger(0); + CopyOnWriteArrayList<byte[]> readBytes = new CopyOnWriteArrayList<>(); + List<Future<?>> futures = new ArrayList<>(); + try (ObjectMultiRangeInputStream inputStream = new ObjectMultiRangeInputStream(threadPool, + getStorage(), ObjectUtils.pathToKey(outPath), rawData.length, Long.MAX_VALUE, + content.checksum())) { + for (int i = 0; i < parallel; i++) { + futures.add(threadPool.submit(() -> { + byte[] data = new byte[batchSize]; + try { + int count; + while ((count = inputStream.read(data)) != -1) { + sum.getAndAdd(count); + readBytes.add(Arrays.copyOfRange(data, 0, count)); + data = new byte[batchSize]; + } + } catch (IOException e) { + throw new RuntimeException(e); + } + })); + } + + for (Future<?> future : futures) { + 
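+ // Join every reader task; a failed read is rethrown here as an ExecutionException.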
future.get(); + + } + assertEquals(rawData.length, sum.get()); + } + + byte[] actualBytes = new byte[rawData.length]; + int offset = 0; + for (byte[] bytes : readBytes) { + System.arraycopy(bytes, 0, actualBytes, offset, bytes.length); + offset += bytes.length; + } + + Arrays.sort(actualBytes); + Arrays.sort(rawData); + assertArrayEquals(rawData, actualBytes); + } + + @Test + public void testPositionalRead() throws IOException { + Path outPath = new Path(testDir(), "testPositionalRead.txt"); + String key = ObjectUtils.pathToKey(outPath); + int fileSize = 5 << 20; + byte[] rawData = TestUtility.rand(fileSize); + getStorage().put(key, rawData); + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + Random rand = ThreadLocalRandom.current(); + + try (ObjectMultiRangeInputStream in = new ObjectMultiRangeInputStream(threadPool, getStorage(), + ObjectUtils.pathToKey(outPath), fileSize, Long.MAX_VALUE, content.checksum())) { + for (int i = 0; i < 100; i++) { + int pos = rand.nextInt(fileSize); + int len = rand.nextInt(fileSize); + + int expectSize = Math.min(fileSize - pos, len); + byte[] actual = new byte[expectSize]; + int actualLen = in.read(pos, actual, 0, expectSize); + + assertEquals(expectSize, actualLen); + assertArrayEquals(Bytes.toBytes(rawData, pos, expectSize), actual); + } + } + } + + @Test + public void testReadAcrossRange() throws IOException { + Path outPath = new Path(testDir(), "testReadAcrossRange.txt"); + String key = ObjectUtils.pathToKey(outPath); + int fileSize = 1 << 10; + byte[] rawData = TestUtility.rand(fileSize); + getStorage().put(key, rawData); + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + try (ObjectMultiRangeInputStream in = new ObjectMultiRangeInputStream( + ThreadPools.defaultWorkerPool(), getStorage(), key, fileSize, 10, content.checksum())) { + byte[] data = new byte[fileSize / 2]; + for (int i = 0; i < 2; i++) { + assertEquals(data.length, in.read(data)); + assertEquals((i + 1) * data.length, in.getPos()); + assertArrayEquals(Bytes.toBytes(rawData, i * data.length, data.length), data); + } + } + } + + @Test + public void testStorageRange() throws IOException { + Path outPath = new Path(testDir(), "testStorageRange.txt"); + String key = ObjectUtils.pathToKey(outPath); + int fileSize = 5 << 20; + byte[] rawData = TestUtility.rand(fileSize); + getStorage().put(key, rawData); + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + int oneMB = 1 << 20; + long rangeOpenLen = oneMB; + try (ObjectMultiRangeInputStream in = new ObjectMultiRangeInputStream( + ThreadPools.defaultWorkerPool(), getStorage(), key, fileSize, rangeOpenLen, + content.checksum())) { + assertNull(in.stream()); + + // Init range. + in.read(); + assertEquals(Range.of(0, rangeOpenLen), in.stream().range()); + // Range doesn't change. + in.read(new byte[(int) (rangeOpenLen - 1)], 0, (int) (rangeOpenLen - 1)); + assertEquals(Range.of(0, rangeOpenLen), in.stream().range()); + + // Move to next range. + in.read(); + assertEquals(Range.of(rangeOpenLen, rangeOpenLen), in.stream().range()); + + // Seek and move. + in.seek(rangeOpenLen * 3 + 10); + in.read(); + assertEquals(Range.of(rangeOpenLen * 3, rangeOpenLen), in.stream().range()); + + // Seek small and range doesn't change. 
+ in.seek(in.getPos() + 1); + in.read(); + assertEquals(Range.of(rangeOpenLen * 3, rangeOpenLen), in.stream().range()); + + // Seek big and range changes. + in.seek(rangeOpenLen * 2); + in.read(new byte[(int) (rangeOpenLen - 10)], 0, (int) (rangeOpenLen - 10)); + assertEquals(Range.of(rangeOpenLen * 2, rangeOpenLen), in.stream().range()); + // Old range has 10 bytes left. Seek 10 bytes then read 10 bytes. Old range can't read any + // bytes, so range changes. + assertEquals(rangeOpenLen * 3 - 10, in.getPos()); + in.seek(rangeOpenLen * 3); + in.read(new byte[10], 0, 10); + assertEquals(Range.of(rangeOpenLen * 3, rangeOpenLen), in.stream().range()); + + // Read big buffer. + in.seek(10); + in.read(new byte[oneMB * 3], 0, oneMB * 3); + assertEquals(oneMB * 3 + 10, in.getPos()); + assertEquals(Range.of(3 * rangeOpenLen, rangeOpenLen), in.stream().range()); + } + + try (ObjectMultiRangeInputStream in = new ObjectMultiRangeInputStream(threadPool, getStorage(), + ObjectUtils.pathToKey(outPath), fileSize, Long.MAX_VALUE, content.checksum())) { + assertNull(in.stream()); + + // Init range. + in.read(); + assertEquals(Range.of(0, fileSize), in.stream().range()); + + // Range doesn't change. + in.read(new byte[oneMB], 0, oneMB); + assertEquals(Range.of(0, fileSize), in.stream().range()); + + // Seek and move. + long pos = oneMB * 3 + 10; + in.seek(pos); + in.read(); + assertEquals(Range.of(0, fileSize), in.stream().range()); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectOutputStream.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectOutputStream.java new file mode 100644 index 0000000000000..7c459dc29dece --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectOutputStream.java @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.object; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.common.ThreadPools; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.object.staging.StagingPart; +import org.apache.hadoop.fs.tosfs.object.staging.State; +import org.apache.hadoop.fs.tosfs.util.FSUtils; +import org.apache.hadoop.fs.tosfs.util.TempFiles; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.util.Lists; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestObjectOutputStream extends ObjectStorageTestBase { + + private static ExecutorService threadPool; + + @BeforeAll + public static void beforeClass() { + threadPool = ThreadPools.newWorkerPool("TestObjectOutputStream-pool"); + } + + @AfterAll + public static void afterClass() { + if (!threadPool.isShutdown()) { + threadPool.shutdown(); + } + } + + @Test + public void testMkStagingDir() throws ExecutionException, InterruptedException, IOException { + try (TempFiles tmp = TempFiles.of()) { + List<String> tmpDirs = Lists.newArrayList(); + for (int i = 0; i < 3; i++) { + tmpDirs.add(tmp.newDir()); + } + Configuration newConf = new Configuration(tosConf()); + newConf.set(ConfKeys.FS_MULTIPART_STAGING_DIR.key("filestore"), Joiner.on(",").join(tmpDirs)); + + // Start multiple threads to open streams to create staging dir. + List<Future<ObjectOutputStream>> futures = Collections.synchronizedList(new ArrayList<>()); + for (int i = 0; i < 10; i++) { + futures.add(threadPool.submit(() -> + new ObjectOutputStream(getStorage(), threadPool, newConf, path("none.txt"), true))); + } + for (Future<ObjectOutputStream> f : futures) { + f.get().close(); + } + } + } + + @Test + public void testWriteZeroByte() throws IOException { + Path zeroByteTxt = path("zero-byte.txt"); + ObjectOutputStream out = + new ObjectOutputStream(getStorage(), threadPool, tosConf(), zeroByteTxt, true); + // write zero-byte and close. + out.write(new byte[0], 0, 0); + out.close(); + assertStagingPart(0, out.stagingParts()); + + // Read and validate the dest object contents + ObjectTestUtils.assertObject(zeroByteTxt, ObjectTestUtils.EMPTY_BYTES); + } + + @Test + public void testWriteZeroByteWithoutAllowPut() throws IOException { + Path zeroByteTxt = path("zero-byte-without-allow-put.txt"); + ObjectOutputStream out = + new ObjectOutputStream(getStorage(), threadPool, tosConf(), zeroByteTxt, false); + // write zero-byte and close. + out.close(); + assertStagingPart(0, out.stagingParts()); + + // Read and validate the dest object content. 
+ ObjectTestUtils.assertObject(zeroByteTxt, ObjectTestUtils.EMPTY_BYTES); + } + + @Test + public void testDeleteStagingFileWhenUploadPartsOK() throws IOException { + Path path = path("data.txt"); + ObjectOutputStream out = + new ObjectOutputStream(getStorage(), threadPool, tosConf(), path, true); + byte[] data = TestUtility.rand((int) (ConfKeys.FS_MULTIPART_SIZE_DEFAULT * 2)); + out.write(data); + out.waitForPartsUpload(); + for (StagingPart part : out.stagingParts()) { + assertEquals(State.CLEANED, part.state()); + } + out.close(); + for (StagingPart part : out.stagingParts()) { + assertEquals(State.CLEANED, part.state()); + } + } + + @Test + public void testDeleteStagingFileWithClose() throws IOException { + Path path = path("data.txt"); + ObjectOutputStream out = + new ObjectOutputStream(getStorage(), threadPool, tosConf(), path, true); + byte[] data = TestUtility.rand((int) (ConfKeys.FS_MULTIPART_SIZE_DEFAULT * 2)); + out.write(data); + out.close(); + for (StagingPart part : out.stagingParts()) { + assertEquals(State.CLEANED, part.state()); + } + } + + @Test + public void testDeleteSimplePutStagingFile() throws IOException { + Path smallTxt = path("small.txt"); + ObjectOutputStream out = + new ObjectOutputStream(getStorage(), threadPool, tosConf(), smallTxt, true); + byte[] data = TestUtility.rand(4 << 20); + out.write(data); + for (StagingPart part : out.stagingParts()) { + assertTrue(part.size() > 0); + } + out.close(); + for (StagingPart part : out.stagingParts()) { + assertEquals(State.CLEANED, part.state()); + } + } + + @Test + public void testSimplePut() throws IOException { + Path smallTxt = path("small.txt"); + ObjectOutputStream out = + new ObjectOutputStream(getStorage(), threadPool, tosConf(), smallTxt, true); + byte[] data = TestUtility.rand(4 << 20); + out.write(data); + out.close(); + assertStagingPart(1, out.stagingParts()); + assertNull(out.upload(), "Should use the simple PUT to upload object for small file."); + + // Read and validate the dest object content. 
+ ObjectTestUtils.assertObject(smallTxt, data); + } + + public void testWrite(int uploadPartSize, int len) throws IOException { + Configuration newConf = new Configuration(tosConf()); + newConf.setLong(ConfKeys.FS_MULTIPART_SIZE.key(FSUtils.scheme(conf(), testDir().toUri())), + uploadPartSize); + + Path outPath = path(len + ".txt"); + int partNum = (len - 1) / uploadPartSize + 1; + + byte[] data = TestUtility.rand(len); + ObjectOutputStream out = + new ObjectOutputStream(getStorage(), threadPool, newConf, outPath, true); + try { + out.write(data); + } finally { + out.close(); + } + + assertStagingPart(partNum, out.stagingParts()); + ObjectTestUtils.assertObject(outPath, data); + + // List multipart uploads + int uploadsNum = 0; + for (MultipartUpload ignored : getStorage().listUploads(out.destKey())) { + uploadsNum += 1; + } + assertEquals(0L, uploadsNum); + } + + @Test + public void testParallelWriteOneOutPutStream() throws IOException, ExecutionException, + InterruptedException { + testParallelWriteOneOutPutStreamImpl(5 << 20, 10, 128); + testParallelWriteOneOutPutStreamImpl(5 << 20, 10, 1 << 20); + testParallelWriteOneOutPutStreamImpl(5 << 20, 10, 2 << 20); + testParallelWriteOneOutPutStreamImpl(5 << 20, 10, 6 << 20); + } + + public void testParallelWriteOneOutPutStreamImpl(int partSize, int epochs, int batchSize) + throws IOException, ExecutionException, InterruptedException { + Configuration newConf = new Configuration(tosConf()); + newConf.setLong(ConfKeys.FS_MULTIPART_SIZE.key(FSUtils.scheme(conf(), testDir().toUri())), + partSize); + + String file = + String.format("%d-%d-%d-testParallelWriteOneOutPutStream.txt", partSize, epochs, batchSize); + Path outPath = path(file); + try (ObjectOutputStream out = new ObjectOutputStream(getStorage(), threadPool, newConf, outPath, + true)) { + List<Future<?>> futures = new ArrayList<>(); + for (int i = 0; i < epochs; i++) { + final int index = i; + futures.add(threadPool.submit(() -> { + try { + out.write(dataset(batchSize, index)); + } catch (IOException e) { + throw new RuntimeException(e); + } + })); + } + + // wait for all tasks finished + for (Future<?> future : futures) { + future.get(); + } + } + + try (InputStream inputStream = getStorage().get(ObjectUtils.pathToKey(outPath)).stream()) { + List<byte[]> ret = new ArrayList<>(); + byte[] data = new byte[batchSize]; + while (inputStream.read(data) != -1) { + ret.add(data); + data = new byte[batchSize]; + } + + assertEquals(epochs, ret.size()); + List<byte[]> sortedRet = ret.stream() + .sorted(Comparator.comparingInt(o -> o[0])) + .collect(Collectors.toList()); + + int j = 0; + for (byte[] e : sortedRet) { + assertArrayEquals(dataset(batchSize, j), e); + j++; + } + } + } + + public static byte[] dataset(int len, int base) { + byte[] dataset = new byte[len]; + for (int i = 0; i < len; i++) { + dataset[i] = (byte) (base); + } + return dataset; + } + + @Test + public void testWrite1MB() throws IOException { + testWrite(5 << 20, 1 << 20); + testWrite(8 << 20, 1 << 20); + testWrite(16 << 20, 1 << 20); + } + + @Test + public void testWrite24MB() throws IOException { + testWrite(5 << 20, 24 << 20); + testWrite(8 << 20, 24 << 20); + testWrite(16 << 20, 24 << 20); + } + + @Test + public void testWrite100MB() throws IOException { + testWrite(5 << 20, 100 << 20); + testWrite(8 << 20, 100 << 20); + testWrite(16 << 20, 100 << 20); + } + + private void testMultipartThreshold(int partSize, int multipartThreshold, int dataSize) + throws IOException { + Configuration newConf = new Configuration(tosConf()); + 
newConf.setLong(ConfKeys.FS_MULTIPART_SIZE.key(scheme()), partSize); + newConf.setLong(ConfKeys.FS_MULTIPART_THRESHOLD.key(scheme()), multipartThreshold); + Path outPath = + path(String.format("threshold-%d-%d-%d.txt", partSize, multipartThreshold, dataSize)); + + byte[] data = TestUtility.rand(dataSize); + ObjectOutputStream out = + new ObjectOutputStream(getStorage(), threadPool, newConf, outPath, true); + try { + // Verify for every 1MB data writing, unless reaching the threshold. + int upperLimit = Math.min(multipartThreshold, dataSize); + int curOff = 0; + for (; curOff < upperLimit; curOff += (1 << 20)) { + int end = Math.min(curOff + (1 << 20), upperLimit); + out.write(Arrays.copyOfRange(data, curOff, end)); + + List uploads = Lists.newArrayList(getStorage().listUploads(out.destKey())); + if (end < multipartThreshold) { + assertEquals(0, uploads.size(), + "Shouldn't has any uploads because it just use simple PUT"); + } else { + assertEquals(1, uploads.size(), "Switch to use MPU."); + } + assertEquals((end - 1) / partSize + 1, out.stagingParts().size()); + } + + // Verify for every 1MB data writing, unless reaching the data size. + for (; curOff < dataSize; curOff += (1 << 20)) { + int end = Math.min(curOff + (1 << 20), dataSize); + out.write(Arrays.copyOfRange(data, curOff, end)); + + List uploads = Lists.newArrayList(getStorage().listUploads(out.destKey())); + assertEquals(1, uploads.size()); + assertEquals(out.destKey(), uploads.get(0).key()); + assertEquals((end - 1) / partSize + 1, out.stagingParts().size()); + } + } finally { + out.close(); + } + + assertStagingPart((dataSize - 1) / partSize + 1, out.stagingParts()); + ObjectTestUtils.assertObject(outPath, data); + + List uploads = Lists.newArrayList(getStorage().listUploads(out.destKey())); + assertEquals(0, uploads.size()); + } + + @Test + public void testMultipartThreshold2MB() throws IOException { + testMultipartThreshold(5 << 20, 2 << 20, 1 << 20); + testMultipartThreshold(5 << 20, 2 << 20, (2 << 20) - 1); + testMultipartThreshold(5 << 20, 2 << 20, 2 << 20); + testMultipartThreshold(5 << 20, 2 << 20, 4 << 20); + testMultipartThreshold(5 << 20, 2 << 20, 5 << 20); + testMultipartThreshold(5 << 20, 2 << 20, (5 << 20) + 1); + testMultipartThreshold(5 << 20, 2 << 20, 6 << 20); + testMultipartThreshold(5 << 20, 2 << 20, 10 << 20); + testMultipartThreshold(5 << 20, 2 << 20, 20 << 20); + } + + @Test + public void testMultipartThreshold5MB() throws IOException { + testMultipartThreshold(5 << 20, 5 << 20, 1 << 20); + testMultipartThreshold(5 << 20, 5 << 20, 4 << 20); + testMultipartThreshold(5 << 20, 5 << 20, 5 << 20); + testMultipartThreshold(5 << 20, 5 << 20, 5 << 20); + testMultipartThreshold(5 << 20, 5 << 20, 6 << 20); + testMultipartThreshold(5 << 20, 5 << 20, 10 << 20); + testMultipartThreshold(5 << 20, 5 << 20, 20 << 20); + } + + @Test + public void testMultipartThreshold10MB() throws IOException { + testMultipartThreshold(5 << 20, 10 << 20, 1 << 20); + testMultipartThreshold(5 << 20, 10 << 20, 10 << 20); + testMultipartThreshold(5 << 20, 10 << 20, 11 << 20); + testMultipartThreshold(5 << 20, 10 << 20, 15 << 20); + testMultipartThreshold(5 << 20, 10 << 20, 20 << 20); + testMultipartThreshold(5 << 20, 10 << 20, 40 << 20); + testMultipartThreshold(5 << 20, 10 << 20, 30 << 20); + } + + @Test + public void testCloseStreamTwice() throws IOException { + int len = 100; + Path outPath = path(len + ".txt"); + int partNum = 1; + + byte[] data = TestUtility.rand(len); + ObjectOutputStream out = + new ObjectOutputStream(getStorage(), 
threadPool, tosConf(), outPath, true); + try { + out.write(data); + out.close(); + } finally { + out.close(); + } + + assertStagingPart(partNum, out.stagingParts()); + ObjectTestUtils.assertObject(outPath, data); + } + + @Test + public void testWriteClosedStream() throws IOException { + byte[] data = TestUtility.rand(10); + Path outPath = path("testWriteClosedStream.txt"); + try (ObjectOutputStream out = new ObjectOutputStream(getStorage(), threadPool, tosConf(), + outPath, true)) { + out.close(); + out.write(data); + } catch (IllegalStateException e) { + assertEquals("OutputStream is closed.", e.getMessage()); + } + } + + private static void assertStagingPart(int expectedNum, List parts) { + assertEquals(expectedNum, parts.size()); + for (StagingPart part : parts) { + assertTrue(part.size() > 0); + } + } + + private Path path(String name) { + return new Path(testDir(), name); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectRangeInputStream.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectRangeInputStream.java new file mode 100644 index 0000000000000..45db5c66fb6bc --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectRangeInputStream.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.object; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.util.Range; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.Arrays; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestObjectRangeInputStream extends ObjectStorageTestBase { + + @Test + public void testRead() throws IOException { + Path outPath = new Path(testDir(), "testRead.txt"); + String key = ObjectUtils.pathToKey(outPath); + byte[] rawData = TestUtility.rand(1 << 10); + getStorage().put(key, rawData); + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + int position = 100; + int len = 200; + try (ObjectRangeInputStream ri = new ObjectRangeInputStream(getStorage(), key, + Range.of(position, len), content.checksum())) { + // Test read byte. + assertEquals(rawData[position] & 0xff, ri.read()); + + // Test read buffer. 
+ byte[] buffer = new byte[len]; + assertEquals(buffer.length - 1, ri.read(buffer, 0, buffer.length)); + assertArrayEquals( + Arrays.copyOfRange(rawData, position + 1, position + len), + Arrays.copyOfRange(buffer, 0, buffer.length - 1)); + assertEquals(0, ri.available()); + + assertEquals(-1, ri.read()); + assertEquals(-1, ri.read(buffer, 0, buffer.length)); + } + } + + @Test + public void testRangeExceedInnerStream() throws IOException { + Path outPath = new Path(testDir(), "testRangeExceedInnerStream.txt"); + String key = ObjectUtils.pathToKey(outPath); + byte[] rawData = TestUtility.rand(10); + getStorage().put(key, rawData); + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + int position = 10; + int badLen = 10; + try (ObjectRangeInputStream ri = new ObjectRangeInputStream(getStorage(), key, + Range.of(position, badLen), content.checksum())) { + byte[] buffer = new byte[1]; + assertEquals(-1, ri.read()); + assertEquals(-1, ri.read(buffer, 0, buffer.length)); + } + } + + @Test + public void testRangeInclude() throws IOException { + Path outPath = new Path(testDir(), "testRangeInclude.txt"); + String key = ObjectUtils.pathToKey(outPath); + byte[] rawData = TestUtility.rand(10); + getStorage().put(key, rawData); + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + long pos = 100; + long len = 300; + + try (ObjectRangeInputStream in = new ObjectRangeInputStream(getStorage(), key, + Range.of(pos, len), content.checksum())) { + assertEquals(Range.of(pos, len), in.range()); + + assertTrue(in.include(pos)); + assertTrue(in.include((pos + len) / 2)); + assertTrue(in.include(pos + len - 1)); + + assertFalse(in.include(pos - 1)); + assertFalse(in.include(pos + len)); + } + } + + @Test + public void testSeek() throws IOException { + Path outPath = new Path(testDir(), "testSeek.txt"); + String key = ObjectUtils.pathToKey(outPath); + byte[] rawData = TestUtility.rand(1 << 10); + getStorage().put(key, rawData); + ObjectContent content = getStorage().get(key); + assertArrayEquals(rawData, IOUtils.toByteArray(content.stream())); + + long pos = 100; + long len = 300; + + try (ObjectRangeInputStream in = new ObjectRangeInputStream(getStorage(), key, + Range.of(pos, len), content.checksum())) { + assertEquals(pos, in.getPos()); + + Exception error = assertThrows(IllegalArgumentException.class, () -> in.seek(-1), "Overflow"); + assertTrue(error.getMessage().contains("must be in range Range{offset=100, length=300}")); + error = assertThrows(IllegalArgumentException.class, () -> in.seek(99), "Overflow"); + assertTrue(error.getMessage().contains("must be in range Range{offset=100, length=300}")); + error = assertThrows(IllegalArgumentException.class, () -> in.seek(401), "Overflow"); + assertTrue(error.getMessage().contains("must be in range Range{offset=100, length=300}")); + error = assertThrows(IllegalArgumentException.class, () -> in.seek(1 << 20), "Overflow"); + assertTrue(error.getMessage().contains("must be in range Range{offset=100, length=300}")); + + in.seek(399); + assertTrue(0 <= in.read()); + assertEquals(-1, in.read()); + + in.seek(100); + assertTrue(in.read() >= 0); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectStorage.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectStorage.java new file mode 100644 index 
0000000000000..1c18219404f26 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/TestObjectStorage.java @@ -0,0 +1,1495 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.object; + +import com.volcengine.tos.TosServerException; +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.conf.TosKeys; +import org.apache.hadoop.fs.tosfs.object.exceptions.InvalidObjectKeyException; +import org.apache.hadoop.fs.tosfs.object.exceptions.NotAppendableException; +import org.apache.hadoop.fs.tosfs.object.request.ListObjectsRequest; +import org.apache.hadoop.fs.tosfs.object.response.ListObjectsResponse; +import org.apache.hadoop.fs.tosfs.util.CommonUtils; +import org.apache.hadoop.fs.tosfs.util.TempFiles; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.apache.hadoop.util.Lists; +import org.apache.hadoop.util.Preconditions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeFalse; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public class TestObjectStorage { + private static final String FILE_STORE_ROOT = TempFiles.newTempDir("TestObjectStorage"); + private ObjectStorage storage; + + public static Stream provideArguments() { + assumeTrue(TestEnv.checkTestEnabled()); + + List values = new ArrayList<>(); + for (ObjectStorage 
store : TestUtility.createTestObjectStorage(FILE_STORE_ROOT)) { + values.add(Arguments.of(store)); + } + return values.stream(); + } + + private void setEnv(ObjectStorage objectStore) { + this.storage = objectStore; + } + + @AfterEach + public void tearDown() { + CommonUtils.runQuietly(() -> storage.deleteAll("")); + for (MultipartUpload upload : storage.listUploads("")) { + storage.abortMultipartUpload(upload.key(), upload.uploadId()); + } + } + + @AfterAll + public static void afterClass() throws Exception { + CommonUtils.runQuietly(() -> TempFiles.deleteDir(FILE_STORE_ROOT)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testHeadNonExistObject(ObjectStorage store) { + setEnv(store); + assertNull(storage.head("a/b/c.txt")); + + byte[] data = TestUtility.rand(256); + storage.put("a/b/c.txt", data); + assertNotNull(storage.head("a/b/c.txt")); + + assertNull(storage.head("a/b/c/non-exits")); + if (storage.bucket().isDirectory()) { + assertThrows(InvalidObjectKeyException.class, () -> storage.head("a/b/c.txt/non-exits")); + } else { + assertNull(storage.head("a/b/c.txt/non-exits")); + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testHeadExistObject(ObjectStorage store) { + setEnv(store); + byte[] data = TestUtility.rand(256); + String key = "testHeadExistObject.txt"; + storage.put(key, data); + + ObjectInfo obj = storage.head(key); + assertEquals(key, obj.key()); + assertFalse(obj.isDir()); + if (storage.bucket().isDirectory()) { + assertThrows(InvalidObjectKeyException.class, () -> storage.head(key + "/")); + } else { + assertNull(storage.head(key + "/")); + } + + String dirKey = "testHeadExistObject/"; + storage.put(dirKey, new byte[0]); + obj = storage.head(dirKey); + assertEquals(dirKey, obj.key()); + assertTrue(obj.isDir()); + + if (storage.bucket().isDirectory()) { + obj = storage.head("testHeadExistObject"); + assertEquals("testHeadExistObject", obj.key()); + assertTrue(obj.isDir()); + } else { + assertNull(storage.head("testHeadExistObject")); + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testGetAndDeleteNonExistFile(ObjectStorage store) { + setEnv(store); + // ensure file is not exist + assertNull(storage.head("a/b/c.txt")); + + assertThrows(RuntimeException.class, () -> storage.get("a/b/c.txt", 0, 0)); + assertThrows(RuntimeException.class, () -> storage.get("a/b/c.txt", 0, 1)); + + // Allow to delete a non-exist object. + storage.delete("a/b/c.txt"); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testPutAndDeleteFileWithEmptyKey(ObjectStorage store) { + setEnv(store); + assertThrows(RuntimeException.class, () -> storage.put("", new byte[0])); + assertThrows(RuntimeException.class, () -> storage.put(null, new byte[0])); + assertThrows(RuntimeException.class, () -> storage.delete(null)); + assertThrows(RuntimeException.class, () -> storage.head("")); + assertThrows(RuntimeException.class, () -> storage.head(null)); + assertThrows(RuntimeException.class, () -> getStream("")); + assertThrows(RuntimeException.class, () -> getStream(null)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testPutObjectButContentLengthDisMatch(ObjectStorage store) throws IOException { + setEnv(store); + byte[] data = TestUtility.rand(256); + String key = "a/truncated.txt"; + + // The final object data will be truncated if content length is smaller. 
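+    // For example (an illustrative restatement of the assertions that follow, not an extra
+    // check): with a 256-byte buffer and a declared content length of 200, only the first
+    // 200 bytes end up in the object:
+    //   storage.put(key, () -> new ByteArrayInputStream(data), 200);
+    //   assertEquals(200, storage.head(key).size());
+    // while declaring a length larger than the stream (300 here) makes the put fail.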
+ byte[] checksum = storage.put(key, () -> new ByteArrayInputStream(data), 200); + assertArrayEquals(Arrays.copyOfRange(data, 0, 200), IOUtils.toByteArray(getStream(key))); + ObjectInfo info = storage.head(key); + assertEquals(key, info.key()); + assertEquals(200, info.size()); + assertArrayEquals(checksum, info.checksum()); + + // Will create object failed is the content length is bigger. + assertThrows(RuntimeException.class, + () -> storage.put(key, () -> new ByteArrayInputStream(data), 300)); + } + + private InputStream getStream(String key) { + return storage.get(key).stream(); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testPutAndGetFile(ObjectStorage store) throws IOException { + setEnv(store); + byte[] data = TestUtility.rand(256); + String key = "a/test.txt"; + byte[] checksum = storage.put(key, data); + assertArrayEquals(data, IOUtils.toByteArray(getStream(key))); + + if (storage.bucket().isDirectory()) { + // Directory bucket will create missed parent dir. + assertArrayEquals(new byte[0], IOUtils.toByteArray(getStream("a"))); + assertArrayEquals(new byte[0], IOUtils.toByteArray(getStream("a/"))); + } else { + assertNull(storage.head("a")); + assertNull(storage.head("a/")); + } + + ObjectInfo info = storage.head(key); + assertEquals(key, info.key()); + assertEquals(data.length, info.size()); + assertArrayEquals(checksum, info.checksum()); + + ObjectContent content = storage.get(key); + assertArrayEquals(info.checksum(), content.checksum()); + assertArrayEquals(data, IOUtils.toByteArray(content.stream())); + + assertArrayEquals(data, IOUtils.toByteArray(getStream(key, 0, -1))); + assertThrows(RuntimeException.class, () -> storage.get(key, -1, -1), "offset is negative"); + assertThrows(RuntimeException.class, () -> storage.get(key + "/", 0, -1), + "path not found or resource type is invalid"); + + assertArrayEquals(data, IOUtils.toByteArray(getStream(key, 0, 256))); + assertArrayEquals(data, IOUtils.toByteArray(getStream(key, 0, 512))); + + byte[] secondHalfData = Arrays.copyOfRange(data, 128, 256); + assertArrayEquals(secondHalfData, IOUtils.toByteArray(getStream(key, 128, -1))); + assertArrayEquals(secondHalfData, IOUtils.toByteArray(getStream(key, 128, 256))); + assertArrayEquals(secondHalfData, IOUtils.toByteArray(getStream(key, 128, 257))); + assertArrayEquals(new byte[0], IOUtils.toByteArray(getStream(key, 128, 0))); + + ObjectContent partContent = storage.get(key, 8, 32); + assertArrayEquals(info.checksum(), partContent.checksum()); + assertArrayEquals(Arrays.copyOfRange(data, 8, 40), + IOUtils.toByteArray(partContent.stream())); + assertArrayEquals(data, IOUtils.toByteArray(getStream(key))); + + assertThrows(RuntimeException.class, () -> storage.get(key, 257, 8), + "offset is bigger than object length"); + assertArrayEquals(new byte[0], IOUtils.toByteArray(getStream(key, 256, 8))); + + assertArrayEquals(new byte[0], IOUtils.toByteArray(getStream(key, 0, 0))); + assertArrayEquals(new byte[0], IOUtils.toByteArray(getStream(key, 1, 0))); + + + // assert the original data is not changed during random get request + assertArrayEquals(data, IOUtils.toByteArray(getStream(key))); + + storage.delete(key); + assertNull(storage.head(key)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testAppendAndGetFile(ObjectStorage store) throws Exception { + setEnv(store); + String key = "a/testAppendAndGetFile.txt"; + + // Append zero bytes. 
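+    // Note: appending zero bytes to a key that does not exist yet is rejected (there is nothing
+    // to create), while appending zero bytes to an existing key is a no-op that keeps the same
+    // checksum -- both cases are exercised in this test.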
+ assertThrows(NotAppendableException.class, () -> storage.append(key, new byte[0]), + "Append non-existed object with zero byte is not supported."); + + // Append 256 bytes. + byte[] data = TestUtility.rand(256); + byte[] checksum = storage.append(key, data); + assertArrayEquals(data, IOUtils.toByteArray(getStream(key))); + + // Append zero bytes. + byte[] newChecksum = storage.append(key, new byte[0]); + assertArrayEquals(checksum, newChecksum); + checksum = newChecksum; + + // Append one byte. + newChecksum = storage.append(key, new byte[1]); + assertFalse(Arrays.equals(checksum, newChecksum)); + assertArrayEquals(newChecksum, storage.head(key).checksum()); + checksum = newChecksum; + + // Append 1024 byte. + data = TestUtility.rand(1024); + newChecksum = storage.append(key, data); + assertFalse(Arrays.equals(checksum, newChecksum)); + assertArrayEquals(newChecksum, storage.head(key).checksum()); + + storage.delete(key); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testAppendLengthNotMatch(ObjectStorage store) { + setEnv(store); + byte[] data = TestUtility.rand(256); + String key = "a/testAppendLengthNotMatch.txt"; + storage.append(key, () -> new ByteArrayInputStream(data), 128); + assertEquals(128, storage.head(key).size()); + + assertThrows(RuntimeException.class, + () -> storage.append(key, () -> new ByteArrayInputStream(data), 1024), + "Expect unexpected end of stream error."); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testHeadAndListAndObjectStatusShouldGetSameObjectInfo(ObjectStorage store) { + setEnv(store); + String key = "testHeadAndListObjectCheckSum.txt"; + byte[] data = TestUtility.rand(256); + byte[] checksum = storage.put(key, data); + + ObjectInfo obj = storage.head(key); + assertEquals(obj, storage.objectStatus(key)); + if (!storage.bucket().isDirectory()) { + List objects = toList(storage.list(key, null, 1)); + assertEquals(1, objects.size()); + assertEquals(obj, objects.get(0)); + assertArrayEquals(checksum, objects.get(0).checksum()); + } + + + key = "testHeadAndListObjectCheckSum/"; + checksum = storage.put(key, new byte[0]); + obj = storage.head(key); + assertEquals(obj, storage.objectStatus(key)); + if (!storage.bucket().isDirectory()) { + List objects = toList(storage.list(key, null, 1)); + assertEquals(1, objects.size()); + assertEquals(obj, objects.get(0)); + assertArrayEquals(checksum, objects.get(0).checksum()); + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testObjectStatus(ObjectStorage store) { + setEnv(store); + // test get file status + String key = "a/b/testObjectStatus.txt"; + byte[] data = TestUtility.rand(256); + byte[] checksum = storage.put(key, data); + + ObjectInfo obj = storage.head(key); + assertArrayEquals(checksum, obj.checksum()); + assertEquals(obj, storage.objectStatus(key)); + + if (storage.bucket().isDirectory()) { + assertThrows(InvalidObjectKeyException.class, () -> storage.head(key + "/")); + assertThrows(InvalidObjectKeyException.class, () -> storage.objectStatus(key + "/")); + } else { + assertNull(storage.head(key + "/")); + assertNull(storage.objectStatus(key + "/")); + } + + // test get dir status + String dirKey = "a/b/dir/"; + checksum = storage.put(dirKey, new byte[0]); + obj = storage.head(dirKey); + assertEquals(Constants.MAGIC_CHECKSUM, checksum); + assertArrayEquals(Constants.MAGIC_CHECKSUM, checksum); + assertArrayEquals(checksum, obj.checksum()); + assertTrue(obj.isDir()); + assertEquals(dirKey, obj.key()); + 
assertEquals(obj, storage.objectStatus(dirKey)); + + if (storage.bucket().isDirectory()) { + assertNotNull(storage.head("a/b/dir")); + assertEquals("a/b/dir", storage.objectStatus("a/b/dir").key()); + } else { + assertNull(storage.head("a/b/dir")); + assertEquals(dirKey, storage.objectStatus("a/b/dir").key()); + } + + // test get dir status of prefix + String prefix = "a/b/"; + obj = storage.objectStatus(prefix); + assertEquals(prefix, obj.key()); + assertEquals(Constants.MAGIC_CHECKSUM, obj.checksum()); + assertTrue(obj.isDir()); + + if (storage.bucket().isDirectory()) { + assertEquals(obj, storage.head(prefix)); + assertEquals("a/b", storage.objectStatus("a/b").key()); + } else { + assertNull(storage.head(prefix)); + assertEquals(prefix, storage.objectStatus("a/b").key()); + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testPutAndGetDirectory(ObjectStorage store) throws IOException { + setEnv(store); + String key = "a/b/"; + byte[] data = new byte[0]; + storage.put(key, data); + + ObjectInfo info = storage.head(key); + assertEquals(key, info.key()); + assertEquals(data.length, info.size()); + assertArrayEquals(data, IOUtils.toByteArray(getStream(key))); + assertArrayEquals(data, IOUtils.toByteArray(getStream(key, 0, 256))); + + // test create the same dir again + storage.put(key, data); + + storage.delete(key); + assertNull(storage.head(key)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testOverwriteFile(ObjectStorage store) throws IOException { + setEnv(store); + String key = "a/test.txt"; + byte[] data1 = TestUtility.rand(256); + byte[] data2 = TestUtility.rand(128); + + storage.put(key, data1); + assertArrayEquals(data1, IOUtils.toByteArray(getStream(key, 0, -1))); + + storage.put(key, data2); + assertArrayEquals(data2, IOUtils.toByteArray(getStream(key, 0, -1))); + + storage.delete(key); + assertNull(storage.head(key)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListObjectsWithEmptyDelimiters(ObjectStorage store) { + setEnv(store); + // Directory bucket only supports list with delimiter = '/' currently. + assumeFalse(storage.bucket().isDirectory()); + String key1 = "a/b/c/d"; + String key2 = "a/b"; + + byte[] data = TestUtility.rand(256); + for (int i = 0; i < 10; i++) { + storage.put(String.format("%s/file-%d.txt", key1, i), data); + storage.put(String.format("%s/file-%d.txt", key2, i), data); + } + + // list 100 objects under 'a/', there are total 20 objects. 
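+    // (10 files under 'a/b/c/d/' plus 10 files under 'a/b/' make 20 keys; with an empty
+    // delimiter nothing is grouped, so commonPrefixes is expected to stay empty below.)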
+ ListObjectsResponse response = list("a/", "", 100, ""); + assertEquals(20, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + assertEquals("a/b/c/d/file-0.txt", response.objects().get(0).key()); + assertEquals("a/b/file-9.txt", response.objects().get(19).key()); + + // list 20 objects and there only have 20 objects under 'a/' + response = list("a/", "", 20, ""); + assertEquals(20, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + assertEquals("a/b/c/d/file-0.txt", response.objects().get(0).key()); + assertEquals("a/b/file-9.txt", response.objects().get(19).key()); + + // list the top 10 objects among 20 objects + response = list("a/", "", 10, ""); + assertEquals(10, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + assertEquals("a/b/c/d/file-0.txt", response.objects().get(0).key()); + assertEquals("a/b/c/d/file-9.txt", response.objects().get(9).key()); + + // list the next 5 objects behind a/b/c/d/file-9.txt among 20 objects + response = list("a/", "a/b/c/d/file-9.txt", 5, ""); + assertEquals(5, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + assertEquals("a/b/file-0.txt", response.objects().get(0).key()); + assertEquals("a/b/file-4.txt", response.objects().get(4).key()); + + // list the next 10 objects behind a/b/c/d/file-9.txt among 20 objects + response = list("a/", "a/b/c/d/file-9.txt", 10, ""); + assertEquals(10, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + assertEquals("a/b/file-0.txt", response.objects().get(0).key()); + assertEquals("a/b/file-9.txt", response.objects().get(9).key()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListEmptyDirWithSlashDelimiter(ObjectStorage store) { + setEnv(store); + String key = "a/b/"; + storage.put(key, new byte[0]); + + ListObjectsResponse response = list(key, null, 10, "/"); + assertEquals(1, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + assertEquals("a/b/", response.objects().get(0).key()); + + response = list(key, key, 10, "/"); + assertEquals(0, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testDeleteMultipleKeys(ObjectStorage store) { + setEnv(store); + String prefix = "a/b"; + byte[] data = TestUtility.rand(256); + + List keys = Lists.newArrayList(); + for (int i = 0; i < 50; i++) { + String existingKey = String.format("%s/existing-file-%d.txt", prefix, i); + storage.put(existingKey, data); + keys.add(existingKey); + + String unExistingKey = String.format("%s/unExisting-file-%d.txt", prefix, i); + keys.add(unExistingKey); + } + + List failedKeys = storage.batchDelete(keys); + + for (String key : failedKeys) { + assertNotNull(storage.head(key)); + } + + for (String key : keys) { + if (!failedKeys.contains(key)) { + assertNull(storage.head(key)); + } + } + + assertThrows(IllegalArgumentException.class, () -> storage.batchDelete( + IntStream.range(0, 1001).mapToObj(String::valueOf).collect(Collectors.toList())), + "The deleted keys size should be <= 1000"); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListObjectsWithEmptyMarkers(ObjectStorage store) { + setEnv(store); + String key1 = "a/b/c/d"; + String key2 = "a/b"; + String key3 = "a1/b1"; + + // create the folder to compatible with directory bucket. 
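+    // (Note: putting each parent level explicitly keeps the listings comparable across bucket
+    // types, since a general purpose bucket only contains the directory keys that were actually
+    // put, whereas a directory bucket materializes parent dirs on its own.)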
+ storage.put("a/", new byte[0]); + storage.put("a/b/", new byte[0]); + storage.put("a/b/c/", new byte[0]); + storage.put("a/b/c/d/", new byte[0]); + storage.put("a1/", new byte[0]); + storage.put("a1/b1/", new byte[0]); + + byte[] data = TestUtility.rand(256); + for (int i = 0; i < 10; i++) { + storage.put(String.format("%s/file-%d.txt", key1, i), data); + storage.put(String.format("%s/file-%d.txt", key2, i), data); + storage.put(String.format("%s/file-%d.txt", key3, i), data); + } + + // group objects by '/' under 'a/' + ListObjectsResponse response = list("a/", null, 100, "/"); + assertEquals(1, response.objects().size()); + assertEquals("a/", response.objects().get(0).key()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/", response.commonPrefixes().get(0)); + + response = list("a", null, 100, "/"); + assertEquals(0, response.objects().size()); + assertEquals(2, response.commonPrefixes().size()); + assertEquals("a/", response.commonPrefixes().get(0)); + assertEquals("a1/", response.commonPrefixes().get(1)); + + // group objects by '/' under 'a/b/' and group objects by 'b/' under 'a', they are same + response = list("a/b/", null, 100, "/"); + assertEquals(11, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/c/", response.commonPrefixes().get(0)); + assertEquals("a/b/", response.objects().get(0).key()); + assertEquals("a/b/file-0.txt", response.objects().get(1).key()); + assertEquals("a/b/file-9.txt", response.objects().get(10).key()); + + response = list("a/b", null, 100, "/"); + assertEquals(0, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/", response.commonPrefixes().get(0)); + + if (!storage.bucket().isDirectory()) { + // Directory bucket only supports list with delimiter = '/' currently. 
+ response = list("a", null, 100, "b/"); + assertEquals(13, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/", response.commonPrefixes().get(0)); + assertEquals("a/", response.objects().get(0).key()); + assertEquals("a1/", response.objects().get(1).key()); + assertEquals("a1/b1/", response.objects().get(2).key()); + assertEquals("a1/b1/file-0.txt", response.objects().get(3).key()); + assertEquals("a1/b1/file-9.txt", response.objects().get(12).key()); + + response = list("a/", null, 100, "b/"); + assertEquals(1, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/", response.commonPrefixes().get(0)); + assertEquals("a/", response.objects().get(0).key()); + } + + // group objects by different delimiter under 'a/b/c/d/' or 'a/b/c/d' + response = list("a/b/c/d/", null, 100, "/"); + assertEquals(11, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + assertEquals("a/b/c/d/", response.objects().get(0).key()); + + response = list("a/b/c/d/", null, 5, "/"); + assertEquals(5, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + assertEquals("a/b/c/d/", response.objects().get(0).key()); + + response = list("a/b/c/d", null, 100, "/"); + assertEquals(0, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/c/d/", response.commonPrefixes().get(0)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListObjectWithLimitObjectAndCommonPrefixes(ObjectStorage store) { + setEnv(store); + String key1 = "a/b/c/d"; + String key2 = "a/b"; + String key3 = "a1/b1"; + + byte[] data = TestUtility.rand(256); + for (int i = 0; i < 10; i++) { + storage.put(String.format("%s/file-%d.txt", key1, i), data); + storage.put(String.format("%s/file-%d.txt", key2, i), data); + storage.put(String.format("%s/file-%d.txt", key3, i), data); + } + + List dirKeys = Lists.newArrayList("a/b/d/", "a/b/e/", "a/b/f/", "a/b/g/"); + for (String key : dirKeys) { + storage.put(key, new byte[0]); + } + + // group objects by '/' under 'a/b/', and limit top 5 objects among 10 objects and 1 common + // prefix. 
+ ListObjectsResponse response = list("a/b/", "a/b/", 5, "/"); + assertEquals(1, response.objects().size()); + assertEquals(4, response.commonPrefixes().size()); + assertEquals("a/b/c/", response.commonPrefixes().get(0)); + assertEquals("a/b/d/", response.commonPrefixes().get(1)); + assertEquals("a/b/e/", response.commonPrefixes().get(2)); + assertEquals("a/b/f/", response.commonPrefixes().get(3)); + assertEquals("a/b/file-0.txt", response.objects().get(0).key()); + + response = list("a/b/", "a/b/", 14, "/"); + assertEquals(10, response.objects().size()); + assertEquals(4, response.commonPrefixes().size()); + assertEquals("a/b/c/", response.commonPrefixes().get(0)); + assertEquals("a/b/d/", response.commonPrefixes().get(1)); + assertEquals("a/b/e/", response.commonPrefixes().get(2)); + assertEquals("a/b/f/", response.commonPrefixes().get(3)); + assertEquals("a/b/file-0.txt", response.objects().get(0).key()); + assertEquals("a/b/file-9.txt", response.objects().get(9).key()); + + response = list("a/b/", "a/b/", 15, "/"); + assertEquals(10, response.objects().size()); + assertEquals(5, response.commonPrefixes().size()); + assertEquals("a/b/c/", response.commonPrefixes().get(0)); + assertEquals("a/b/d/", response.commonPrefixes().get(1)); + assertEquals("a/b/e/", response.commonPrefixes().get(2)); + assertEquals("a/b/f/", response.commonPrefixes().get(3)); + assertEquals("a/b/g/", response.commonPrefixes().get(4)); + assertEquals("a/b/file-0.txt", response.objects().get(0).key()); + assertEquals("a/b/file-9.txt", response.objects().get(9).key()); + + // a/b/h-file-0.txt is behind from a/b/g/ + storage.put("a/b/h-file-0.txt", data); + response = list("a/b/", "a/b/", 15, "/"); + assertEquals(10, response.objects().size()); + assertEquals(5, response.commonPrefixes().size()); + assertEquals("a/b/c/", response.commonPrefixes().get(0)); + assertEquals("a/b/d/", response.commonPrefixes().get(1)); + assertEquals("a/b/e/", response.commonPrefixes().get(2)); + assertEquals("a/b/f/", response.commonPrefixes().get(3)); + assertEquals("a/b/g/", response.commonPrefixes().get(4)); + assertEquals("a/b/file-0.txt", response.objects().get(0).key()); + assertEquals("a/b/file-9.txt", response.objects().get(9).key()); + + response = list("a/b/", "a/b/", 20, "/"); + assertEquals(11, response.objects().size()); + assertEquals(5, response.commonPrefixes().size()); + assertEquals("a/b/c/", response.commonPrefixes().get(0)); + assertEquals("a/b/d/", response.commonPrefixes().get(1)); + assertEquals("a/b/e/", response.commonPrefixes().get(2)); + assertEquals("a/b/f/", response.commonPrefixes().get(3)); + assertEquals("a/b/g/", response.commonPrefixes().get(4)); + assertEquals("a/b/file-0.txt", response.objects().get(0).key()); + assertEquals("a/b/h-file-0.txt", response.objects().get(10).key()); + + response = list("a/b/", "a/b/", 1, "/"); + assertEquals(0, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/c/", response.commonPrefixes().get(0)); + + response = list("a/b/", "a/b/", 2, "/"); + assertEquals(0, response.objects().size()); + assertEquals(2, response.commonPrefixes().size()); + assertEquals("a/b/c/", response.commonPrefixes().get(0)); + assertEquals("a/b/d/", response.commonPrefixes().get(1)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListedIteratorIsIdempotent(ObjectStorage store) { + setEnv(store); + String key1 = "a/b/c/d"; + + byte[] data = TestUtility.rand(256); + for (int i = 0; i < 10; i++) { + 
storage.put(String.format("%s/file-%d.txt", key1, i), data); + } + + Iterable res; + if (storage.bucket().isDirectory()) { + res = ((DirectoryStorage) storage).listDir("a/b/c/d/", true); + } else { + res = storage.list("a/b/c/d/", "a/b/c/d/", 10); + } + Iterator batch1 = res.iterator(); + Iterator batch2 = res.iterator(); + + for (int i = 0; i < 10; i++) { + assertTrue(batch1.hasNext()); + ObjectInfo obj = batch1.next(); + assertEquals(String.format("a/b/c/d/file-%d.txt", i), obj.key()); + } + assertFalse(batch1.hasNext()); + + for (int i = 0; i < 10; i++) { + assertTrue(batch2.hasNext()); + ObjectInfo obj = batch2.next(); + assertEquals(String.format("a/b/c/d/file-%d.txt", i), obj.key()); + } + assertFalse(batch2.hasNext()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListObjectsWithSmallBatch(ObjectStorage store) { + setEnv(store); + assumeFalse(storage.bucket().isDirectory()); + String key1 = "a/b/c/d/"; + + byte[] data = TestUtility.rand(256); + for (int i = 0; i < 10; i++) { + storage.put(String.format("%sfile-%d.txt", key1, i), data); + } + + // change list object count + Configuration newConf = new Configuration(storage.conf()); + newConf.setInt(TosKeys.FS_TOS_LIST_OBJECTS_COUNT, 5); + storage.initialize(newConf, storage.bucket().name()); + + List maxKeys = Arrays.asList(5, 10, 9, 20, -1); + for (int maxKey : maxKeys) { + Iterator objs = storage.list(key1, key1, maxKey).iterator(); + int end = Math.min(maxKey == -1 ? 10 : maxKey, 10); + for (int i = 0; i < end; i++) { + assertTrue(objs.hasNext()); + ObjectInfo obj = objs.next(); + assertEquals(String.format("a/b/c/d/file-%d.txt", i), obj.key()); + } + assertFalse(objs.hasNext()); + } + + // reset list object count + newConf = new Configuration(storage.conf()); + newConf.setInt(TosKeys.FS_TOS_LIST_OBJECTS_COUNT, 1000); + storage.initialize(newConf, storage.bucket().name()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListObjectsWithSpecificDelimiters(ObjectStorage store) { + setEnv(store); + assumeFalse(storage.bucket().isDirectory()); + String key1 = "a/b/c/d"; + String key2 = "a/b"; + String key3 = "a1/b1"; + + byte[] data = TestUtility.rand(256); + for (int i = 0; i < 10; i++) { + storage.put(String.format("%s/file-%d.txt", key1, i), data); + storage.put(String.format("%s/file-%d.txt", key2, i), data); + storage.put(String.format("%s/file-%d.txt", key3, i), data); + } + + ListObjectsResponse response = list("a", "", 11, "b/"); + assertEquals(10, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/", response.commonPrefixes().get(0)); + assertEquals("a1/b1/file-0.txt", response.objects().get(0).key()); + assertEquals("a1/b1/file-9.txt", response.objects().get(9).key()); + + response = list("a", "", 5, "b/"); + assertEquals(4, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/", response.commonPrefixes().get(0)); + assertEquals("a1/b1/file-0.txt", response.objects().get(0).key()); + assertEquals("a1/b1/file-3.txt", response.objects().get(3).key()); + + response = list("a", "a1/b1/file-3.txt", 5, "b/"); + assertEquals(5, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + assertEquals("a1/b1/file-4.txt", response.objects().get(0).key()); + assertEquals("a1/b1/file-8.txt", response.objects().get(4).key()); + + response = list("a", "a1/b1/file-3.txt", 6, "b/"); + assertEquals(6, response.objects().size()); + assertEquals(0, 
response.commonPrefixes().size()); + assertEquals("a1/b1/file-4.txt", response.objects().get(0).key()); + assertEquals("a1/b1/file-9.txt", response.objects().get(5).key()); + + response = list("a", "a/b/file-3.txt", 5, "b/"); + assertEquals(4, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/", response.commonPrefixes().get(0)); + assertEquals("a1/b1/file-0.txt", response.objects().get(0).key()); + assertEquals("a1/b1/file-3.txt", response.objects().get(3).key()); + + response = list("a", "a/b/file-3.txt", 10, "b/"); + assertEquals(9, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/", response.commonPrefixes().get(0)); + assertEquals("a1/b1/file-0.txt", response.objects().get(0).key()); + assertEquals("a1/b1/file-8.txt", response.objects().get(8).key()); + + response = list("a", "a/b/file-3.txt", 11, "b/"); + assertEquals(10, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/", response.commonPrefixes().get(0)); + assertEquals("a1/b1/file-0.txt", response.objects().get(0).key()); + assertEquals("a1/b1/file-9.txt", response.objects().get(9).key()); + + response = list("a", "a/b/", 1, "b/"); + assertEquals(1, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + + response = list("a/b/c/d", "", 100, "/file"); + assertEquals(0, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/c/d/file", response.commonPrefixes().get(0)); + + response = list("a/b/c/d/", "", 100, "file"); + assertEquals(0, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a/b/c/d/file", response.commonPrefixes().get(0)); + + + // group objects by different delimiter under 'a1' or 'a1/' + response = list("a1", "", 100, ""); + assertEquals(10, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + + response = list("a1", "", 100, "/"); + assertEquals(0, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a1/", response.commonPrefixes().get(0)); + + response = list("a1/", "", 100, "/"); + assertEquals(0, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a1/b1/", response.commonPrefixes().get(0)); + + response = list("a1/", "", 1, "/"); + assertEquals(0, response.objects().size()); + assertEquals(1, response.commonPrefixes().size()); + assertEquals("a1/b1/", response.commonPrefixes().get(0)); + + // group objects by non-exist delimiter under 'a1' or 'a1/' + response = list("a1", "", 100, "non-exist"); + assertEquals(10, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + + response = list("a1/", "", 100, "non-exist"); + assertEquals(10, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + + // test the sequent of common prefixes + response = list("a", "", 100, "b"); + assertEquals(0, response.objects().size()); + assertEquals(2, response.commonPrefixes().size()); + assertEquals("a/b", response.commonPrefixes().get(0)); + assertEquals("a1/b", response.commonPrefixes().get(1)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testOverwriteDirectoryWithAFile(ObjectStorage store) throws IOException { + setEnv(store); + String dirKey = "a/b/"; + String key = "a/b"; + storage.delete("a/"); + + byte[] data1 = new byte[0]; + byte[] data2 = 
TestUtility.rand(128); + + storage.put(dirKey, data1); + assertArrayEquals(data1, IOUtils.toByteArray(getStream(dirKey, 0, 256))); + + if (!storage.bucket().isDirectory()) { + // Directory bucket doesn't allow overwrote if the resource type is changed. + storage.put(key, data2); + assertArrayEquals(data2, IOUtils.toByteArray(getStream(key, 0, 256))); + } + + storage.delete(key); + storage.delete(dirKey); + assertNull(storage.head(key)); + assertNull(storage.head(dirKey)); + } + + private InputStream getStream(String key, long off, long limit) { + return storage.get(key, off, limit).stream(); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testDeleteNonEmptyDir(ObjectStorage store) throws IOException { + setEnv(store); + storage.put("a/", new byte[0]); + storage.put("a/b/", new byte[0]); + assertArrayEquals(new byte[0], IOUtils.toByteArray(getStream("a/b/", 0, 256))); + + ListObjectsResponse response = list("a/b/", "a/b/", 100, "/"); + assertEquals(0, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + + if (!storage.bucket().isDirectory()) { + // Directory bucket only supports list with delimiter = '/'. + response = list("a/b/", "a/b/", 100, null); + assertEquals(0, response.objects().size()); + assertEquals(0, response.commonPrefixes().size()); + } + + storage.delete("a/b/"); + assertNull(storage.head("a/b/")); + assertNull(storage.head("a/b")); + assertNotNull(storage.head("a/")); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testRecursiveDelete(ObjectStorage store) { + setEnv(store); + storage.put("a/", new byte[0]); + storage.put("a/b/", new byte[0]); + storage.put("a/b/c1/", new byte[0]); + storage.put("a/b/c2/", new byte[0]); + storage.put("a/b/c3/", new byte[0]); + assertNotNull(storage.head("a/")); + assertNotNull(storage.head("a/b/")); + assertNotNull(storage.head("a/b/c1/")); + assertNotNull(storage.head("a/b/c2/")); + assertNotNull(storage.head("a/b/c3/")); + + storage.delete("a/b/c3/"); + assertNull(storage.head("a/b/c3/")); + + storage.deleteAll(""); + assertNull(storage.head("a/b/c1/")); + assertNull(storage.head("a/b/c2/")); + assertNull(storage.head("a/b/")); + assertNull(storage.head("a/")); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListObjectKeys(ObjectStorage store) { + setEnv(store); + assumeFalse(storage.bucket().isDirectory()); + byte[] dirBytes = new byte[0]; + byte[] fileBytes = TestUtility.rand(128); + storage.put("a/b1/", dirBytes); + storage.put("a/b2/c0/", dirBytes); + storage.put("a/b2/c1/d1.txt", fileBytes); + storage.put("a/b2/c1/e1.txt", fileBytes); + storage.put("a/b2/c2.txt", fileBytes); + + // list single dir + List ret = toList(storage.list("a/b1", "", 10)); + assertEquals(1, ret.size()); + assertEquals("a/b1/", ret.get(0).key()); + assertEquals(0, ret.get(0).size()); + + ret = toList(storage.list("a/b1/", "", 10)); + assertEquals(1, ret.size()); + assertEquals("a/b1/", ret.get(0).key()); + assertEquals(0, ret.get(0).size()); + + // list single file + ret = toList(storage.list("a/b2/c1/d1.txt", "", 10)); + assertEquals(1, ret.size()); + assertEquals("a/b2/c1/d1.txt", ret.get(0).key()); + assertEquals(fileBytes.length, ret.get(0).size()); + + // list multiple files & dirs + ret = toList(storage.list("a/b2", "", 10)); + assertEquals(4, ret.size()); + assertEquals("a/b2/c0/", ret.get(0).key()); + assertEquals("a/b2/c1/d1.txt", ret.get(1).key()); + assertEquals("a/b2/c1/e1.txt", ret.get(2).key()); + assertEquals("a/b2/c2.txt", 
ret.get(3).key()); + assertEquals(dirBytes.length, ret.get(0).size()); + + // list single file with marker + ret = toList(storage.list("a/b2", "a/b2/c1/e1.txt", 10)); + assertEquals(1, ret.size()); + assertEquals("a/b2/c2.txt", ret.get(0).key()); + assertEquals(fileBytes.length, ret.get(0).size()); + + // list multiple files with marker + ret = toList(storage.list("a/b2", "a/b2/c1/", 10)); + assertEquals(3, ret.size()); + assertEquals("a/b2/c1/d1.txt", ret.get(0).key()); + assertEquals("a/b2/c1/e1.txt", ret.get(1).key()); + assertEquals("a/b2/c2.txt", ret.get(2).key()); + assertEquals(fileBytes.length, ret.get(0).size()); + + // list multiple files & dirs with part path as prefix + ret = toList(storage.list("a/b2/c", "", 10)); + assertEquals(4, ret.size()); + assertEquals("a/b2/c0/", ret.get(0).key()); + assertEquals("a/b2/c1/d1.txt", ret.get(1).key()); + assertEquals("a/b2/c1/e1.txt", ret.get(2).key()); + assertEquals("a/b2/c2.txt", ret.get(3).key()); + assertEquals(dirBytes.length, ret.get(0).size()); + + ret = toList(storage.list("a/b2/c", "", 2)); + assertEquals(2, ret.size()); + assertEquals("a/b2/c0/", ret.get(0).key()); + + ret = toList(storage.list("a/b2/c1/d1.", "", 10)); + assertEquals(1, ret.size()); + assertEquals("a/b2/c1/d1.txt", ret.get(0).key()); + assertEquals(fileBytes.length, ret.get(0).size()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListAllObjectKeys(ObjectStorage store) { + setEnv(store); + assumeFalse(storage.bucket().isDirectory()); + byte[] dirBytes = new byte[0]; + byte[] fileBytes = TestUtility.rand(128); + storage.put("a/b1/", dirBytes); + storage.put("a/b2/c0/", dirBytes); + storage.put("a/b2/c1/d1.txt", fileBytes); + storage.put("a/b2/c1/e1.txt", fileBytes); + storage.put("a/b2/c2.txt", dirBytes); + + // list single dir + List ret = Lists.newArrayList(storage.listAll("a/b1", "")); + assertEquals(1, ret.size()); + assertEquals("a/b1/", ret.get(0).key()); + assertEquals(0, ret.get(0).size()); + + // list single file + ret = Lists.newArrayList(storage.listAll("a/b2/c1/d1.txt", "")); + assertEquals(1, ret.size()); + assertEquals("a/b2/c1/d1.txt", ret.get(0).key()); + assertEquals(fileBytes.length, ret.get(0).size()); + + // list multiple files & dirs + ret = Lists.newArrayList(storage.listAll("a/b2", "")); + assertEquals(4, ret.size()); + assertEquals("a/b2/c0/", ret.get(0).key()); + assertEquals("a/b2/c1/d1.txt", ret.get(1).key()); + assertEquals("a/b2/c1/e1.txt", ret.get(2).key()); + assertEquals("a/b2/c2.txt", ret.get(3).key()); + assertEquals(dirBytes.length, ret.get(0).size()); + + // list multiple files & dirs with part path as prefix + ret = Lists.newArrayList(storage.listAll("a/b2/c", "")); + assertEquals(4, ret.size()); + assertEquals("a/b2/c0/", ret.get(0).key()); + assertEquals("a/b2/c1/d1.txt", ret.get(1).key()); + assertEquals("a/b2/c1/e1.txt", ret.get(2).key()); + assertEquals("a/b2/c2.txt", ret.get(3).key()); + assertEquals(dirBytes.length, ret.get(0).size()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListEmptyKeys(ObjectStorage store) { + setEnv(store); + if (storage.bucket().isDirectory()) { + assertEquals(0, + Lists.newArrayList(((DirectoryStorage) storage).listDir("not-exist", true)).size()); + } else { + assertEquals(0, Lists.newArrayList(storage.list("not-exist", "", 2)).size()); + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testMultiUploadEmptyFile(ObjectStorage store) { + setEnv(store); + String key = "a/b/empty.txt"; + 
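+    // (Note: completing a multipart upload with an empty part list is invalid, so the
+    // completeUpload call below is expected to throw; a zero-byte object is instead written
+    // through a single zero-length part, see testMultiUploadZeroByte.)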
MultipartUpload upload = storage.createMultipartUpload(key); + assertThrows(Exception.class, + () -> storage.completeUpload(key, upload.uploadId(), Lists.newArrayList())); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testMultiUploadZeroByte(ObjectStorage store) throws IOException { + setEnv(store); + String key = "a/b/zero.txt"; + MultipartUpload upload = storage.createMultipartUpload(key); + Part part = + storage.uploadPart(key, upload.uploadId(), 1, () -> new ByteArrayInputStream(new byte[0]), + 0); + storage.completeUpload(key, upload.uploadId(), Lists.newArrayList(part)); + assertArrayEquals(ObjectTestUtils.EMPTY_BYTES, IOUtils.toByteArray(getStream(key))); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testMultiUploadFile(ObjectStorage store) throws IOException { + setEnv(store); + String key1 = "a/b/c/e.txt"; + String uploadId1 = storage.createMultipartUpload(key1).uploadId(); + assertNotEquals(uploadId1, ""); + + byte[] dataset = multipleUpload(key1, uploadId1, 2, true); + assertArrayEquals(dataset, IOUtils.toByteArray(getStream(key1))); + + String key2 = "a/b/e/e.txt"; + String uploadId2 = storage.createMultipartUpload(key2).uploadId(); + assertNotEquals(uploadId2, ""); + + dataset = multipleUpload(key2, uploadId2, 3, true); + assertArrayEquals(dataset, IOUtils.toByteArray(getStream(key2))); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testPutAndCompleteMPUWithSameContent(ObjectStorage store) throws IOException { + setEnv(store); + String mpu = "a/b/mpu.txt"; + String put = "a/b/put.txt"; + byte[] dataset = TestUtility.rand(11 << 20); + byte[] checksum = multipleUpload(mpu, dataset); + + storage.put(put, dataset); + + ObjectInfo mputObj = storage.head(mpu); + ObjectInfo putObj = storage.head(put); + assertArrayEquals(checksum, mputObj.checksum()); + assertArrayEquals(checksum, putObj.checksum()); + + if (!storage.bucket().isDirectory()) { + List objectInfo = toList(storage.list(mpu, null, 10)); + assertEquals(mputObj, objectInfo.get(0)); + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListUploads(ObjectStorage store) { + setEnv(store); + String key1 = "a/b/c/e.txt"; + String uploadId1 = storage.createMultipartUpload(key1).uploadId(); + assertNotEquals(uploadId1, ""); + multipleUpload(key1, uploadId1, 2, false); + + String key2 = "a/b/e/e.txt"; + String uploadId2 = storage.createMultipartUpload(key2).uploadId(); + assertNotEquals(uploadId2, ""); + multipleUpload(key2, uploadId2, 3, false); + + Iterable iterable = storage.listUploads(""); + List uploads = Lists.newArrayList(iterable.iterator()); + assertEquals(2, uploads.size()); + assertEquals(key1, uploads.get(0).key()); + assertEquals(uploadId1, uploads.get(0).uploadId()); + assertEquals(key2, uploads.get(1).key()); + assertEquals(uploadId2, uploads.get(1).uploadId()); + + // check iterator is idempotent + uploads = Lists.newArrayList(iterable.iterator()); + assertEquals(2, uploads.size()); + assertEquals(key1, uploads.get(0).key()); + assertEquals(uploadId1, uploads.get(0).uploadId()); + assertEquals(key2, uploads.get(1).key()); + assertEquals(uploadId2, uploads.get(1).uploadId()); + + uploads = Lists.newArrayList(storage.listUploads("a/b/")); + assertEquals(2, uploads.size()); + assertEquals(key1, uploads.get(0).key()); + assertEquals(uploadId1, uploads.get(0).uploadId()); + assertEquals(key2, uploads.get(1).key()); + assertEquals(uploadId2, uploads.get(1).uploadId()); + + uploads = 
Lists.newArrayList(storage.listUploads("a/b/c/")); + assertEquals(1, uploads.size()); + assertEquals(key1, uploads.get(0).key()); + assertEquals(uploadId1, uploads.get(0).uploadId()); + + storage.abortMultipartUpload(key1, uploadId1); + storage.abortMultipartUpload(key2, uploadId2); + assertEquals(0, Lists.newArrayList((storage.listUploads("a/b/"))).size()); + } + + private byte[] multipleUpload(String key, String uploadId, int partCnt, boolean completeUpload) { + int partSize = 5 * 1024 * 1024; + byte[] dataset = new byte[partCnt * partSize]; + byte[] partData = TestUtility.rand(partSize); + try { + int offset = 0; + List parts = new ArrayList<>(); + for (int i = 1; i <= partCnt; i++) { + Part part = storage.uploadPart(key, uploadId, i, () -> new ByteArrayInputStream(partData), + partData.length); + parts.add(part); + System.arraycopy(partData, 0, dataset, offset, partData.length); + offset += partData.length; + } + if (completeUpload) { + storage.completeUpload(key, uploadId, parts); + } + } catch (RuntimeException e) { + storage.abortMultipartUpload(key, uploadId); + } + return dataset; + } + + private byte[] multipleUpload(String key, byte[] dataset) throws IOException { + int partSize = 5 * 1024 * 1024; + int partCnt = (int) Math.ceil((double) dataset.length / partSize); + + String uploadId = storage.createMultipartUpload(key).uploadId(); + assertNotEquals(uploadId, ""); + + try { + List parts = new ArrayList<>(); + for (int i = 0; i < partCnt; i++) { + int start = i * partSize; + int end = Math.min(dataset.length, start + partSize); + byte[] partData = Arrays.copyOfRange(dataset, start, end); + + Part part = + storage.uploadPart(key, uploadId, i + 1, () -> new ByteArrayInputStream(partData), + partData.length); + + assertEquals(DigestUtils.md5Hex(partData), part.eTag().replace("\"", "")); + parts.add(part); + } + + byte[] checksum = storage.completeUpload(key, uploadId, parts); + assertArrayEquals(dataset, IOUtils.toByteArray(getStream(key))); + + return checksum; + } catch (IOException | RuntimeException e) { + storage.abortMultipartUpload(key, uploadId); + throw e; + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testUploadPartCopy10MB(ObjectStorage store) { + setEnv(store); + String srcKey = "src10MB.txt"; + String dstKey = "dst10MB.txt"; + testUploadPartCopy(srcKey, dstKey, 10 << 20); // 10MB + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testUploadPartCopy100MB(ObjectStorage store) { + setEnv(store); + String srcKey = "src100MB.txt"; + String dstKey = "dst100MB.txt"; + testUploadPartCopy(srcKey, dstKey, 100 << 20); // 100MB + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testUploadPartCopy65MB(ObjectStorage store) { + setEnv(store); + String srcKey = "src65MB.txt"; + String dstKey = "dst65MB.txt"; + testUploadPartCopy(srcKey, dstKey, 65 << 20); // 65MB + } + + private void testUploadPartCopy(String srcKey, String key, int fileSize) { + MultipartUpload srcMultipartUpload = storage.createMultipartUpload(srcKey); + long partSize = 5 << 20; + int partCnt = (int) (fileSize / partSize + (fileSize % partSize == 0 ? 
0 : 1)); + byte[] data = + multipleUpload(srcMultipartUpload.key(), srcMultipartUpload.uploadId(), partCnt, true); + MultipartUpload dstMultipartUpload = storage.createMultipartUpload(key); + long copyPartRangeStart = 0L; + List results = Lists.newArrayList(); + try { + for (int i = 0; i < partCnt; i++) { + Part result = storage.uploadPartCopy(srcKey, key, dstMultipartUpload.uploadId(), i + 1, + copyPartRangeStart, Math.min(copyPartRangeStart + partSize, fileSize) - 1); + results.add(result); + copyPartRangeStart += partSize; + } + storage.completeUpload(key, dstMultipartUpload.uploadId(), results); + assertArrayEquals(data, IOUtils.toByteArray(getStream(key))); + } catch (Exception e) { + storage.abortMultipartUpload(key, dstMultipartUpload.uploadId()); + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testCopy0MB(ObjectStorage store) throws IOException { + setEnv(store); + String srcKey = "src0MB.txt"; + String dstKey = "dst0MB.txt"; + testCopy(srcKey, dstKey, 0); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testCopy5MB(ObjectStorage store) throws IOException { + setEnv(store); + String srcKey = "src5MB.txt"; + String dstKey = "dst5MB.txt"; + testCopy(srcKey, dstKey, 5 << 20); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testCopy10MB(ObjectStorage store) throws IOException { + setEnv(store); + String srcKey = "src10MB.txt"; + String dstKey = "dst10MB.txt"; + testCopy(srcKey, dstKey, 10 << 20); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testRename(ObjectStorage store) throws IOException { + setEnv(store); + String srcKey = "src.txt"; + String dstKey = "dst.txt"; + + // Rename source to a un-exist object + renameObject(srcKey, dstKey, 256); + renameObject(srcKey, dstKey, 0); + + // Overwrite an existing object + renameObjectWhenDestExist(srcKey, dstKey, 256, 0); + renameObjectWhenDestExist(srcKey, dstKey, 0, 256); + + assertNull(storage.head(srcKey)); + assertThrows(RuntimeException.class, () -> storage.rename(srcKey, dstKey), + "Source key not found"); + + assertThrows(RuntimeException.class, () -> renameObject(srcKey, srcKey, 256), + "Cannot rename to the same object"); + } + + private void renameObjectWhenDestExist(String srcKey, String dstKey, int srcSize, int destSize) + throws IOException { + byte[] dstData = new byte[destSize]; + storage.put(dstKey, dstData, 0, destSize); + assertArrayEquals(dstData, IOUtils.toByteArray(getStream(dstKey))); + + renameObject(srcKey, dstKey, srcSize); + } + + private void renameObject(String srcKey, String dstKey, int fileSize) throws IOException { + byte[] data = new byte[fileSize]; + storage.put(srcKey, data, 0, fileSize); + assertArrayEquals(data, IOUtils.toByteArray(getStream(srcKey))); + + storage.rename(srcKey, dstKey); + assertArrayEquals(data, IOUtils.toByteArray(getStream(dstKey))); + assertNull(storage.head(srcKey)); + + storage.delete(dstKey); + assertNull(storage.head(dstKey)); + } + + private void testCopy(String srcKey, String dstKey, int fileSize) throws IOException { + byte[] data = new byte[fileSize]; + storage.put(srcKey, data, 0, fileSize); + storage.copy(srcKey, dstKey); + assertArrayEquals(data, IOUtils.toByteArray(getStream(dstKey))); + } + + private ListObjectsResponse list(String prefix, String startAfter, int limit, String delimiter) { + Preconditions.checkArgument(limit <= 1000, "Cannot list more than 1000 objects."); + ListObjectsRequest request = ListObjectsRequest.builder() + .prefix(prefix) + 
.startAfter(startAfter) + .maxKeys(limit) + .delimiter(delimiter) + .build(); + Iterator iterator = storage.list(request).iterator(); + if (iterator.hasNext()) { + return iterator.next(); + } else { + return new ListObjectsResponse(new ArrayList<>(), new ArrayList<>()); + } + } + + private static List toList(final Iterable iterable) { + return StreamSupport.stream(iterable.spliterator(), false) + .collect(Collectors.toList()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testObjectTagging(ObjectStorage store) { + setEnv(store); + assumeFalse(storage.bucket().isDirectory()); + if (storage instanceof FileStore) { + return; + } + + // create key. + String key = "ObjectTagging"; + String tagPrefix = "tag" + UUIDUtils.random() + "_"; + String valuePrefix = "value" + UUIDUtils.random() + "_"; + storage.put(key, new byte[0], 0, 0); + + Map tagsMap = new HashMap<>(); + for (int i = 0; i < 10; i++) { + tagsMap.put(tagPrefix + i, valuePrefix + i); + } + + // 1. put and get when key exists. + storage.putTags(key, tagsMap); + Map tags = storage.getTags(key); + assertEquals(10, tags.keySet().size()); + assertTrue(Maps.difference(tagsMap, tags).areEqual()); + + // 2. put and get when key doesn't exist. + assertThrows(TosServerException.class, () -> storage.putTags("non-exist-key", tagsMap), + "NoSuchKey"); + assertThrows(TosServerException.class, () -> storage.getTags("non-exist-key"), "doesn't exist"); + + // 3. tag threshold. + Map bigMap = new HashMap<>(tagsMap); + bigMap.put(tagPrefix + 11, valuePrefix + 11); + assertThrows(RuntimeException.class, () -> storage.putTags(key, bigMap), "exceed limit of 10"); + + // 4. put tag with null tagName. + Map nullKeyTag = new HashMap<>(); + nullKeyTag.put(null, "some value"); + assertThrows(TosServerException.class, () -> storage.putTags(key, nullKeyTag), + "TagKey you have provided is invalid"); + + // 5. put tag with null value. + Map nullValueTag = new HashMap<>(); + nullValueTag.put("some-key", null); + storage.putTags(key, nullValueTag); + assertNull(storage.getTags(key).get("some-key")); + + // 6. remove tags. + Map emptyTag = new HashMap<>(); + storage.putTags(key, emptyTag); + assertEquals(0, storage.getTags(key).size()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testObjectChecksum(ObjectStorage store) throws IOException { + setEnv(store); + byte[] data = TestUtility.rand(256); + String key = "a/truncated.txt"; + + // Read object at the end offset. + byte[] checksum = storage.put(key, () -> new ByteArrayInputStream(data), 200); + ObjectContent objContent = storage.get(key, 200, -1); + objContent.stream().close(); + assertArrayEquals(checksum, objContent.checksum()); + + // Read empty object. + checksum = storage.put(key, () -> new ByteArrayInputStream(new byte[0]), 0); + objContent = storage.get(key, 0, -1); + objContent.stream().close(); + assertArrayEquals(checksum, objContent.checksum()); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestChainTOSInputStream.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestChainTOSInputStream.java new file mode 100644 index 0000000000000..26533d74c7d75 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestChainTOSInputStream.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.object.tos; + +import com.volcengine.tos.model.object.GetObjectBasicOutput; +import com.volcengine.tos.model.object.GetObjectV2Output; +import org.apache.hadoop.fs.tosfs.common.Bytes; +import org.apache.hadoop.fs.tosfs.object.Constants; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestChainTOSInputStream { + + private static final int DATA_SIZE = 1 << 20; + private static final byte[] DATA = TestUtility.rand(DATA_SIZE); + + @Test + public void testRetryReadData() throws IOException { + int readLen = DATA_SIZE - 1; + int cutOff = readLen / 2; + try (ChainTOSInputStream stream = createTestChainTOSInputStream(DATA, 0, DATA_SIZE - 1, 1024, + cutOff)) { + // The read length is more than the cut-off position, and equal to data length, + // so the first stream will throw IOException, and fallback to the second stream. + byte[] data = new byte[readLen]; + int n = stream.read(data); + assertEquals(readLen, n); + assertArrayEquals(Bytes.toBytes(DATA, 0, readLen), data); + } + + try (ChainTOSInputStream stream = createTestChainTOSInputStream(DATA, 0, DATA_SIZE - 1, 1024, + cutOff)) { + // The read length is more than data length, so the first stream will throw IOException, + // and fallback to the second stream. + byte[] data = new byte[readLen + 2]; + int n = stream.read(data); + assertEquals(readLen, n); + assertArrayEquals(Bytes.toBytes(DATA, 0, readLen), Bytes.toBytes(data, 0, n)); + } + + readLen = DATA_SIZE / 3; + cutOff = DATA_SIZE / 2; + try (ChainTOSInputStream stream = createTestChainTOSInputStream(DATA, 0, DATA_SIZE, 1024, + cutOff)) { + for (int i = 0; i <= 3; i++) { + // The cut-off position is between (readLen, 2 * readLen), so the data of first read come + // from the first stream, and then the second read will meet IOException, and fallback to + // the second stream. + byte[] data = new byte[readLen]; + int n = stream.read(data); + + int off = i * readLen; + int len = Math.min(readLen, DATA_SIZE - off); + + assertEquals(len, n); + assertArrayEquals(Bytes.toBytes(DATA, off, len), Bytes.toBytes(data, 0, len)); + } + } + + int smallDataSize = 1 << 10; + cutOff = smallDataSize / 2; + byte[] smallData = TestUtility.rand(1 << 10); + try (ChainTOSInputStream stream = createTestChainTOSInputStream(smallData, 0, smallDataSize, + 1024, cutOff)) { + for (int i = 0; i < smallDataSize; i++) { + // The cut-off position is 512, the 512th read operation will meet IOException, + // and then fallback to the second stream. 
+        int read = stream.read();
+        assertEquals(smallData[i] & 0xFF, read);
+      }
+    }
+  }
+
+  @Test
+  public void testSkipAndRead() throws IOException {
+    int cutOff = (DATA_SIZE - 1) / 2;
+    try (ChainTOSInputStream stream = createTestChainTOSInputStream(DATA, 0, DATA_SIZE - 1, 1024,
+        cutOff)) {
+      // The skip pos is equal to the cut-off pos; once the skip finishes, the first read
+      // operation will hit an IOException and fall back to the second stream.
+      int readPos = (DATA_SIZE - 1) / 2;
+      stream.skip(readPos);
+
+      int readLen = 1024;
+      byte[] data = new byte[readLen];
+      int n = stream.read(data);
+      assertEquals(readLen, n);
+      assertArrayEquals(Bytes.toBytes(DATA, readPos, readLen), data);
+    }
+
+    try (ChainTOSInputStream stream = createTestChainTOSInputStream(DATA, 0, DATA_SIZE - 1, 1024,
+        cutOff)) {
+      // The skip pos is more than the cut-off pos, so the skip operation will throw IOException,
+      // then fall back to the second stream and skip(readPos) again.
+      int readPos = cutOff + 1024;
+      stream.skip(readPos);
+
+      int readLen = 1024;
+      byte[] data = new byte[readLen];
+      int n = stream.read(data);
+      assertEquals(readLen, n);
+      assertArrayEquals(Bytes.toBytes(DATA, readPos, readLen), data);
+    }
+
+    try (ChainTOSInputStream stream = createTestChainTOSInputStream(DATA, 0, DATA_SIZE - 1, 1024,
+        cutOff)) {
+      // The skip pos = cut-off pos - 1025, so the skip operation will succeed on the first
+      // stream, and the first 1024-byte read also succeeds on the first stream, but the next
+      // 1024-byte read will fail on the first stream and fall back to the second stream.
+      int readPos = cutOff - 1024 - 1;
+      stream.skip(readPos);
+
+      int readLen = 1024;
+      byte[] data = new byte[readLen];
+      int n = stream.read(data);
+      assertEquals(readLen, n);
+      assertArrayEquals(Bytes.toBytes(DATA, readPos, readLen), data);
+
+      n = stream.read(data);
+      assertEquals(readLen, n);
+      assertArrayEquals(Bytes.toBytes(DATA, readPos + 1024, readLen), data);
+    }
+
+    try (ChainTOSInputStream stream = createTestChainTOSInputStream(DATA, 0, DATA_SIZE - 1, 1024,
+        cutOff)) {
+      // 1. Skip 1024 bytes and then read 1024 bytes from the first stream.
+      // 2. Then skip cut-off - 512 bytes; the target off = 1024 + 1024 + cut-off - 512, which is
+      //    bigger than the cut-off pos, so the second skip operation will fail and fall back to
+      //    the second stream.
+      // 3. Read 1024 bytes.
+      int readPos = 1024;
+      stream.skip(readPos);
+
+      int readLen = 1024;
+      byte[] data = new byte[readLen];
+      int n = stream.read(data);
+      assertEquals(readLen, n);
+      assertArrayEquals(Bytes.toBytes(DATA, readPos, readLen), data);
+
+      int skipPos = cutOff - 512;
+      stream.skip(skipPos);
+
+      n = stream.read(data);
+      assertEquals(readLen, n);
+      int targetOff = readPos + 1024 + skipPos;
+      assertArrayEquals(Bytes.toBytes(DATA, targetOff, readLen), data);
+    }
+  }
+
+  /**
+   * The ChainTOSInputStream contains two streams created by TestObjectFactory.
+   * Once the read pos of the first stream is more than cutPos, the stream will throw an
+   * IOException with an unexpected end of stream error message, but the second stream will
+   * contain the remaining data.
+ */ + private ChainTOSInputStream createTestChainTOSInputStream(byte[] data, long startOff, long endOff, + long maxDrainSize, long cutPos) { + String key = "dummy-key"; + TOS.GetObjectFactory factory = new TestObjectFactory(data, Arrays.asList(cutPos, -1L)); + return new ChainTOSInputStream(factory, key, startOff, endOff, maxDrainSize, 1); + } + + private static class TestObjectFactory implements TOS.GetObjectFactory { + private final byte[] data; + private final List streamBreakPoses; + private int streamIndex = 0; + + TestObjectFactory(byte[] data, List streamBreakPoses) { + this.data = data; + this.streamBreakPoses = streamBreakPoses; + } + + @Override + public GetObjectOutput create(String key, long offset, long end) { + long len = Math.min(end, data.length) - offset; + ByteArrayInputStream dataIn = new ByteArrayInputStream(this.data, (int) offset, (int) len); + + if (streamIndex < streamBreakPoses.size()) { + return new GetObjectOutput(new GetObjectV2Output(new GetObjectBasicOutput(), + new UnExpectedEndOfStream(dataIn, streamBreakPoses.get(streamIndex++))), + Constants.MAGIC_CHECKSUM); + } else { + throw new RuntimeException("No more output"); + } + } + } + + private static class UnExpectedEndOfStream extends InputStream { + private final ByteArrayInputStream delegate; + private final long breakPos; + private int readPos; + + UnExpectedEndOfStream(ByteArrayInputStream stream, long breakPos) { + delegate = stream; + this.breakPos = breakPos; + } + + @Override + public int read() throws IOException { + if (breakPos != -1 && readPos >= breakPos) { + throw new IOException("unexpected end of stream on dummy source."); + } else { + int n = delegate.read(); + readPos += 1; + return n; + } + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (breakPos != -1 && readPos >= breakPos) { + throw new IOException("unexpected end of stream on dummy source."); + } else { + int n = delegate.read(b, off, len); + readPos += n; + return n; + } + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestDelegationClientBuilder.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestDelegationClientBuilder.java new file mode 100644 index 0000000000000..e7f88373fc56b --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestDelegationClientBuilder.java @@ -0,0 +1,466 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.object.tos; + +import com.volcengine.tos.TOSV2; +import com.volcengine.tos.TOSV2ClientBuilder; +import com.volcengine.tos.TosClientException; +import com.volcengine.tos.TosException; +import com.volcengine.tos.TosServerException; +import com.volcengine.tos.auth.Credential; +import com.volcengine.tos.auth.StaticCredentials; +import com.volcengine.tos.comm.HttpStatus; +import com.volcengine.tos.model.object.DeleteObjectInput; +import com.volcengine.tos.model.object.HeadObjectV2Input; +import com.volcengine.tos.model.object.HeadObjectV2Output; +import com.volcengine.tos.model.object.ListObjectsV2Input; +import com.volcengine.tos.model.object.ListObjectsV2Output; +import com.volcengine.tos.model.object.PutObjectInput; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.common.Tasks; +import org.apache.hadoop.fs.tosfs.common.ThreadPools; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.conf.TosKeys; +import org.apache.hadoop.fs.tosfs.object.tos.auth.EnvironmentCredentialsProvider; +import org.apache.hadoop.fs.tosfs.object.tos.auth.SimpleCredentialsProvider; +import org.apache.hadoop.fs.tosfs.util.ParseUtils; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; +import org.junit.jupiter.api.TestReporter; + +import java.io.ByteArrayInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.net.SocketException; +import java.net.SocketTimeoutException; +import java.net.UnknownHostException; +import java.util.Arrays; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.stream.IntStream; +import javax.net.ssl.SSLException; + +import static org.apache.hadoop.fs.tosfs.object.tos.DelegationClient.isRetryableException; +import static org.apache.hadoop.fs.tosfs.object.tos.TOS.TOS_SCHEME; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assumptions.assumeTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestDelegationClientBuilder { + + private static final String TEST_KEY = UUIDUtils.random(); + private static final String TEST_DATA = "1234567890"; + private static String envAccessKey; + private static String envSecretKey; + private static String envEndpoint; + + // Maximum retry times of the tos http client. 
+ public static final String MAX_RETRY_COUNT_KEY = "fs.tos.http.maxRetryCount"; + + @BeforeAll + public static void before() { + assumeTrue(TestEnv.checkTestEnabled()); + + envAccessKey = + ParseUtils.envAsString(TOS.ENV_TOS_ACCESS_KEY_ID, false); + envSecretKey = + ParseUtils.envAsString(TOS.ENV_TOS_SECRET_ACCESS_KEY, false); + envEndpoint = ParseUtils.envAsString(TOS.ENV_TOS_ENDPOINT, false); + } + + @BeforeEach + public void setUp() { + TOSV2 tosSdkClientV2 = + new TOSV2ClientBuilder().build(TestUtility.region(), TestUtility.endpoint(), + new StaticCredentials(envAccessKey, envSecretKey)); + try (ByteArrayInputStream stream = new ByteArrayInputStream(TEST_DATA.getBytes())) { + PutObjectInput putObjectInput = + new PutObjectInput().setBucket(TestUtility.bucket()).setKey(TEST_KEY).setContent(stream); + tosSdkClientV2.putObject(putObjectInput); + } catch (IOException e) { + fail(e.getMessage()); + } + } + + @Test + public void testHeadApiRetry() throws IOException { + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key(TOS_SCHEME), + "https://test.tos-cn-beijing.ivolces.com"); + conf.set(TosKeys.FS_TOS_CREDENTIALS_PROVIDER, SimpleCredentialsProvider.NAME); + conf.setBoolean(TosKeys.FS_TOS_DISABLE_CLIENT_CACHE, false); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key("test"), "ACCESS_KEY"); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key("test"), "SECRET_KEY"); + + DelegationClient tosV2 = new DelegationClientBuilder().bucket("test").conf(conf).build(); + TOSV2 mockClient = mock(TOSV2.class); + tosV2.setClient(mockClient); + tosV2.setMaxRetryTimes(5); + + HeadObjectV2Input input = HeadObjectV2Input.builder().bucket("test").build(); + when(tosV2.headObject(input)).thenThrow( + new TosServerException(HttpStatus.INTERNAL_SERVER_ERROR), + new TosServerException(HttpStatus.TOO_MANY_REQUESTS), + new TosClientException("fake toe", new IOException("fake ioe")), + new TosException(new SocketException("fake msg")), + new TosException(new UnknownHostException("fake msg")), + new TosException(new SSLException("fake msg")), + new TosException(new InterruptedException("fake msg")), + new TosException(new InterruptedException("fake msg"))) + .thenReturn(new HeadObjectV2Output()); + + RuntimeException exception = + assertThrows(RuntimeException.class, () -> tosV2.headObject(input)); + assertTrue(exception instanceof TosException); + assertTrue(exception.getCause() instanceof UnknownHostException); + verify(tosV2.client(), times(5)).headObject(input); + + HeadObjectV2Input inputOneTime = HeadObjectV2Input.builder().bucket("inputOneTime").build(); + HeadObjectV2Output output = new HeadObjectV2Output(); + when(tosV2.headObject(inputOneTime)).thenReturn(output); + HeadObjectV2Output headObject = tosV2.headObject(inputOneTime); + assertEquals(headObject, output); + verify(tosV2.client(), times(1)).headObject(inputOneTime); + tosV2.close(); + + DelegationClient newClient = new DelegationClientBuilder().bucket("test").conf(conf).build(); + mockClient = mock(TOSV2.class); + newClient.setClient(mockClient); + newClient.setMaxRetryTimes(5); + when(newClient.headObject(input)).thenThrow( + new TosClientException("fake toe", new EOFException("fake eof")), + new TosServerException(HttpStatus.INTERNAL_SERVER_ERROR), + new TosServerException(HttpStatus.TOO_MANY_REQUESTS)).thenReturn(new HeadObjectV2Output()); + + exception = assertThrows(RuntimeException.class, () -> newClient.headObject(input)); + assertTrue(exception instanceof TosClientException); + 
assertTrue(exception.getCause() instanceof EOFException); + verify(newClient.client(), times(1)).headObject(input); + newClient.close(); + } + + @Test + public void testEnableCrcCheck(TestInfo testInfo, TestReporter testReporter) throws IOException { + String bucket = testInfo.getTestMethod().map(method -> method.getName()).orElse("Unknown"); + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key(TOS_SCHEME), + "https://test.tos-cn-beijing.ivolces.com"); + conf.set(TosKeys.FS_TOS_CREDENTIALS_PROVIDER, SimpleCredentialsProvider.NAME); + conf.setBoolean(TosKeys.FS_TOS_DISABLE_CLIENT_CACHE, true); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key(bucket), "ACCESS_KEY"); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key(bucket), "SECRET_KEY"); + + DelegationClient tosV2 = new DelegationClientBuilder().bucket(bucket).conf(conf).build(); + assertTrue(tosV2.config().isEnableCrc()); + + conf.setBoolean(TosKeys.FS_TOS_CRC_CHECK_ENABLED, false); + tosV2 = new DelegationClientBuilder().bucket(bucket).conf(conf).build(); + assertFalse(tosV2.config().isEnableCrc()); + + tosV2.close(); + } + + @Test + public void testClientCache(TestInfo testInfo, TestReporter testReporter) throws IOException { + String bucket = testInfo.getTestMethod().map(method -> method.getName()).orElse("Unknown"); + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key(TOS_SCHEME), + "https://test.tos-cn-beijing.ivolces.com"); + conf.set(TosKeys.FS_TOS_CREDENTIALS_PROVIDER, SimpleCredentialsProvider.NAME); + conf.setBoolean(TosKeys.FS_TOS_DISABLE_CLIENT_CACHE, false); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key(bucket), "ACCESS_KEY_A"); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key(bucket), "SECRET_KEY_A"); + + DelegationClient tosV2 = new DelegationClientBuilder().bucket(bucket).conf(conf).build(); + DelegationClient tosV2Cached = new DelegationClientBuilder().bucket(bucket).conf(conf).build(); + assertEquals(tosV2Cached, tosV2, "client must be load in cache"); + assertEquals("ACCESS_KEY_A", tosV2.usedCredential().getAccessKeyId()); + tosV2Cached.close(); + + String newBucket = "new-test-bucket"; + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key(newBucket), "ACCESS_KEY_B"); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key(newBucket), "SECRET_KEY_B"); + DelegationClient changeBucketClient = + new DelegationClientBuilder().bucket(newBucket).conf(conf).build(); + assertNotEquals(changeBucketClient, tosV2, "client should be created entirely new"); + assertEquals("ACCESS_KEY_B", changeBucketClient.usedCredential().getAccessKeyId()); + changeBucketClient.close(); + + conf.setBoolean(TosKeys.FS_TOS_DISABLE_CLIENT_CACHE, true); // disable cache: true + DelegationClient tosV2NotCached = + new DelegationClientBuilder().bucket(bucket).conf(conf).build(); + assertNotEquals(tosV2NotCached, tosV2, "client should be created entirely new"); + assertEquals("ACCESS_KEY_A", tosV2NotCached.usedCredential().getAccessKeyId()); + tosV2NotCached.close(); + + tosV2.close(); + } + + @Test + public void testOverwriteHttpConfig() throws IOException { + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key(TOS_SCHEME), + "https://tos-cn-beijing.ivolces.com"); + conf.set(TosKeys.FS_TOS_CREDENTIALS_PROVIDER, SimpleCredentialsProvider.NAME); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key("test"), "ACCESS_KEY"); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key("test"), "SECRET_KEY"); + 
conf.setInt(TosKeys.FS_TOS_HTTP_MAX_CONNECTIONS, 24); + conf.setInt(MAX_RETRY_COUNT_KEY, 24); + conf.setInt(TosKeys.FS_TOS_REQUEST_MAX_RETRY_TIMES, 24); + conf.setBoolean(TosKeys.FS_TOS_DISABLE_CLIENT_CACHE, true); + + DelegationClient tosV2 = new DelegationClientBuilder().bucket("test").conf(conf).build(); + assertEquals("ACCESS_KEY", tosV2.usedCredential().getAccessKeyId()); + assertEquals(24, tosV2.config().getTransportConfig().getMaxConnections(), + "http max connection overwrite to 24 from 1024, must be 24"); + assertEquals(DelegationClientBuilder.DISABLE_TOS_RETRY_VALUE, + tosV2.config().getTransportConfig().getMaxRetryCount(), + "tos maxRetryCount disabled, must be -1"); + assertEquals(24, tosV2.maxRetryTimes(), "maxRetryTimes must be 24"); + assertEquals("https://tos-cn-beijing.ivolces.com", tosV2.config().getEndpoint(), + "endpoint must be equals to https://tos-cn-beijing.ivolces.com"); + + tosV2.close(); + } + + @Test + public void testDynamicRefreshAkSk() throws IOException { + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key(TOS_SCHEME), envEndpoint); + conf.set(TosKeys.FS_TOS_CREDENTIALS_PROVIDER, SimpleCredentialsProvider.NAME); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key(TestUtility.bucket()), envAccessKey); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key(TestUtility.bucket()), envSecretKey); + conf.setInt(TosKeys.FS_TOS_HTTP_MAX_CONNECTIONS, 24); + conf.setInt(MAX_RETRY_COUNT_KEY, 24); + + TOSV2 tosSdkClientV2 = + new TOSV2ClientBuilder().build(TestUtility.region(), TestUtility.endpoint(), + new StaticCredentials("a", "b")); + DelegationClient delegationClientV2 = + new DelegationClientBuilder().bucket(TestUtility.bucket()).conf(conf).build(); + + ListObjectsV2Input inputV2 = + ListObjectsV2Input.builder().bucket(TestUtility.bucket()).prefix(TEST_KEY).marker("") + .maxKeys(10).build(); + + assertThrows(TosServerException.class, () -> tosSdkClientV2.listObjects(inputV2)); + + tosSdkClientV2.changeCredentials(new StaticCredentials(envAccessKey, envSecretKey)); + + ListObjectsV2Output tosSdkOutput = tosSdkClientV2.listObjects(inputV2); + ListObjectsV2Output delegateOutput = delegationClientV2.listObjects(inputV2); + int nativeContentSize = + tosSdkOutput.getContents() == null ? -1 : tosSdkOutput.getContents().size(); + int delegateContentSize = + delegateOutput.getContents() == null ? 
-1 : delegateOutput.getContents().size(); + + assertEquals(nativeContentSize, delegateContentSize, + "delegation client must same as native client"); + assertEquals(envAccessKey, delegationClientV2.usedCredential().getAccessKeyId()); + + delegationClientV2.close(); + } + + @Test + public void testCreateClientWithEnvironmentCredentials() throws IOException { + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key(TOS_SCHEME), envEndpoint); + conf.set(TosKeys.FS_TOS_CREDENTIALS_PROVIDER, EnvironmentCredentialsProvider.NAME); + + DelegationClient tosV2 = + new DelegationClientBuilder().bucket(TestUtility.bucket()).conf(conf).build(); + Credential cred = tosV2.usedCredential(); + + String assertMsg = + String.format("expect %s, but got %s", envAccessKey, cred.getAccessKeyId()); + assertEquals(cred.getAccessKeyId(), envAccessKey, assertMsg); + assertMsg = String.format("expect %s, but got %s", envSecretKey, cred.getAccessKeySecret()); + assertEquals(cred.getAccessKeySecret(), envSecretKey, assertMsg); + + tosV2.close(); + } + + @Test + public void testCreateClientWithSimpleCredentials() throws IOException { + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key(TOS_SCHEME), envEndpoint); + conf.set(TosKeys.FS_TOS_CREDENTIALS_PROVIDER, SimpleCredentialsProvider.NAME); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key(TestUtility.bucket()), envAccessKey); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key(TestUtility.bucket()), envSecretKey); + conf.setInt(TosKeys.FS_TOS_HTTP_MAX_CONNECTIONS, 24); + conf.setInt(MAX_RETRY_COUNT_KEY, 24); + + ListObjectsV2Input input = + ListObjectsV2Input.builder().bucket(TestUtility.bucket()).prefix(TEST_KEY).marker("") + .maxKeys(10).build(); + + TOSV2 v2 = new TOSV2ClientBuilder().build(TestUtility.region(), TestUtility.endpoint(), + new StaticCredentials(envAccessKey, envSecretKey)); + ListObjectsV2Output outputV2 = v2.listObjects(input); + + DelegationClient tosV2 = + new DelegationClientBuilder().bucket(TestUtility.bucket()).conf(conf).build(); + + ListObjectsV2Output output = tosV2.listObjects(input); + assertEquals(outputV2.getContents().size(), output.getContents().size(), + "delegation client must be same as native client"); + + tosV2.close(); + } + + @Test + public void testCachedConcurrently(TestInfo testInfo, TestReporter testReporter) { + String bucketName = testInfo.getTestMethod().map(method -> method.getName()).orElse("Unknown"); + + Function commonConf = bucket -> { + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key(TOS_SCHEME), envEndpoint); + conf.set(TosKeys.FS_TOS_CREDENTIALS_PROVIDER, SimpleCredentialsProvider.NAME); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key(bucket), envAccessKey); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key(bucket), envSecretKey); + return conf; + }; + + // enable cache + Function enableCachedConf = bucket -> { + Configuration conf = commonConf.apply(bucket); + conf.setBoolean(TosKeys.FS_TOS_DISABLE_CLIENT_CACHE, false); + return conf; + }; + + ExecutorService es = ThreadPools.newWorkerPool("testCachedConcurrently", 32); + int bucketCount = 5; + int taskCount = 10000; + + AtomicInteger success = new AtomicInteger(0); + AtomicInteger failure = new AtomicInteger(0); + Tasks.foreach(IntStream.range(0, taskCount).boxed().map(i -> bucketName + (i % bucketCount))) + .executeWith(es).run(bucket -> { + try { + Configuration conf = enableCachedConf.apply(bucket); + DelegationClient 
client = + new DelegationClientBuilder().bucket(bucket).conf(conf).build(); + client.close(); + success.incrementAndGet(); + } catch (Exception e) { + failure.incrementAndGet(); + } + }); + + assertEquals(bucketCount, DelegationClientBuilder.CACHE.size()); + assertEquals(taskCount, success.get()); + assertEquals(0, failure.get()); + + // clear cache + DelegationClientBuilder.CACHE.clear(); + + // disable cache + Function disableCachedConf = bucket -> { + Configuration conf = commonConf.apply(bucket); + conf.setBoolean(TosKeys.FS_TOS_DISABLE_CLIENT_CACHE, true); + return conf; + }; + + success.set(0); + failure.set(0); + Tasks.foreach(IntStream.range(0, taskCount).boxed().map(i -> bucketName + (i % bucketCount))) + .executeWith(es).run(bucket -> { + try { + Configuration conf = disableCachedConf.apply(bucket); + DelegationClient client = + new DelegationClientBuilder().bucket(bucket).conf(conf).build(); + client.close(); + success.incrementAndGet(); + } catch (Exception e) { + failure.incrementAndGet(); + } + }); + + assertTrue(DelegationClientBuilder.CACHE.isEmpty()); + assertEquals(taskCount, success.get()); + assertEquals(0, failure.get()); + + es.shutdown(); + } + + @AfterEach + public void deleteAllTestData() throws IOException { + TOSV2 tosSdkClientV2 = + new TOSV2ClientBuilder().build(TestUtility.region(), TestUtility.endpoint(), + new StaticCredentials(envAccessKey, envSecretKey)); + tosSdkClientV2.deleteObject( + DeleteObjectInput.builder().bucket(TestUtility.bucket()).key(TEST_KEY).build()); + + tosSdkClientV2.close(); + DelegationClientBuilder.CACHE.clear(); + } + + @Test + public void testRetryableException() { + assertTrue(retryableException(new TosServerException(500))); + assertTrue(retryableException(new TosServerException(501))); + assertTrue(retryableException(new TosServerException(429))); + assertFalse(retryableException(new TosServerException(404))); + + assertTrue(retryableException(new TosException(new SocketException()))); + assertTrue(retryableException(new TosException(new UnknownHostException()))); + assertTrue(retryableException(new TosException(new SSLException("fake ssl")))); + assertTrue(retryableException(new TosException(new SocketTimeoutException()))); + assertTrue(retryableException(new TosException(new InterruptedException()))); + + assertTrue(retryableException(new TosClientException("fake ioe", new IOException()))); + assertFalse(retryableException(new TosClientException("fake eof", new EOFException()))); + + assertTrue(retryableException(new TosServerException(409))); + assertTrue( + retryableException(new TosServerException(409).setEc(TOSErrorCodes.PATH_LOCK_CONFLICT))); + assertFalse( + retryableException(new TosServerException(409).setEc(TOSErrorCodes.DELETE_NON_EMPTY_DIR))); + assertFalse( + retryableException(new TosServerException(409).setEc(TOSErrorCodes.LOCATED_UNDER_A_FILE))); + assertFalse(retryableException( + new TosServerException(409).setEc(TOSErrorCodes.COPY_BETWEEN_DIR_AND_FILE))); + assertFalse(retryableException( + new TosServerException(409).setEc(TOSErrorCodes.RENAME_TO_AN_EXISTED_DIR))); + assertFalse( + retryableException(new TosServerException(409).setEc(TOSErrorCodes.RENAME_TO_SUB_DIR))); + assertFalse(retryableException( + new TosServerException(409).setEc(TOSErrorCodes.RENAME_BETWEEN_DIR_AND_FILE))); + } + + private boolean retryableException(TosException e) { + return isRetryableException(e, + Arrays.asList(TOSErrorCodes.FAST_FAILURE_CONFLICT_ERROR_CODES.split(","))); + } +} diff --git 
a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestTOSInputStream.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestTOSInputStream.java new file mode 100644 index 0000000000000..16e58fb1cb89d --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestTOSInputStream.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.object.tos; + +import com.volcengine.tos.internal.util.aborthook.AbortInputStreamHook; +import com.volcengine.tos.model.object.GetObjectBasicOutput; +import com.volcengine.tos.model.object.GetObjectV2Output; +import org.apache.hadoop.fs.tosfs.object.Constants; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.thirdparty.com.google.common.io.ByteStreams; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestTOSInputStream { + + private static final int DATA_SIZE = 1 << 20; + private static final byte[] DATA = TestUtility.rand(DATA_SIZE); + + @Test + public void testForceClose() throws IOException { + TOSInputStream stream = createStream(DATA, 0, DATA_SIZE - 1, 1024); + stream.close(); + assertTrue(cast(stream).isForceClose(), "Expected force close"); + + stream = createStream(DATA, 0, DATA_SIZE - 1, 1024); + ByteStreams.skipFully(stream, DATA_SIZE - 1024 - 1); + stream.close(); + assertTrue(cast(stream).isForceClose(), "Expected force close"); + + stream = createStream(DATA, 0, -1, 1024); + stream.close(); + assertTrue(cast(stream).isForceClose(), "Expected force close"); + + stream = createStream(DATA, 0, -1, 1024); + ByteStreams.skipFully(stream, DATA_SIZE - 1024 - 1); + stream.close(); + assertTrue(cast(stream).isForceClose(), "Expected force close"); + + stream = createStream(DATA, 0, -1, 1024); + ByteStreams.skipFully(stream, DATA_SIZE - 1024); + stream.close(); + assertTrue(cast(stream).isForceClose(), "Expected force close"); + } + + @Test + public void testClose() throws IOException { + TOSInputStream stream = createStream(DATA, 0, DATA_SIZE - 1, DATA_SIZE); + stream.close(); + assertFalse(cast(stream).isForceClose(), "Expected close by skipping bytes"); + + stream = createStream(DATA, 0, DATA_SIZE - 1, 1024); + ByteStreams.skipFully(stream, DATA_SIZE - 1024); + stream.close(); + assertFalse(cast(stream).isForceClose(), "Expected close by skipping bytes"); + + stream = createStream(DATA, 0, DATA_SIZE - 1, 1024); + ByteStreams.skipFully(stream, DATA_SIZE - 
1023);
+    stream.close();
+    assertFalse(cast(stream).isForceClose(), "Expected close by skipping bytes");
+
+    stream = createStream(DATA, 0, -1, DATA_SIZE + 1);
+    stream.close();
+    assertFalse(cast(stream).isForceClose(), "Expected close by skipping bytes");
+
+    stream = createStream(DATA, 0, -1, 1024);
+    ByteStreams.skipFully(stream, DATA_SIZE - 1023);
+    stream.close();
+    assertFalse(cast(stream).isForceClose(), "Expected close by skipping bytes");
+
+    stream = createStream(DATA, 0, -1, 1024);
+    ByteStreams.skipFully(stream, DATA_SIZE);
+    stream.close();
+    assertFalse(cast(stream).isForceClose(), "Expected close by skipping bytes");
+  }
+
+  private TestInputStream cast(TOSInputStream stream) throws IOException {
+    InputStream content = stream.getObjectOutput().verifiedContent(Constants.MAGIC_CHECKSUM);
+    assertTrue(content instanceof TestInputStream, "Not a TestInputStream");
+    return (TestInputStream) content;
+  }
+
+  private TOSInputStream createStream(byte[] data, long startOff, long endOff, long maxDrainSize)
+      throws IOException {
+    TestInputStream stream =
+        new TestInputStream(data, (int) startOff, (int) (data.length - startOff));
+    GetObjectV2Output output = new GetObjectV2Output(new GetObjectBasicOutput(), stream).setHook(
+        new ForceCloseHook(stream));
+
+    return new TOSInputStream(new GetObjectOutput(output, Constants.MAGIC_CHECKSUM), startOff,
+        endOff, maxDrainSize, Constants.MAGIC_CHECKSUM);
+  }
+
+  private static final class TestInputStream extends ByteArrayInputStream {
+    // -1 means close() was called.
+    // 0 means neither close() nor forceClose() was called.
+    // 1 means forceClose() was called.
+    private int closeState = 0;
+
+    private TestInputStream(byte[] buf, int off, int len) {
+      super(buf, off, len);
+    }
+
+    @Override
+    public void close() {
+      closeState = -1;
+    }
+
+    public void forceClose() {
+      closeState = 1;
+    }
+
+    boolean isForceClose() {
+      assertTrue(closeState == -1 || closeState == 1,
+          "Neither close() nor forceClose() was called");
+      return closeState == 1;
+    }
+  }
+
+  private static final class ForceCloseHook implements AbortInputStreamHook {
+    private final TestInputStream in;
+
+    private ForceCloseHook(TestInputStream in) {
+      this.in = in;
+    }
+
+    @Override
+    public void abort() {
+      if (in != null) {
+        in.forceClose();
+      }
+    }
+  }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestTOSObjectStorage.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestTOSObjectStorage.java
new file mode 100644
index 0000000000000..1cc4b4c27fb0d
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestTOSObjectStorage.java
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.fs.tosfs.object.tos; + +import com.volcengine.tos.internal.model.CRC64Checksum; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.common.Bytes; +import org.apache.hadoop.fs.tosfs.conf.TosKeys; +import org.apache.hadoop.fs.tosfs.object.ChecksumType; +import org.apache.hadoop.fs.tosfs.object.Constants; +import org.apache.hadoop.fs.tosfs.object.MultipartUpload; +import org.apache.hadoop.fs.tosfs.object.ObjectInfo; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.object.ObjectStorageFactory; +import org.apache.hadoop.fs.tosfs.object.Part; +import org.apache.hadoop.fs.tosfs.object.exceptions.NotAppendableException; +import org.apache.hadoop.fs.tosfs.object.request.ListObjectsRequest; +import org.apache.hadoop.fs.tosfs.object.response.ListObjectsResponse; +import org.apache.hadoop.fs.tosfs.util.CommonUtils; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.apache.hadoop.util.PureJavaCrc32C; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.ByteArrayInputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Stream; +import java.util.zip.Checksum; + +import static org.apache.hadoop.fs.tosfs.object.tos.TOS.TOS_SCHEME; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assumptions.assumeFalse; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public class TestTOSObjectStorage { + public static Stream provideArguments() { + assumeTrue(TestEnv.checkTestEnabled()); + + List values = new ArrayList<>(); + + Configuration conf = new Configuration(); + conf.set(TosKeys.FS_TOS_CHECKSUM_TYPE, ChecksumType.CRC64ECMA.name()); + values.add(Arguments.of( + ObjectStorageFactory.createWithPrefix(String.format("tos-%s/", UUIDUtils.random()), + TOS_SCHEME, TestUtility.bucket(), conf), + new CRC64Checksum(), + ChecksumType.CRC64ECMA)); + + conf = new Configuration(); + conf.set(TosKeys.FS_TOS_CHECKSUM_TYPE, ChecksumType.CRC32C.name()); + values.add(Arguments.of( + ObjectStorageFactory.createWithPrefix(String.format("tos-%s/", UUIDUtils.random()), + TOS_SCHEME, TestUtility.bucket(), conf), + new PureJavaCrc32C(), + ChecksumType.CRC32C)); + + return values.stream(); + } + + private ObjectStorage tos; + private ChecksumType type; + + private void setEnv(ObjectStorage objectStore, ChecksumType csType) { + this.tos = objectStore; + this.type = csType; + } + + @AfterEach + public void tearDown() throws Exception { + CommonUtils.runQuietly(() -> tos.deleteAll("")); + for (MultipartUpload upload : tos.listUploads("")) { + tos.abortMultipartUpload(upload.key(), upload.uploadId()); + } + tos.close(); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testHeadObj(ObjectStorage objectStore, Checksum ckmer, ChecksumType csType) { + setEnv(objectStore, csType); + + String key = "testPutChecksum"; + byte[] data = TestUtility.rand(1024); + ckmer.update(data, 0, data.length); + assertEquals(ckmer.getValue(), 
parseChecksum(objectStore.put(key, data))); + + ObjectInfo objInfo = objectStore.head(key); + assertEquals(ckmer.getValue(), parseChecksum(objInfo.checksum())); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testGetFileStatus(ObjectStorage objectStore, Checksum ckmer, ChecksumType csType) { + setEnv(objectStore, csType); + assumeFalse(objectStore.bucket().isDirectory()); + + Configuration conf = new Configuration(objectStore.conf()); + conf.setBoolean(TosKeys.FS_TOS_GET_FILE_STATUS_ENABLED, true); + objectStore.initialize(conf, objectStore.bucket().name()); + + String key = "testFileStatus"; + byte[] data = TestUtility.rand(256); + byte[] checksum = objectStore.put(key, data); + + ObjectInfo obj1 = objectStore.objectStatus(key); + assertArrayEquals(checksum, obj1.checksum()); + assertEquals(key, obj1.key()); + assertEquals(obj1, objectStore.head(key)); + + ObjectInfo obj2 = objectStore.objectStatus(key + "/"); + assertNull(obj2); + + String dirKey = "testDirStatus/"; + checksum = objectStore.put(dirKey, new byte[0]); + + ObjectInfo obj3 = objectStore.objectStatus("testDirStatus"); + assertArrayEquals(checksum, obj3.checksum()); + assertEquals(dirKey, obj3.key()); + assertEquals(obj3, objectStore.head(dirKey)); + assertNull(objectStore.head("testDirStatus")); + ObjectInfo obj4 = objectStore.objectStatus(dirKey); + assertArrayEquals(checksum, obj4.checksum()); + assertEquals(dirKey, obj4.key()); + assertEquals(obj4, objectStore.head(dirKey)); + + String prefix = "testPrefix/"; + objectStore.put(prefix + "subfile", data); + ObjectInfo obj5 = objectStore.objectStatus(prefix); + assertEquals(prefix, obj5.key()); + assertArrayEquals(Constants.MAGIC_CHECKSUM, obj5.checksum()); + assertNull(objectStore.head(prefix)); + ObjectInfo obj6 = objectStore.objectStatus("testPrefix"); + assertEquals(prefix, obj6.key()); + assertArrayEquals(Constants.MAGIC_CHECKSUM, obj6.checksum()); + assertNull(objectStore.head("testPrefix")); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testObjectStatus(ObjectStorage objectStore, Checksum checksum, ChecksumType csType) { + setEnv(objectStore, csType); + assumeFalse(objectStore.bucket().isDirectory()); + + String key = "testObjectStatus"; + byte[] data = TestUtility.rand(1024); + checksum.update(data, 0, data.length); + assertEquals(checksum.getValue(), parseChecksum(objectStore.put(key, data))); + + ObjectInfo objInfo = objectStore.objectStatus(key); + assertEquals(checksum.getValue(), parseChecksum(objInfo.checksum())); + + objInfo = objectStore.head(key); + assertEquals(checksum.getValue(), parseChecksum(objInfo.checksum())); + + String dir = key + "/"; + objectStore.put(dir, new byte[0]); + objInfo = objectStore.objectStatus(dir); + assertEquals(Constants.MAGIC_CHECKSUM, objInfo.checksum()); + + objInfo = objectStore.head(dir); + assertEquals(Constants.MAGIC_CHECKSUM, objInfo.checksum()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListObjs(ObjectStorage objectStore, Checksum checksum, ChecksumType csType) { + setEnv(objectStore, csType); + + String key = "testListObjs"; + byte[] data = TestUtility.rand(1024); + checksum.update(data, 0, data.length); + for (int i = 0; i < 5; i++) { + assertEquals(checksum.getValue(), parseChecksum(objectStore.put(key, data))); + } + + ListObjectsRequest request = + ListObjectsRequest.builder().prefix(key).startAfter(null).maxKeys(-1).delimiter("/") + .build(); + Iterator iter = objectStore.list(request).iterator(); + while 
(iter.hasNext()) { + List objs = iter.next().objects(); + for (ObjectInfo obj : objs) { + assertEquals(checksum.getValue(), parseChecksum(obj.checksum())); + } + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testPutChecksum(ObjectStorage objectStore, Checksum checksum, ChecksumType csType) { + setEnv(objectStore, csType); + + String key = "testPutChecksum"; + byte[] data = TestUtility.rand(1024); + checksum.update(data, 0, data.length); + + byte[] checksumStr = objectStore.put(key, data); + + assertEquals(checksum.getValue(), parseChecksum(checksumStr)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testMPUChecksum(ObjectStorage objectStore, Checksum checksum, ChecksumType csType) { + setEnv(objectStore, csType); + + int partNumber = 2; + String key = "testMPUChecksum"; + MultipartUpload mpu = objectStore.createMultipartUpload(key); + byte[] data = TestUtility.rand(mpu.minPartSize() * partNumber); + checksum.update(data, 0, data.length); + + List parts = new ArrayList<>(); + for (int i = 0; i < partNumber; i++) { + final int index = i; + Part part = objectStore.uploadPart(key, mpu.uploadId(), index + 1, + () -> new ByteArrayInputStream(data, index * mpu.minPartSize(), mpu.minPartSize()), + mpu.minPartSize()); + parts.add(part); + } + + byte[] checksumStr = objectStore.completeUpload(key, mpu.uploadId(), parts); + assertEquals(checksum.getValue(), parseChecksum(checksumStr)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testAppendable(ObjectStorage objectStore, Checksum checksum, ChecksumType csType) { + setEnv(objectStore, csType); + assumeFalse(objectStore.bucket().isDirectory()); + + // Test create object with append then append. + byte[] data = TestUtility.rand(256); + String prefix = "a/testAppendable/"; + String key = prefix + "object.txt"; + objectStore.append(key, data); + + objectStore.append(key, new byte[0]); + + // Test create object with put then append. + data = TestUtility.rand(256); + objectStore.put(key, data); + + assertThrows(NotAppendableException.class, () -> objectStore.append(key, new byte[0]), + "Expect not appendable."); + + objectStore.delete(key); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testDirectoryBucketAppendable(ObjectStorage objectStore, Checksum checksum, + ChecksumType csType) { + setEnv(objectStore, csType); + assumeTrue(objectStore.bucket().isDirectory()); + + byte[] data = TestUtility.rand(256); + String prefix = "a/testAppendable/"; + String key = prefix + "object.txt"; + objectStore.put(key, data); + + objectStore.append(key, new byte[1024]); + + objectStore.delete(key); + } + + private long parseChecksum(byte[] checksum) { + switch (type) { + case CRC32C: + case CRC64ECMA: + return Bytes.toLong(checksum); + default: + throw new IllegalArgumentException( + String.format("Checksum type %s is not supported by TOS.", type.name())); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestTOSRetryPolicy.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestTOSRetryPolicy.java new file mode 100644 index 0000000000000..c2d21f55d9dfd --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/TestTOSRetryPolicy.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.object.tos; + +import com.volcengine.tos.TOSV2; +import com.volcengine.tos.TosException; +import com.volcengine.tos.TosServerException; +import com.volcengine.tos.comm.HttpStatus; +import com.volcengine.tos.model.RequestInfo; +import com.volcengine.tos.model.object.PutObjectOutput; +import com.volcengine.tos.model.object.UploadPartV2Output; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.conf.TosKeys; +import org.apache.hadoop.fs.tosfs.object.InputStreamProvider; +import org.apache.hadoop.fs.tosfs.object.ObjectStorageFactory; +import org.apache.hadoop.fs.tosfs.object.Part; +import org.apache.hadoop.fs.tosfs.object.tos.auth.SimpleCredentialsProvider; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.net.SocketException; +import java.net.UnknownHostException; +import java.util.HashMap; +import java.util.Map; +import javax.net.ssl.SSLException; + +import static org.apache.hadoop.fs.tosfs.object.tos.TOS.TOS_SCHEME; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class TestTOSRetryPolicy { + + private final String retryKey = "retryKey.txt"; + private TOSV2 tosClient; + private DelegationClient client; + + @BeforeAll + public static void before() { + assumeTrue(TestEnv.checkTestEnabled()); + } + + @BeforeEach + public void setUp() { + client = createRetryableDelegationClient(); + tosClient = mock(TOSV2.class); + client.setClient(tosClient); + } + + @AfterEach + public void tearDown() throws IOException { + tosClient.close(); + client.close(); + } + + private DelegationClient createRetryableDelegationClient() { + Configuration conf = new Configuration(); + conf.set(ConfKeys.FS_OBJECT_STORAGE_ENDPOINT.key(TOS_SCHEME), + "https://tos-cn-beijing.ivolces.com"); + conf.set(TosKeys.FS_TOS_CREDENTIALS_PROVIDER, SimpleCredentialsProvider.NAME); + conf.setBoolean(TosKeys.FS_TOS_DISABLE_CLIENT_CACHE, true); + conf.set(TosKeys.FS_TOS_ACCESS_KEY_ID, "ACCESS_KEY"); + conf.set(TosKeys.FS_TOS_SECRET_ACCESS_KEY, "SECRET_KEY"); + return new DelegationClientBuilder().bucket("test").conf(conf).build(); + } + + @Test 
+  public void testShouldThrowExceptionAfterRunOut5RetryTimesIfNoRetryConfigSet()
+      throws IOException {
+    TOS storage =
+        (TOS) ObjectStorageFactory.create(TOS_SCHEME, TestUtility.bucket(), new Configuration());
+    storage.setClient(client);
+    client.setMaxRetryTimes(5);
+
+    PutObjectOutput response = mock(PutObjectOutput.class);
+    InputStreamProvider streamProvider = mock(InputStreamProvider.class);
+
+    when(tosClient.putObject(any())).thenThrow(
+        new TosServerException(HttpStatus.INTERNAL_SERVER_ERROR),
+        new TosServerException(HttpStatus.TOO_MANY_REQUESTS),
+        new TosException(new SocketException("fake msg")),
+        new TosException(new UnknownHostException("fake msg")),
+        new TosException(new SSLException("fake msg")),
+        new TosException(new InterruptedException("fake msg")),
+        new TosException(new InterruptedException("fake msg"))).thenReturn(response);
+
+    // after the retry attempts are exhausted, the put should throw an exception
+    RuntimeException exception =
+        assertThrows(RuntimeException.class, () -> storage.put(retryKey, streamProvider, 0));
+    assertTrue(exception instanceof TosException);
+    assertTrue(exception.getCause() instanceof SSLException);
+
+    // the newStream method of the stream provider should be called 5 times
+    verify(streamProvider, times(5)).newStream();
+
+    storage.close();
+  }
+
+  @Test
+  public void testShouldReturnResultAfterRetry8TimesIfConfigured10TimesRetry()
+      throws IOException {
+    TOS storage =
+        (TOS) ObjectStorageFactory.create(TOS_SCHEME, TestUtility.bucket(), new Configuration());
+    DelegationClient delegationClient = createRetryableDelegationClient();
+    delegationClient.setClient(tosClient);
+    delegationClient.setMaxRetryTimes(10);
+    storage.setClient(delegationClient);
+
+    UploadPartV2Output response = new UploadPartV2Output().setPartNumber(1).setEtag("etag");
+
+    InputStream in = mock(InputStream.class);
+    InputStreamProvider streamProvider = mock(InputStreamProvider.class);
+    when(streamProvider.newStream()).thenReturn(in);
+
+    when(tosClient.uploadPart(any())).thenThrow(
+        new TosServerException(HttpStatus.INTERNAL_SERVER_ERROR),
+        new TosServerException(HttpStatus.TOO_MANY_REQUESTS),
+        new TosException(new SocketException("fake msg")),
+        new TosException(new UnknownHostException("fake msg")),
+        new TosException(new SSLException("fake msg")),
+        new TosException(new InterruptedException("fake msg")),
+        new TosException(new InterruptedException("fake msg"))).thenReturn(response);
+
+    // with 10 retries configured, the 8th attempt succeeds, so the result is returned
+    // instead of an exception
+    Part part = storage.uploadPart(retryKey, "uploadId", 1, streamProvider, 0);
+    assertEquals(1, part.num());
+    assertEquals("etag", part.eTag());
+
+    // the newStream method of the stream provider should be called 8 times
+    verify(streamProvider, times(8)).newStream();
+
+    storage.close();
+  }
+
+  @Test
+  public void testShouldReturnResultIfRetry3TimesSucceed() throws IOException {
+    TOS storage =
+        (TOS) ObjectStorageFactory.create(TOS_SCHEME, TestUtility.bucket(), new Configuration());
+    storage.setClient(client);
+
+    PutObjectOutput response = mock(PutObjectOutput.class);
+    InputStreamProvider streamProvider = mock(InputStreamProvider.class);
+
+    RequestInfo requestInfo = mock(RequestInfo.class);
+    Map<String, String> header = new HashMap<>();
+    when(response.getRequestInfo()).thenReturn(requestInfo);
+    when(requestInfo.getHeader()).thenReturn(header);
+
+    when(tosClient.putObject(any())).thenThrow(
+        new TosServerException(HttpStatus.INTERNAL_SERVER_ERROR),
+        new TosServerException(HttpStatus.TOO_MANY_REQUESTS)).thenReturn(response);
+
+    storage.put(retryKey,
streamProvider, 0); + // the newStream method of stream provider should be called 3 times + verify(streamProvider, times(3)).newStream(); + + storage.close(); + } + + @Test + public void testShouldNotRetryIfThrowUnRetryException() throws IOException { + TOS storage = + (TOS) ObjectStorageFactory.create(TOS_SCHEME, TestUtility.bucket(), new Configuration()); + storage.setClient(client); + + InputStreamProvider streamProvider = mock(InputStreamProvider.class); + + when(tosClient.putObject(any())).thenThrow( + new TosException(new NullPointerException("fake msg."))); + + RuntimeException exception = + assertThrows(RuntimeException.class, () -> storage.put(retryKey, streamProvider, 0)); + assertTrue(exception instanceof TosException); + assertTrue(exception.getCause() instanceof NullPointerException); + + // the newStream method of stream provider should be only called once. + verify(streamProvider, times(1)).newStream(); + + storage.close(); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestAbstractCredentialsProvider.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestAbstractCredentialsProvider.java new file mode 100644 index 0000000000000..d4db4e3c3d4d6 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestAbstractCredentialsProvider.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.object.tos.auth; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.object.tos.TOS; +import org.apache.hadoop.fs.tosfs.util.TestUtility; + +public abstract class TestAbstractCredentialsProvider { + private String envAccessKeyId; + private String envSecretAccessKey; + private String envSessionToken; + + protected Configuration getConf() { + return new Configuration(); + } + + protected void saveOsCredEnv() { + if (StringUtils.isNotEmpty(System.getenv(TOS.ENV_TOS_ACCESS_KEY_ID))) { + envAccessKeyId = System.getenv(TOS.ENV_TOS_ACCESS_KEY_ID); + } + + if (StringUtils.isNotEmpty(System.getenv(TOS.ENV_TOS_SECRET_ACCESS_KEY))) { + envSecretAccessKey = System.getenv(TOS.ENV_TOS_SECRET_ACCESS_KEY); + } + + if (StringUtils.isNotEmpty(System.getenv(TOS.ENV_TOS_SESSION_TOKEN))) { + envSessionToken = System.getenv(TOS.ENV_TOS_SESSION_TOKEN); + } + } + + protected void resetOsCredEnv() { + resetOsCredEnv(TOS.ENV_TOS_ACCESS_KEY_ID, envAccessKeyId); + resetOsCredEnv(TOS.ENV_TOS_SECRET_ACCESS_KEY, envSecretAccessKey); + resetOsCredEnv(TOS.ENV_TOS_SESSION_TOKEN, envSessionToken); + } + + private void resetOsCredEnv(String key, String value) { + if (StringUtils.isNotEmpty(value)) { + TestUtility.setSystemEnv(key, value); + } else { + TestUtility.removeSystemEnv(key); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestDefaultCredentialsProviderChain.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestDefaultCredentialsProviderChain.java new file mode 100644 index 0000000000000..c2b7b4da3ebcc --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestDefaultCredentialsProviderChain.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.object.tos.auth; + +import com.volcengine.tos.TosException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.conf.TosKeys; +import org.apache.hadoop.fs.tosfs.object.tos.TOS; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.apache.hadoop.fs.tosfs.util.TestUtility.removeSystemEnv; +import static org.apache.hadoop.fs.tosfs.util.TestUtility.setSystemEnv; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestDefaultCredentialsProviderChain extends TestAbstractCredentialsProvider { + + private static final String MOCK_TEST_AK = "AK"; + private static final String MOCK_TEST_SK = "SK"; + private static final String MOCK_TEST_TST_TOKEN = "STS_TOKEN"; + + private static final String MOCK_TEST_AK_WITH_BUCKET = "AK_WITH_BUCKET"; + private static final String MOCK_TEST_SK_WITH_BUCKET = "SK_WITH_BUCKET"; + private static final String MOCK_TEST_STS_TOKEN_WITH_BUCKET = "STS_TOKEN_WITH_BUCKET"; + + private static final String MOCK_TEST_ENV_AK = "ENV_AK"; + private static final String MOCK_TEST_ENV_SK = "ENV_SK"; + private static final String MOCK_TEST_ENV_STS_TOKEN = "ENV_STS_TOKEN"; + + private static final String MOCK_TEST_BUCKET = "test"; + private static final String MOCK_TEST_ROLE_NAME = "roleName"; + private static final String MOCK_PATH = "/volcstack/latest/iam/security_credentials/"; + private static final String API_ENDPOINT = MOCK_PATH + MOCK_TEST_ROLE_NAME; + private static final String EXPIRED_TIME_PATTERN = "yyyy-MM-dd'T'HH:mm:ssXXX"; + + @Override + public Configuration getConf() { + Configuration conf = new Configuration(); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key("test"), MOCK_TEST_AK_WITH_BUCKET); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key("test"), MOCK_TEST_SK_WITH_BUCKET); + conf.set(TosKeys.FS_TOS_BUCKET_SESSION_TOKEN.key("test"), MOCK_TEST_STS_TOKEN_WITH_BUCKET); + conf.set(TosKeys.FS_TOS_ACCESS_KEY_ID, MOCK_TEST_AK); + conf.set(TosKeys.FS_TOS_SECRET_ACCESS_KEY, MOCK_TEST_SK); + conf.set(TosKeys.FS_TOS_SESSION_TOKEN, MOCK_TEST_TST_TOKEN); + return conf; + } + + @BeforeEach + public void setUp() { + saveOsCredEnv(); + } + + @Test + public void testLoadCredFromEnvProvider() { + Configuration conf = getConf(); + setSystemEnv(TOS.ENV_TOS_ACCESS_KEY_ID, MOCK_TEST_ENV_AK); + setSystemEnv(TOS.ENV_TOS_SECRET_ACCESS_KEY, MOCK_TEST_ENV_SK); + setSystemEnv(TOS.ENV_TOS_SESSION_TOKEN, MOCK_TEST_ENV_STS_TOKEN); + DefaultCredentialsProviderChain chain = new DefaultCredentialsProviderChain(); + chain.initialize(conf, null); + + assertEquals(chain.credential().getAccessKeyId(), + MOCK_TEST_ENV_AK, String.format("expect %s", MOCK_TEST_ENV_AK)); + assertEquals(chain.credential().getAccessKeySecret(), MOCK_TEST_ENV_SK, + String.format("expect %s", MOCK_TEST_ENV_SK)); + assertEquals(chain.credential().getSecurityToken(), MOCK_TEST_ENV_STS_TOKEN, + String.format("expect %s", MOCK_TEST_ENV_STS_TOKEN)); + assertTrue(chain.lastUsedProvider() instanceof EnvironmentCredentialsProvider); + } + + @Test + public void testLoadCredFromSimpleProviderWithBucket() { + Configuration conf = getConf(); + removeSystemEnv(TOS.ENV_TOS_ACCESS_KEY_ID); + removeSystemEnv(TOS.ENV_TOS_SECRET_ACCESS_KEY); + 
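// Clear the session token as well so the environment provider cannot supply any credentials here. +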
removeSystemEnv(TOS.ENV_TOS_SESSION_TOKEN); + DefaultCredentialsProviderChain chain = new DefaultCredentialsProviderChain(); + chain.initialize(conf, MOCK_TEST_BUCKET); + + assertEquals(chain.credential().getAccessKeyId(), MOCK_TEST_AK_WITH_BUCKET, + String.format("expect %s", MOCK_TEST_AK_WITH_BUCKET)); + assertEquals(chain.credential().getAccessKeySecret(), MOCK_TEST_SK_WITH_BUCKET, + String.format("expect %s", MOCK_TEST_SK_WITH_BUCKET)); + assertEquals(chain.credential().getSecurityToken(), MOCK_TEST_STS_TOKEN_WITH_BUCKET, + String.format("expect %s", MOCK_TEST_STS_TOKEN_WITH_BUCKET)); + assertTrue(chain.lastUsedProvider() instanceof SimpleCredentialsProvider); + } + + @Test + public void testLoadCredFromSimpleProvider() { + Configuration conf = getConf(); + removeSystemEnv(TOS.ENV_TOS_ACCESS_KEY_ID); + removeSystemEnv(TOS.ENV_TOS_SECRET_ACCESS_KEY); + DefaultCredentialsProviderChain chain = new DefaultCredentialsProviderChain(); + chain.initialize(conf, "test-bucket"); + + assertEquals(chain.credential().getAccessKeyId(), MOCK_TEST_AK, + String.format("expect %s", MOCK_TEST_AK)); + assertEquals(chain.credential().getAccessKeySecret(), MOCK_TEST_SK, + String.format("expect %s", MOCK_TEST_SK)); + assertTrue(chain.lastUsedProvider() instanceof SimpleCredentialsProvider); + } + + @Test + public void testNotFoundAnyProvider() { + removeSystemEnv(TOS.ENV_TOS_ACCESS_KEY_ID); + removeSystemEnv(TOS.ENV_TOS_SECRET_ACCESS_KEY); + DefaultCredentialsProviderChain chain = new DefaultCredentialsProviderChain(); + chain.initialize(new Configuration(), MOCK_TEST_BUCKET); + assertThrows(RuntimeException.class, chain::credential); + } + + @AfterEach + public void after() { + resetOsCredEnv(); + } + + @Test + public void testShouldReturnAKSKFollowByProviderSequence() { + setSystemEnv(TOS.ENV_TOS_ACCESS_KEY_ID, "ENV_ACCESS_KEY"); + setSystemEnv(TOS.ENV_TOS_SECRET_ACCESS_KEY, "ENV_SECRET_KEY"); + + // use the simple credential provider at first. + String providerClassesStr = SimpleCredentialsProvider.class.getName() + ',' + + EnvironmentCredentialsProvider.class.getName(); + + Configuration conf = new Configuration(); + conf.set(TosKeys.FS_TOS_CUSTOM_CREDENTIAL_PROVIDER_CLASSES, providerClassesStr); + conf.set(TosKeys.FS_TOS_ACCESS_KEY_ID, MOCK_TEST_AK); + conf.set(TosKeys.FS_TOS_SECRET_ACCESS_KEY, MOCK_TEST_SK); + conf.set(TosKeys.FS_TOS_SESSION_TOKEN, MOCK_TEST_TST_TOKEN); + + DefaultCredentialsProviderChain provider = new DefaultCredentialsProviderChain(); + provider.initialize(conf, MOCK_TEST_BUCKET); + + ExpireableCredential cred = provider.createCredential(); + assertEquals(MOCK_TEST_AK, cred.getAccessKeyId()); + assertEquals(MOCK_TEST_SK, cred.getAccessKeySecret()); + + assertFalse(cred.isExpired()); + + // use the env credential provider at first. 
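+ // With the environment provider now listed first, the env vars set above should take precedence over the conf-based AK/SK.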
+ providerClassesStr = EnvironmentCredentialsProvider.class.getName() + ',' + + SimpleCredentialsProvider.class.getName(); + conf.set(TosKeys.FS_TOS_CUSTOM_CREDENTIAL_PROVIDER_CLASSES, providerClassesStr); + + provider = new DefaultCredentialsProviderChain(); + provider.initialize(conf, MOCK_TEST_BUCKET); + cred = provider.createCredential(); + assertEquals("ENV_ACCESS_KEY", cred.getAccessKeyId()); + assertEquals("ENV_SECRET_KEY", cred.getAccessKeySecret()); + assertFalse(cred.isExpired()); + + removeSystemEnv(TOS.ENV_TOS_ACCESS_KEY_ID); + removeSystemEnv(TOS.ENV_TOS_SECRET_ACCESS_KEY); + } + + @Test + public void testShouldThrowExceptionWhenCustomClassNotFound() { + Configuration conf = new Configuration(); + conf.set(TosKeys.FS_TOS_CUSTOM_CREDENTIAL_PROVIDER_CLASSES, + SimpleCredentialsProvider.class.getName() + "NotExist"); + + DefaultCredentialsProviderChain provider = new DefaultCredentialsProviderChain(); + TosException tosException = + assertThrows(TosException.class, () -> provider.initialize(conf, null)); + assertTrue(tosException.getCause() instanceof ClassNotFoundException); + } + + @Test + public void testShouldThrowExceptionIfNoDefaultConstructorFound() { + Configuration conf = new Configuration(); + conf.set(TosKeys.FS_TOS_CUSTOM_CREDENTIAL_PROVIDER_CLASSES, + TestCredentialProviderNoDefaultConstructor.class.getName()); + DefaultCredentialsProviderChain provider = new DefaultCredentialsProviderChain(); + RuntimeException exception = + assertThrows(RuntimeException.class, () -> provider.initialize(conf, null)); + assertTrue(exception.getMessage().contains("java.lang.NoSuchMethodException")); + } + + static class TestCredentialProviderNoDefaultConstructor extends AbstractCredentialsProvider { + + TestCredentialProviderNoDefaultConstructor(String fake) { + } + + @Override + protected ExpireableCredential createCredential() { + return null; + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestEnvironmentCredentialsProvider.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestEnvironmentCredentialsProvider.java new file mode 100644 index 0000000000000..dfc8870e3f1e0 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestEnvironmentCredentialsProvider.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.object.tos.auth; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.object.tos.TOS; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; + +public class TestEnvironmentCredentialsProvider extends TestAbstractCredentialsProvider { + + @BeforeEach + public void setUp() { + saveOsCredEnv(); + } + + @Test + public void testLoadAkSkFromEnvProvider() { + TestUtility.setSystemEnv(TOS.ENV_TOS_ACCESS_KEY_ID, "AccessKeyId"); + TestUtility.setSystemEnv(TOS.ENV_TOS_SECRET_ACCESS_KEY, "SecretAccessKey"); + + EnvironmentCredentialsProvider provider = new EnvironmentCredentialsProvider(); + provider.initialize(new Configuration(), null); + + ExpireableCredential oldCred = provider.credential(); + assertEquals(oldCred.getAccessKeyId(), "AccessKeyId", "provider ak must be equals to env ak"); + assertEquals(oldCred.getAccessKeySecret(), "SecretAccessKey", + "provider sk must be equals to env sk"); + + TestUtility.setSystemEnv(TOS.ENV_TOS_ACCESS_KEY_ID, "newAccessKeyId"); + TestUtility.setSystemEnv(TOS.ENV_TOS_SECRET_ACCESS_KEY, "newSecretAccessKey"); + TestUtility.setSystemEnv(TOS.ENV_TOS_SESSION_TOKEN, "newSessionToken"); + + assertFalse(oldCred.isExpired()); + + ExpireableCredential newCred = provider.credential(); + assertEquals(newCred.getAccessKeyId(), "AccessKeyId", "provider ak must be equals to env ak"); + assertEquals(newCred.getAccessKeySecret(), "SecretAccessKey", + "provider sk must be equals to env sk"); + + assertFalse(newCred.isExpired()); + } + + @AfterEach + public void resetEnv() { + resetOsCredEnv(); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestSimpleCredentialsProvider.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestSimpleCredentialsProvider.java new file mode 100644 index 0000000000000..35392c119b852 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/object/tos/auth/TestSimpleCredentialsProvider.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.object.tos.auth; + +import com.volcengine.tos.auth.Credential; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.tosfs.conf.TosKeys; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestSimpleCredentialsProvider extends TestAbstractCredentialsProvider { + + @Test + public void testStaticCredentials() { + Configuration conf = new Configuration(); + conf.set(TosKeys.FS_TOS_ACCESS_KEY_ID, "ACCESS_KEY"); + conf.set(TosKeys.FS_TOS_SECRET_ACCESS_KEY, "SECRET_KEY"); + conf.set(TosKeys.FS_TOS_SESSION_TOKEN, "STS_TOKEN"); + SimpleCredentialsProvider provider = new SimpleCredentialsProvider(); + provider.initialize(conf, "test"); + Credential credentials = provider.credential(); + assertEquals("ACCESS_KEY", credentials.getAccessKeyId(), "access key must be ACCESS_KEY"); + assertEquals("SECRET_KEY", credentials.getAccessKeySecret(), "secret key must be SECRET_KEY"); + assertEquals("STS_TOKEN", credentials.getSecurityToken(), "sts token must be STS_TOKEN"); + } + + @Test + public void testStaticCredentialsWithBucket() { + Configuration conf = new Configuration(); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key("test"), "ACCESS_KEY"); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key("test"), "SECRET_KEY"); + conf.set(TosKeys.FS_TOS_BUCKET_SESSION_TOKEN.key("test"), "STS_TOKEN"); + SimpleCredentialsProvider provider = new SimpleCredentialsProvider(); + provider.initialize(conf, "test"); + Credential credentials = provider.credential(); + assertEquals("ACCESS_KEY", credentials.getAccessKeyId(), "access key must be ACCESS_KEY"); + assertEquals("SECRET_KEY", credentials.getAccessKeySecret(), "secret key must be SECRET_KEY"); + assertEquals("STS_TOKEN", credentials.getSecurityToken(), "sts token must be STS_TOKEN"); + } + + @Test + public void testStaticCredentialsWithPriority() { + Configuration conf = new Configuration(); + conf.set(TosKeys.FS_TOS_ACCESS_KEY_ID, "ACCESS_KEY"); + conf.set(TosKeys.FS_TOS_SECRET_ACCESS_KEY, "SECRET_KEY"); + conf.set(TosKeys.FS_TOS_SESSION_TOKEN, "STS_TOKEN"); + conf.set(TosKeys.FS_TOS_BUCKET_ACCESS_KEY_ID.key("test"), "ACCESS_KEY_BUCKET"); + conf.set(TosKeys.FS_TOS_BUCKET_SECRET_ACCESS_KEY.key("test"), "SECRET_KEY_BUCKET"); + conf.set(TosKeys.FS_TOS_BUCKET_SESSION_TOKEN.key("test"), "STS_TOKEN_BUCKET"); + + SimpleCredentialsProvider provider = new SimpleCredentialsProvider(); + provider.initialize(conf, "test"); + Credential credentials = provider.credential(); + assertEquals("ACCESS_KEY_BUCKET", credentials.getAccessKeyId(), + "access key must be ACCESS_KEY_BUCKET"); + assertEquals("SECRET_KEY_BUCKET", credentials.getAccessKeySecret(), + "secret key must be SECRET_KEY_BUCKET"); + assertEquals("STS_TOKEN_BUCKET", credentials.getSecurityToken(), + "sts token must be STS_TOKEN_BUCKET"); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestBaseFsOps.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestBaseFsOps.java new file mode 100644 index 0000000000000..3048f05eef30b --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestBaseFsOps.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.ops; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException; +import org.apache.hadoop.fs.tosfs.RawFileStatus; +import org.apache.hadoop.fs.tosfs.object.ObjectInfo; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.object.ObjectUtils; +import org.apache.hadoop.fs.tosfs.util.CommonUtils; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.io.InputStream; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public abstract class TestBaseFsOps implements TestBaseOps { + + private ObjectStorage storage; + + @Override + public ObjectStorage storage() { + return storage; + } + + private void setStorage(ObjectStorage storage) { + this.storage = storage; + } + + @AfterEach + public void tearDown() { + CommonUtils.runQuietly(() -> storage.deleteAll("")); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testDeleteFile(ObjectStorage store, FsOps fsOps) throws IOException { + setStorage(store); + Path path = new Path("/a/b"); + touchFile(path, TestUtility.rand(8)); + assertFileExist(path); + + fsOps.deleteFile(path); + assertFileDoesNotExist(path); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testDeleteEmptyDir(ObjectStorage store, FsOps fsOps) throws IOException { + setStorage(store); + Path path = new Path("/a/b/"); + mkdir(path); + + fsOps.deleteDir(path, false); + assertDirDoesNotExist(path); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testDeleteNonEmptyDir(ObjectStorage store, FsOps fsOps) throws IOException { + setStorage(store); + Path dirPath = new Path("/a/b/"); + Path subDirPath = new Path("/a/b/c/"); + Path filePath = new Path("/a/b/file.txt"); + mkdir(dirPath); + mkdir(subDirPath); + touchFile(filePath, new byte[10]); + + assertThrows(PathIsNotEmptyDirectoryException.class, () -> fsOps.deleteDir(dirPath, false)); + assertDirExist(dirPath); + assertDirExist(subDirPath); + assertFileExist(filePath); + + fsOps.deleteDir(dirPath, true); + assertDirDoesNotExist(dirPath); + assertDirDoesNotExist(subDirPath); + assertFileDoesNotExist(filePath); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testCreateDirRecursive(ObjectStorage store, FsOps fsOps) throws IOException { + setStorage(store); + Path path = new Path("/aa/bb/cc"); + String key = 
ObjectUtils.pathToKey(path, true); + String parentKey = ObjectUtils.pathToKey(path.getParent(), true); + String grandparents = ObjectUtils.pathToKey(path.getParent().getParent(), true); + + assertDirDoesNotExist(parentKey); + assertDirDoesNotExist(grandparents); + + fsOps.mkdirs(path); + assertDirExist(key); + assertDirExist(parentKey); + assertDirExist(grandparents); + + store.delete(key); + assertDirExist(parentKey); + assertDirExist(grandparents); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListEmptyDir(ObjectStorage store, FsOps fsOps) { + setStorage(store); + Path dir = path("testListEmptyDir"); + mkdir(dir); + + assertFalse(listDir(fsOps, dir, false).iterator().hasNext()); + assertFalse(listDir(fsOps, dir, true).iterator().hasNext()); + assertTrue(fsOps.isEmptyDirectory(dir)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListNonExistDir(ObjectStorage store, FsOps fsOps) { + setStorage(store); + Path dir = path("testListNonExistDir"); + assertDirDoesNotExist(dir); + + assertFalse(listDir(fsOps, dir, false).iterator().hasNext()); + assertFalse(listDir(fsOps, dir, false).iterator().hasNext()); + assertTrue(fsOps.isEmptyDirectory(dir)); + } + + private Iterable listDir(FsOps fsOps, Path dir, boolean recursive) { + return fsOps.listDir(dir, recursive, s -> true); + } + + private Iterable listFiles(FsOps fsOps, Path dir, boolean recursive) { + return fsOps.listDir(dir, recursive, s -> !ObjectInfo.isDir(s)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListAFileViaListDir(ObjectStorage store, FsOps fsOps) { + setStorage(store); + Path file = new Path("testListFileViaListDir"); + touchFile(file, TestUtility.rand(8)); + assertFalse(listDir(fsOps, file, false).iterator().hasNext()); + assertFalse(listDir(fsOps, file, true).iterator().hasNext()); + + Path nonExistFile = new Path("testListFileViaListDir-nonExist"); + assertFileDoesNotExist(nonExistFile); + assertFalse(listDir(fsOps, nonExistFile, false).iterator().hasNext()); + assertFalse(listDir(fsOps, nonExistFile, true).iterator().hasNext()); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testListFiles(ObjectStorage store, FsOps fsOps) { + setStorage(store); + Path dir = path("testListEmptyFiles"); + mkdir(dir); + + assertFalse(listFiles(fsOps, dir, false).iterator().hasNext()); + assertFalse(listFiles(fsOps, dir, true).iterator().hasNext()); + + mkdir(new Path(dir, "subDir")); + assertFalse(listFiles(fsOps, dir, false).iterator().hasNext()); + assertFalse(listFiles(fsOps, dir, true).iterator().hasNext()); + + RawFileStatus subDir = listDir(fsOps, dir, false).iterator().next(); + assertFalse(subDir.isFile()); + assertEquals("subDir", subDir.getPath().getName()); + + ObjectInfo fileObj = touchFile(new Path(dir, "subFile"), TestUtility.rand(8)); + RawFileStatus subFile = listFiles(fsOps, dir, false).iterator().next(); + assertArrayEquals(fileObj.checksum(), subFile.checksum()); + assertTrue(subFile.isFile()); + + assertFalse(fsOps.isEmptyDirectory(dir)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testRecursiveList(ObjectStorage store, FsOps fsOps) { + setStorage(store); + Path root = path("root"); + Path file1 = path("root", "file1"); + Path file2 = path("root", "afile2"); + Path dir1 = path("root", "dir1"); + Path file3 = path("root", "dir1", "file3"); + + mkdir(root); + mkdir(dir1); + touchFile(file1, TestUtility.rand(8)); + touchFile(file2, TestUtility.rand(8)); + 
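// file3 sits under dir1, so it should only appear in recursive listings. +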
touchFile(file3, TestUtility.rand(8)); + + // List result is in sorted lexicographical order if recursive is false + Assertions.assertThat(listDir(fsOps, root, false)) + .hasSize(3) + .extracting(f -> f.getPath().getName()) + .contains("afile2", "dir1", "file1"); + + // List result is in sorted lexicographical order if recursive is false + Assertions.assertThat(listFiles(fsOps, root, false)) + .hasSize(2) + .extracting(f -> f.getPath().getName()) + .contains("afile2", "file1"); + + // listDir with recursive=true doesn't guarantee the return result in a sorted order + Assertions.assertThat(listDir(fsOps, root, true)) + .hasSize(4) + .extracting(f -> f.getPath().getName()) + .containsExactlyInAnyOrder("afile2", "dir1", "file1", "file3"); + + // listFiles with recursive=true doesn't guarantee the return result in a sorted order + Assertions.assertThat(listFiles(fsOps, root, true)) + .hasSize(3) + .extracting(f -> f.getPath().getName()) + .containsExactlyInAnyOrder("afile2", "file1", "file3"); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testRenameFile(ObjectStorage store, FsOps fsOps) throws IOException { + setStorage(store); + Path renameSrc = path("renameSrc"); + Path renameDest = path("renameDst"); + + int dataSize = 1024 * 1024; + String filename = String.format("%sMB.txt", dataSize >> 20); + Path srcFile = new Path(renameSrc, filename); + byte[] data = writeData(srcFile, dataSize); + Path dstFile = new Path(renameDest, filename); + + // The dest file and dest parent don't exist. + assertFileExist(srcFile); + assertDirDoesNotExist(renameDest); + assertFileDoesNotExist(dstFile); + + fsOps.renameFile(srcFile, dstFile, data.length); + assertFileDoesNotExist(srcFile); + assertDirExist(renameSrc); + assertFileExist(dstFile); + + try (InputStream in = store.get(ObjectUtils.pathToKey(dstFile)).stream()) { + assertArrayEquals(data, IOUtils.toByteArray(in)); + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testRenameDir(ObjectStorage store, FsOps fsOps) throws IOException { + setStorage(store); + Path renameSrc = path("renameSrc"); + Path renameDest = path("renameDst"); + + mkdir(renameSrc); + int dataSize = 1024 * 1024; + String filename = String.format("%sMB.txt", dataSize >> 20); + Path srcFile = new Path(renameSrc, filename); + Path dstFile = new Path(renameDest, filename); + byte[] data = writeData(srcFile, dataSize); + + assertFileExist(srcFile); + assertFileDoesNotExist(dstFile); + assertDirExist(renameSrc); + assertDirDoesNotExist(renameDest); + + fsOps.renameDir(renameSrc, renameDest); + assertFileDoesNotExist(srcFile); + assertDirDoesNotExist(renameSrc); + assertFileExist(dstFile); + assertDirExist(renameDest); + + try (InputStream in = store.get(ObjectUtils.pathToKey(dstFile)).stream()) { + assertArrayEquals(data, IOUtils.toByteArray(in)); + } + } + + private byte[] writeData(Path path, int size) { + byte[] data = TestUtility.rand(size); + touchFile(path, data); + return data; + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestBaseOps.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestBaseOps.java new file mode 100644 index 0000000000000..90e31cbc47bdd --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestBaseOps.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.ops; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.object.ObjectInfo; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.object.ObjectUtils; +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; + +public interface TestBaseOps { + + default Path path(String... keys) { + return new Path(String.format("/%s", Joiner.on("/").join(keys))); + } + + default void assertFileExist(Path file) { + assertNotNull(storage().head(ObjectUtils.pathToKey(file))); + } + + default void assertFileDoesNotExist(String key) { + assertNull(storage().head(key)); + } + + default void assertFileDoesNotExist(Path file) { + assertFileDoesNotExist(ObjectUtils.pathToKey(file)); + } + + default void assertDirExist(String key) { + assertNotNull(storage().head(key)); + } + + default void assertDirExist(Path path) { + assertDirExist(ObjectUtils.pathToKey(path, true)); + } + + default void assertDirDoesNotExist(String key) { + assertNull(storage().head(key)); + } + + default void assertDirDoesNotExist(Path path) { + assertDirDoesNotExist(ObjectUtils.pathToKey(path, true)); + } + + default void mkdir(Path path) { + storage().put(ObjectUtils.pathToKey(path, true), new byte[0]); + assertDirExist(path); + } + + default ObjectInfo touchFile(Path path, byte[] data) { + byte[] checksum = storage().put(ObjectUtils.pathToKey(path), data); + ObjectInfo obj = storage().head(ObjectUtils.pathToKey(path)); + assertArrayEquals(checksum, obj.checksum()); + return obj; + } + + ObjectStorage storage(); +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestDefaultFsOps.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestDefaultFsOps.java new file mode 100644 index 0000000000000..e587c71c61ab9 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestDefaultFsOps.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.ops; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.RawFileStatus; +import org.apache.hadoop.fs.tosfs.RawFileSystem; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.common.ThreadPools; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.object.ObjectStorageFactory; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.provider.Arguments; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.stream.Stream; + +import static org.apache.hadoop.fs.tosfs.object.tos.TOS.TOS_SCHEME; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public class TestDefaultFsOps extends TestBaseFsOps { + private static ExecutorService threadPool; + + static Stream provideArguments() { + // Case1: direct rename. + List values = new ArrayList<>(); + Configuration directRenameConf = new Configuration(); + directRenameConf.setBoolean(ConfKeys.FS_OBJECT_RENAME_ENABLED.key("tos"), true); + directRenameConf.setBoolean(ConfKeys.FS_ASYNC_CREATE_MISSED_PARENT.key("tos"), false); + + ObjectStorage storage0 = + ObjectStorageFactory.createWithPrefix(String.format("tos-%s/", UUIDUtils.random()), + TOS_SCHEME, TestUtility.bucket(), directRenameConf); + values.add(Arguments.of( + storage0, + new DefaultFsOps(storage0, directRenameConf, threadPool, obj -> { + long modifiedTime = RawFileSystem.dateToLong(obj.mtime()); + String path = + String.format("%s://%s/%s", storage0.scheme(), storage0.bucket().name(), obj.key()); + return new RawFileStatus(obj.size(), obj.isDir(), 0, modifiedTime, new Path(path), "fake", + obj.checksum()); + }))); + + // Case2: copied rename. 
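+ // Without enabling direct rename on the storage, DefaultFsOps falls back to copy-based rename; the 1MB multipart-copy threshold below exercises the multipart copy path for larger files.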
+ Configuration copiedRenameConf = new Configuration(); + copiedRenameConf.setLong(ConfKeys.FS_MULTIPART_COPY_THRESHOLD.key("tos"), 1L << 20); + copiedRenameConf.setBoolean(ConfKeys.FS_ASYNC_CREATE_MISSED_PARENT.key("tos"), false); + + ObjectStorage storage1 = + ObjectStorageFactory.createWithPrefix(String.format("tos-%s/", UUIDUtils.random()), + TOS_SCHEME, TestUtility.bucket(), copiedRenameConf); + values.add(Arguments.of( + storage1, + new DefaultFsOps(storage1, copiedRenameConf, threadPool, obj -> { + long modifiedTime = RawFileSystem.dateToLong(obj.mtime()); + String path = + String.format("%s://%s/%s", storage1.scheme(), storage1.bucket().name(), obj.key()); + return new RawFileStatus(obj.size(), obj.isDir(), 0, modifiedTime, new Path(path), "fake", + obj.checksum()); + }))); + + return values.stream(); + } + + @BeforeAll + public static void beforeClass() { + assumeTrue(TestEnv.checkTestEnabled()); + threadPool = ThreadPools.newWorkerPool("TestDefaultFsHelper-pool"); + } + + @AfterAll + public static void afterClass() { + if (!TestEnv.checkTestEnabled()) { + return; + } + + if (!threadPool.isShutdown()) { + threadPool.shutdown(); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestDirectoryFsOps.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestDirectoryFsOps.java new file mode 100644 index 0000000000000..eaac49ef73e90 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestDirectoryFsOps.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.ops; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.RawFileStatus; +import org.apache.hadoop.fs.tosfs.RawFileSystem; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.object.DirectoryStorage; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.object.ObjectStorageFactory; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.fs.tosfs.util.UUIDUtils; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.provider.Arguments; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import static org.apache.hadoop.fs.tosfs.object.tos.TOS.TOS_SCHEME; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +// TODO change to directory bucket configuration. 
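+// The rename and recursive-mkdir cases below are skipped via assumeTrue until the test environment provides a directory bucket.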
+public class TestDirectoryFsOps extends TestBaseFsOps { + + public static Stream<Arguments> provideArguments() { + List<Arguments> values = new ArrayList<>(); + + ObjectStorage storage = + ObjectStorageFactory.createWithPrefix(String.format("tos-%s/", UUIDUtils.random()), + TOS_SCHEME, TestUtility.bucket(), new Configuration()); + values.add(Arguments.of(storage, new DirectoryFsOps((DirectoryStorage) storage, obj -> { + long modifiedTime = RawFileSystem.dateToLong(obj.mtime()); + String path = + String.format("%s://%s/%s", storage.scheme(), storage.bucket().name(), obj.key()); + return new RawFileStatus(obj.size(), obj.isDir(), 0, modifiedTime, new Path(path), "fake", + obj.checksum()); + }))); + + return values.stream(); + } + + @BeforeAll + public static void beforeClass() { + assumeTrue(TestEnv.checkTestEnabled()); + } + + @Override + public void testRenameDir(ObjectStorage store, FsOps fsOps) { + // Will remove this override once the test environment supports directory buckets. + assumeTrue(store.bucket().isDirectory()); + } + + @Override + public void testRenameFile(ObjectStorage store, FsOps fsOps) { + // Will remove this override once the test environment supports directory buckets. + assumeTrue(store.bucket().isDirectory()); + } + + @Override + public void testCreateDirRecursive(ObjectStorage store, FsOps fsOps) { + // Will remove this override once the test environment supports directory buckets. + assumeTrue(store.bucket().isDirectory()); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestRenameOp.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestRenameOp.java new file mode 100644 index 0000000000000..159e6f8760147 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/ops/TestRenameOp.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.tosfs.ops; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.tosfs.TestEnv; +import org.apache.hadoop.fs.tosfs.common.ThreadPools; +import org.apache.hadoop.fs.tosfs.conf.ConfKeys; +import org.apache.hadoop.fs.tosfs.object.ObjectStorage; +import org.apache.hadoop.fs.tosfs.object.ObjectUtils; +import org.apache.hadoop.fs.tosfs.object.Part; +import org.apache.hadoop.fs.tosfs.util.CommonUtils; +import org.apache.hadoop.fs.tosfs.util.TempFiles; +import org.apache.hadoop.fs.tosfs.util.TestUtility; +import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeFalse; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +public class TestRenameOp implements TestBaseOps { + private static final String FILE_STORE_ROOT = TempFiles.newTempDir("TestRenameOp"); + + private ObjectStorage storage; + private ExecutorService renamePool; + + static Stream provideArguments() { + assumeTrue(TestEnv.checkTestEnabled()); + + List values = new ArrayList<>(); + List storages = TestUtility.createTestObjectStorage(FILE_STORE_ROOT); + for (ObjectStorage store : storages) { + values.add(Arguments.of(store)); + } + return values.stream(); + } + + @Override + public ObjectStorage storage() { + return storage; + } + + private void setStorage(ObjectStorage storage) { + this.storage = storage; + } + + @BeforeEach + public void prepare() { + this.renamePool = ThreadPools.newWorkerPool("renamePool"); + } + + @AfterEach + public void tearDown() { + CommonUtils.runQuietly(() -> storage().deleteAll("")); + CommonUtils.runQuietly(renamePool::shutdown); + } + + @AfterAll + public static void afterClass() { + CommonUtils.runQuietly(() -> TempFiles.deleteDir(FILE_STORE_ROOT)); + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testRenameFileDirectly(ObjectStorage store) throws IOException { + setStorage(store); + + Configuration conf = new Configuration(); + conf.setLong(ConfKeys.FS_MULTIPART_COPY_THRESHOLD.key(store.scheme()), 1L << 20); + ExtendedRenameOp operation = new ExtendedRenameOp(conf, store, renamePool); + + Path renameSrc = path("renameSrc"); + Path renameDest = path("renameDst"); + + int dataSize = 1024 * 1024; + String filename = String.format("%sMB.txt", dataSize >> 20); + Path srcFile = new Path(renameSrc, filename); + Path dstFile = new Path(renameDest, filename); + byte[] data = writeData(srcFile, dataSize); + mkdir(renameDest); + + assertFileExist(srcFile); + assertFileDoesNotExist(dstFile); + assertDirExist(renameDest); + + operation.renameFile(srcFile, dstFile, data.length); + assertFileDoesNotExist(srcFile); + assertFileExist(dstFile); + 
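// The 1MB source does not exceed the multipart-copy threshold, so the rename should be done with a simple put and record no upload parts. +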
Map<String, List<Part>> uploadInfos = operation.uploadInfos; + assertEquals(0, uploadInfos.size(), + "rename should use a simple put, so no upload parts should be recorded"); + + try (InputStream in = store.get(ObjectUtils.pathToKey(dstFile)).stream()) { + assertArrayEquals(data, IOUtils.toByteArray(in)); + } + } + + @ParameterizedTest + @MethodSource("provideArguments") + public void testRenameFileByUploadParts(ObjectStorage store) throws IOException { + setStorage(store); + + assumeFalse(store.bucket().isDirectory()); + Configuration conf = new Configuration(); + conf.setLong(ConfKeys.FS_MULTIPART_COPY_THRESHOLD.key(store.scheme()), 1L << 20); + ExtendedRenameOp operation = new ExtendedRenameOp(conf, store, renamePool); + + Path renameSrc = path("renameSrc"); + Path renameDest = path("renameDst"); + + int dataSize = 10 * 1024 * 1024; + String filename = String.format("%sMB.txt", dataSize >> 20); + Path srcFile = new Path(renameSrc, filename); + Path dstFile = new Path(renameDest, filename); + byte[] data = writeData(srcFile, dataSize); + mkdir(renameDest); + + assertFileExist(srcFile); + assertFileDoesNotExist(dstFile); + assertDirExist(renameDest); + + operation.renameFile(srcFile, dstFile, data.length); + assertFileDoesNotExist(srcFile); + assertFileExist(dstFile); + Map<String, List<Part>> uploadInfos = operation.uploadInfos; + assertTrue(uploadInfos.size() != 0, + "rename should go through multipart upload, so upload parts should be recorded"); + List<Part> parts = uploadInfos.get(ObjectUtils.pathToKey(dstFile)); + assertNotNull(parts, + "rename should go through multipart upload, so the upload info of the dest key should not be null"); + assertTrue(parts.size() >= 2, "rename should go through multipart upload," + " so the number of upload parts should be greater than or equal to 2"); + long fileLength = parts.stream().mapToLong(Part::size).sum(); + assertEquals(dataSize, fileLength); + + try (InputStream in = store.get(ObjectUtils.pathToKey(dstFile)).stream()) { + assertArrayEquals(data, IOUtils.toByteArray(in)); + } + } + + private byte[] writeData(Path path, int size) { + byte[] data = TestUtility.rand(size); + touchFile(path, data); + return data; + } + + static class ExtendedRenameOp extends RenameOp { + private Map<String, List<Part>> uploadInfos = Maps.newHashMap(); + + ExtendedRenameOp(Configuration conf, ObjectStorage storage, ExecutorService pool) { + super(conf, storage, pool); + } + + @Override + protected void finishUpload(String key, String uploadId, List<Part> uploadParts) { + super.finishUpload(key, uploadId, uploadParts); + if (!uploadInfos.isEmpty()) { + uploadInfos.clear(); + } + uploadInfos.put(key, uploadParts); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TempFiles.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TempFiles.java new file mode 100644 index 0000000000000..d4191de66a7cc --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TempFiles.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.util; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.util.Lists; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; + +public final class TempFiles implements Closeable { + private final List<String> files = Lists.newArrayList(); + private final List<String> dirs = Lists.newArrayList(); + + private TempFiles() { + } + + public static TempFiles of() { + return new TempFiles(); + } + + public String newFile() { + String p = newTempFile(); + files.add(p); + return p; + } + + public String newDir() { + return newDir(null); + } + + public String newDir(String prefix) { + String p = newTempDir(prefix); + dirs.add(p); + return p; + } + + @Override + public void close() { + files.forEach(file -> CommonUtils.runQuietly(() -> TempFiles.deleteFile(file))); + files.clear(); + dirs.forEach(dir -> CommonUtils.runQuietly(() -> TempFiles.deleteDir(dir))); + dirs.clear(); + } + + public static String newTempFile() { + return String.join(File.separator, newTempDir(), UUIDUtils.random()); + } + + public static String newTempDir() { + return newTempDir(null); + } + + public static String newTempDir(String prefix) { + try { + return Files.createTempDirectory(prefix).toFile().getAbsolutePath(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + public static void deleteFile(String path) { + try { + Files.deleteIfExists(Paths.get(path)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + public static void deleteDir(String path) { + try { + FileUtils.deleteDirectory(new File(path)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestFSUtils.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestFSUtils.java new file mode 100644 index 0000000000000..cdf32997777d2 --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestFSUtils.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.tosfs.util; + +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.Test; + +import java.net.URI; +import java.net.URISyntaxException; + +import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +public class TestFSUtils { + @Test + public void testNormalizeURI() throws URISyntaxException { + URI uri = new URI("tos://abc/dir/key"); + URI normalizeURI = FSUtils.normalizeURI(uri, new Configuration()); + assertEquals("tos", normalizeURI.getScheme()); + assertEquals("abc", normalizeURI.getAuthority()); + assertEquals("abc", normalizeURI.getHost()); + assertEquals("/dir/key", normalizeURI.getPath()); + + uri = new URI("/abc/dir/key"); + normalizeURI = FSUtils.normalizeURI(uri, new Configuration()); + assertNull(uri.getScheme()); + assertEquals("file", normalizeURI.getScheme()); + assertNull(uri.getAuthority()); + assertNull(normalizeURI.getAuthority()); + assertEquals("/abc/dir/key", uri.getPath()); + assertEquals("/", normalizeURI.getPath()); + + uri = new URI("tos:///abc/dir/key"); + normalizeURI = FSUtils.normalizeURI(uri, new Configuration()); + assertEquals("tos", uri.getScheme()); + assertNull(uri.getAuthority()); + assertEquals("/abc/dir/key", uri.getPath()); + assertEquals("tos", normalizeURI.getScheme()); + assertNull(normalizeURI.getAuthority()); + assertEquals("/abc/dir/key", normalizeURI.getPath()); + + Configuration conf = new Configuration(); + conf.set(FS_DEFAULT_NAME_KEY, "tos://bucket/"); + normalizeURI = FSUtils.normalizeURI(uri, conf); + assertEquals("tos", normalizeURI.getScheme()); + assertEquals("bucket", normalizeURI.getAuthority()); + assertEquals("/", normalizeURI.getPath()); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestIterables.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestIterables.java new file mode 100644 index 0000000000000..16ba414af075c --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestIterables.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.tosfs.util; + +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.function.Function; +import java.util.function.Predicate; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestIterables { + + @Test + public void testTransform() { + List<Integer> list = Arrays.asList(1, 2, 3, 4, 5); + Function<Integer, Integer> transform = i -> i + 10; + Iterator<Integer> iter = Iterables.transform(list, transform).iterator(); + + for (int i = 0; i < 5; i++) { + assertTrue(iter.hasNext()); + int value = iter.next(); + assertEquals(10 + i + 1, value); + } + assertFalse(iter.hasNext()); + } + + @Test + public void testTransformEmptyIterable() { + List<Integer> list = Arrays.asList(); + Function<Integer, Integer> transform = i -> i + 10; + Iterator<Integer> iter = Iterables.transform(list, transform).iterator(); + + assertFalse(iter.hasNext()); + } + + @Test + public void testFilter() { + // Filter odd elements. + List<Integer> list = Arrays.asList(1, 2, 3, 4, 5); + Predicate<Integer> filter = i -> (i % 2) == 0; + Iterator<Integer> iter = Iterables.filter(list, filter).iterator(); + + for (int i = 0; i < 2; i++) { + assertTrue(iter.hasNext()); + int value = iter.next(); + assertEquals((i + 1) * 2, value); + } + assertFalse(iter.hasNext()); + + // Ignore all elements. + filter = i -> false; + iter = Iterables.filter(list, filter).iterator(); + assertFalse(iter.hasNext()); + } + + @Test + public void testFilterEmptyIterable() { + List<Integer> list = Arrays.asList(); + Predicate<Integer> filter = i -> (i % 2) == 0; + Iterator<Integer> iter = Iterables.filter(list, filter).iterator(); + + assertFalse(iter.hasNext()); + } + + // Full iterators. + @Test + public void testConcatFullIterators() { + List<Integer> expectedList = new ArrayList<>(); + List<Iterable<Integer>> iterList = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<Integer> list = new ArrayList<>(); + for (int j = 0; j < 10; j++) { + list.add(i * 10 + j); + expectedList.add(i * 10 + j); + } + iterList.add(list); + } + + verifyConcat(expectedList.iterator(), iterList); + } + + // Empty iterators. + @Test + public void testConcatEmptyIterators() { + List<Integer> expectedList = new ArrayList<>(); + List<Iterable<Integer>> iterList = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + iterList.add(Collections.emptyList()); + } + + verifyConcat(expectedList.iterator(), iterList); + } + + // Mix full and empty iterators. + @Test + public void testConcatMixFullAndEmptyIterators() { + List<Integer> expectedList = new ArrayList<>(); + List<Iterable<Integer>> iterList = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<Integer> list = new ArrayList<>(); + for (int j = 0; j < 10; j++) { + list.add(i * 10 + j); + expectedList.add(i * 10 + j); + } + iterList.add(list); + iterList.add(Collections.emptyList()); + iterList.add(Collections.emptyList()); + } + + verifyConcat(expectedList.iterator(), iterList); + } + + // Invalid iterators. + @Test + public void testConcatNullMetaIterator() { + assertThrows(NullPointerException.class, () -> verifyConcat(Collections.emptyIterator(), null), + "Expect null verification error."); + } + + // Concat null iterators.
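+ // Element iterables whose iterator() returns null should contribute nothing to the concatenation.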
+ @Test + public void testConcatNullElementIterators() { + List<Iterable<Integer>> list = new ArrayList<>(); + for (int i = 0; i < 3; i++) { + list.add(() -> null); + } + verifyConcat(Collections.emptyIterator(), list); + } + + private <T> void verifyConcat(Iterator<T> expectedValues, Iterable<Iterable<T>> metaIter) { + Iterator<T> iter = Iterables.concat(metaIter).iterator(); + while (expectedValues.hasNext()) { + assertTrue(iter.hasNext()); + T v1 = expectedValues.next(); + T v2 = iter.next(); + assertEquals(v1, v2); + } + assertFalse(iter.hasNext()); + } +} diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestLazyReload.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestLazyReload.java new file mode 100644 index 0000000000000..c4c0d9ffb4ddf --- /dev/null +++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestLazyReload.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.tosfs.util; + +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Iterator; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestLazyReload { + @Test + public void testLoadWithFilterCondition() { + LazyReload<Integer> integers = new LazyReload<>(() -> { + Iterator<Integer> source = Arrays.asList(1, 3, 5, 2, 4, 6).iterator(); + return buf -> { + if (!source.hasNext()) { + return true; + } + + int pollCnt = 2; + while (source.hasNext() && pollCnt-- > 0) { + Integer item = source.next(); + if (item % 2 == 0) { + buf.add(item); + } + } + + return !source.hasNext(); + }; + }); + + Iterator<Integer> iterator = integers.iterator(); + assertTrue(iterator.hasNext()); + assertEquals(2, (int) iterator.next()); + assertEquals(4, (int) iterator.next()); + assertEquals(6, (int) iterator.next()); + assertFalse(iterator.hasNext()); + } + + @Test + public void testLoadResultIsIdempotent() { + LazyReload<Integer> integers = new LazyReload<>(() -> { + Iterator<Integer> source = Arrays.asList(1, 3, 5, 2, 4, 6).iterator(); + return buf -> { + if (!source.hasNext()) { + return true; + } + + int pollCnt = 2; + while (source.hasNext() && pollCnt-- > 0) { + Integer item = source.next(); + buf.add(item); + } + + return !source.hasNext(); + }; + }); + Iterator<Integer> iterator1 = integers.iterator(); + Iterator<Integer> iterator2 = integers.iterator(); + + assertEquals(1, (int) iterator1.next()); + assertEquals(1, (int) iterator2.next()); + assertEquals(3, (int) iterator1.next()); + assertEquals(3, (int) iterator2.next()); + assertEquals(5, (int) iterator1.next()); + assertEquals(5, (int) iterator2.next()); + + assertEquals(2, (int)
+    assertEquals(2, (int) iterator1.next());
+    assertEquals(4, (int) iterator1.next());
+    assertEquals(6, (int) iterator1.next());
+    assertEquals(2, (int) iterator2.next());
+    assertEquals(4, (int) iterator2.next());
+    assertEquals(6, (int) iterator2.next());
+  }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestRange.java b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestRange.java
new file mode 100644
index 0000000000000..08fcae1e20c7a
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/java/org/apache/hadoop/fs/tosfs/util/TestRange.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.tosfs.util;
+
+import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public class TestRange {
+
+  @Test
+  public void testInclude() {
+    Object[][] inputs = new Object[][]{
+        new Object[]{Range.of(0, 0), 0L, false},
+        new Object[]{Range.of(0, 1), 0L, true},
+        new Object[]{Range.of(1, 1), 0L, false},
+        new Object[]{Range.of(1, 1), 1L, true},
+        new Object[]{Range.of(1, 1), 2L, false},
+        new Object[]{Range.of(1, 99), 0L, false},
+        new Object[]{Range.of(1, 99), 1L, true},
+        new Object[]{Range.of(1, 99), 99L, true},
+        new Object[]{Range.of(1, 99), 100L, false}
+    };
+
+    for (Object[] input : inputs) {
+      Range r = (Range) input[0];
+      long pos = (long) input[1];
+      boolean expected = (boolean) input[2];
+
+      assertEquals(expected, r.include(pos));
+    }
+  }
+
+  @Test
+  public void testOverlap() {
+    Object[][] inputs = new Object[][]{
+        new Object[]{Range.of(0, 0), Range.of(0, 0), false},
+        new Object[]{Range.of(0, 1), Range.of(0, 1), true},
+        new Object[]{Range.of(0, 1), Range.of(1, 0), false},
+        new Object[]{Range.of(0, 1), Range.of(1, 1), false},
+        new Object[]{Range.of(0, 2), Range.of(1, 1), true},
+        new Object[]{Range.of(0, 2), Range.of(0, 1), true},
+        new Object[]{Range.of(0, 2), Range.of(1, 2), true},
+        new Object[]{Range.of(0, 2), Range.of(2, 0), false},
+        new Object[]{Range.of(0, 2), Range.of(2, 1), false},
+        new Object[]{Range.of(5, 9), Range.of(0, 5), false},
+        new Object[]{Range.of(5, 9), Range.of(0, 6), true}
+    };
+
+    for (Object[] input : inputs) {
+      Range l = (Range) input[0];
+      Range r = (Range) input[1];
+      boolean expect = (boolean) input[2];
+
+      assertEquals(expect, l.overlap(r));
+    }
+  }
+
+  @Test
+  public void testSplit() {
+    assertEquals(Range.split(10, 3),
+        ImmutableList.of(Range.of(0, 3), Range.of(3, 3), Range.of(6, 3), Range.of(9, 1)));
+    assertEquals(Range.split(10, 5),
+        ImmutableList.of(Range.of(0, 5), Range.of(5, 5)));
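+    // A split size larger than the total length yields a single range covering the whole extent.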
+    assertEquals(Range.split(10, 12),
+        ImmutableList.of(Range.of(0, 10)));
+    assertEquals(Range.split(2, 1),
+        ImmutableList.of(Range.of(0, 1), Range.of(1, 1)));
+  }
+}
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/resources/contract/tos.xml b/hadoop-cloud-storage-project/hadoop-tos/src/test/resources/contract/tos.xml
new file mode 100644
index 0000000000000..f1074e83d3c94
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/resources/contract/tos.xml
@@ -0,0 +1,136 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+
+  <property>
+    <name>fs.contract.test.fs.tos</name>
+    <value>tos://{your_bucket}/</value>
+  </property>
+
+  <property>
+    <name>fs.contract.test.random-seek-count</name>
+    <value>10</value>
+  </property>
+
+  <property>
+    <name>fs.contract.is-blobstore</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.create-visibility-delayed</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.is-case-sensitive</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.rename-returns-false-if-source-missing</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.rename-returns-false-if-dest-exists</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-append</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-atomic-directory-delete</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-atomic-rename</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-block-locality</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-concat</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-unbuffer</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.rename-creates-dest-dirs</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.test.root-tests-enabled</name>
+    <value>false</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-getfilestatus</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-seek</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-seek-on-closed-file</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.rejects-seek-past-eof</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-strict-exceptions</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-multipartuploader</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.contract.supports-unix-permissions</name>
+    <value>false</value>
+  </property>
+
+</configuration>
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/resources/core-site.xml b/hadoop-cloud-storage-project/hadoop-tos/src/test/resources/core-site.xml
new file mode 100644
index 0000000000000..c4829ba490767
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/resources/core-site.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<configuration>
+
+  <property>
+    <name>hadoop.tmp.dir</name>
+    <value>target/build/test</value>
+    <description>A base for other temporary directories.</description>
+    <final>true</final>
+  </property>
+
+  <property>
+    <name>hadoop.security.authentication</name>
+    <value>simple</value>
+  </property>
+
+  <property>
+    <name>fs.tos.impl</name>
+    <value>org.apache.hadoop.fs.tosfs.TosFileSystem</value>
+  </property>
+
+  <property>
+    <name>fs.AbstractFileSystem.tos.impl</name>
+    <value>org.apache.hadoop.fs.tosfs.TosFS</value>
+  </property>
+
+  <property>
+    <name>fs.tos.impl.disable.cache</name>
+    <value>true</value>
+  </property>
+
+  <property>
+    <name>fs.filestore.impl</name>
+    <value>org.apache.hadoop.fs.tosfs.RawFileSystem</value>
+  </property>
+
+  <property>
+    <name>fs.AbstractFileSystem.filestore.impl</name>
+    <value>org.apache.hadoop.fs.tosfs.RawFS</value>
+  </property>
+
+  <property>
+    <name>fs.filestore.impl.disable.cache</name>
+    <value>true</value>
+  </property>
+
+</configuration>
diff --git a/hadoop-cloud-storage-project/hadoop-tos/src/test/resources/log4j.properties b/hadoop-cloud-storage-project/hadoop-tos/src/test/resources/log4j.properties
new file mode 100644
index 0000000000000..c671ccce5120c
--- /dev/null
+++ b/hadoop-cloud-storage-project/hadoop-tos/src/test/resources/log4j.properties
@@ -0,0 +1,23 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# log4j configuration used during build and unit tests
+
+log4j.rootLogger=info,stdout
+log4j.threshold=ALL
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n