Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore;

import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED_DEFAULT;
import static org.apache.hadoop.fs.s3a.audit.impl.S3AInternalAuditConstants.AUDIT_SPAN_HANDLER_CONTEXT;

/**
Expand All @@ -58,8 +60,14 @@ private AuditIntegration() {
public static AuditManagerS3A createAndStartAuditManager(
Configuration conf,
IOStatisticsStore iostatistics) {
ActiveAuditManagerS3A auditManager = new ActiveAuditManagerS3A(
requireNonNull(iostatistics));
AuditManagerS3A auditManager;
if (conf.getBoolean(AUDIT_ENABLED, AUDIT_ENABLED_DEFAULT)) {
auditManager = new ActiveAuditManagerS3A(
requireNonNull(iostatistics));
} else {
LOG.debug("auditing is disabled");
auditManager = stubAuditManager();
}
auditManager.init(conf);
auditManager.start();
LOG.debug("Started Audit Manager {}", auditManager);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ private S3AAuditConstants() {
*/
public static final String UNAUDITED_OPERATION = "unaudited operation";

/**
* Configuration option which switches auditing on or off.
* Value: {@value}.
*/
public static final String AUDIT_ENABLED = "fs.s3a.audit.enabled";

/**
* Default value of the {@link #AUDIT_ENABLED} option:
* auditing is off unless it is explicitly enabled.
* Value: {@value}.
*/
public static final boolean AUDIT_ENABLED_DEFAULT = false;


/**
* Name of class used for audit logs: {@value}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@
/**
* Simple No-op audit manager for use before a real
* audit chain is set up, and for testing.
* Audit spans always have a unique ID and the activation/deactivation
* operations on them will update this audit manager's active span.
* It does have the service lifecycle, so do
* create a unique instance whenever used.
*/
Expand All @@ -59,14 +57,7 @@ public class NoopAuditManagerS3A extends CompositeService
/**
* The inner auditor.
*/
private NoopAuditor auditor = NOOP_AUDITOR;

/**
* Thread local span. This defaults to being
* the unbonded span.
*/
private final ThreadLocal<AuditSpanS3A> activeSpan =
ThreadLocal.withInitial(this::getUnbondedSpan);
private final NoopAuditor auditor = NOOP_AUDITOR;

/**
* ID which is returned as a span ID in the audit event
Expand Down Expand Up @@ -160,7 +151,7 @@ public boolean checkAccess(final Path path,

@Override
public void activate(final AuditSpanS3A span) {
activeSpan.set(span);
/* no-op */
}

@Override
Expand All @@ -180,6 +171,6 @@ public static AuditSpanS3A createNewSpan(
final String name,
final String path1,
final String path2) {
return NOOP_AUDITOR.createSpan(name, path1, path2);
return NoopSpan.INSTANCE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ and inside the AWS S3 SDK, immediately before the request is executed.
The full architecture is covered in [Auditing Architecture](auditing_architecture.html);
this document covers its use.

## Important: Auditing is disabled by default

Because of its use of `ThreadLocal` fields, this auditing feature leaks memory as S3A filesystem
instances are created and deleted.
This causes problems in long-lived processes which either do not re-use filesystem
instances, or attempt to delete all instances belonging to specific users.
See [HADOOP-18091](https://issues.apache.org/jira/browse/HADOOP-18091) _S3A auditing leaks memory through ThreadLocal references_.

To avoid these memory leaks, auditing is disabled by default.

To turn auditing on, set `fs.s3a.audit.enabled` to `true`.

## Auditing workflow

1. An _Auditor Service_ can be instantiated for each S3A FileSystem instance,
Expand Down Expand Up @@ -63,27 +75,43 @@ ideally even identifying the process/job generating load.

## Using Auditing

The Logging Auditor is enabled by default; it annotates the S3 logs.
Auditing is disabled by default.
When auditing is enabled, a Logging Auditor will annotate the S3 logs through a custom
HTTP Referrer header in requests made to S3.
Other auditor classes may be used instead.

### Auditor Options

| Option | Meaning | Default Value |
|--------|---------|---------------|
| `fs.s3a.audit.enabled` | Is auditing enabled | `false` |
| `fs.s3a.audit.service.classname` | Auditor classname | `org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor` |
| `fs.s3a.audit.request.handlers` | List of extra subclasses of AWS SDK RequestHandler2 to include in handler chain | `""` |
| `fs.s3a.audit.referrer.enabled` | Logging auditor to publish the audit information in the HTTP Referrer header | `true` |
| `fs.s3a.audit.referrer.filter` | List of audit fields to filter | `""` |
| `fs.s3a.audit.reject.out.of.span.operations` | Auditor to reject operations "outside of a span" | `false` |


### Disabling Auditing with the No-op Auditor
### Disabling Auditing
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a mention that servicename=NoopAuditor also disables auditing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it doesn't though. it stil leaks memory as the service manager is still instantiated.

which is why i've cut it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah yes, you're right. Just realized, ActiveAuditManager with NoopAuditor would still leak, but NoopAuditManager is the one without the ThreadLocal variable now. Thanks for clearing it 👍

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adding to the architecture as an FYI point


In this release of Hadoop, auditing is disabled.

The No-op auditor does not perform any logging of audit events.
This can be explicitly set globally or for specific buckets

```xml
<property>
<name>fs.s3a.audit.service.classname</name>
<value>org.apache.hadoop.fs.s3a.audit.impl.NoopAuditor</value>
<name>fs.s3a.audit.enabled</name>
<value>false</value>
</property>
```

Specific buckets can have auditing disabled, even when it is enabled globally.

```xml
<property>
<name>fs.s3a.bucket.landsat-pds.audit.enabled</name>
<value>false</value>
<description>Do not audit landsat bucket operations</description>
</property>
```

Expand All @@ -92,13 +120,18 @@ The No-op auditor does not perform any logging of audit events.
The "Logging Auditor" is the default auditor.
It provides two forms of logging

1. Logging of operations in the client via Log4J.
1. Logging of operations in the client via the active SLF4J implementation.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: typo: "implementation"

1. Dynamic generation of the HTTP Referrer header for S3 requests.

The Logging Auditor is enabled by providing its classname in the option
`fs.s3a.audit.service.classname`.

```xml
<property>
<name>fs.s3a.audit.enabled</name>
<value>true</value>
</property>

<property>
<name>fs.s3a.audit.service.classname</name>
<value>org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,18 @@ the auditor is bound to.

The auditor then creates and returns a span for the specific operation.
The AuditManagerS3A will automatically activate the span returned by the auditor
(i.e. assign it the thread local variable tracking the active span in each thread)
(i.e. assign it the thread local variable tracking the active span in each thread).

### Memory Leakage through `ThreadLocal` use

This architecture contains a critical defect,
[HADOOP-18091](https://issues.apache.org/jira/browse/HADOOP-18091) _S3A auditing leaks memory through ThreadLocal references_.

The code was written assuming that when the `ActiveAuditManagerS3A` service is
stopped, its `ThreadLocal` fields would be freed.
In fact, they are retained until the threads with references are terminated.

This is why auditing is now disabled by default until a fix is implemented.

### Class `org.apache.hadoop.fs.audit.CommonAuditContext`

Expand All @@ -141,8 +152,19 @@ thread.

### class `NoopAuditor`

This auditor creates spans which perform no auditing.
It is very efficient and reliable.
This auditor creates spans which do not do anything with the events.

```xml
<property>
<name>fs.s3a.audit.service.classname</name>
<value>org.apache.hadoop.fs.s3a.audit.impl.NoopAuditor</value>
</property>
```

This is *not* the same as disabling auditing, as it still uses the `ActiveAuditManagerS3A` class
which is the source of memory leaks.

Avoid using it except in tests: there is no benefit, simply a significant cost.

### class `LoggingAuditor`

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_FAILURE;
import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION;
import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_SPAN_CREATION;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_SERVICE_CLASSNAME;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.LOGGING_AUDIT_SERVICE;
Expand Down Expand Up @@ -68,6 +69,7 @@ public static Configuration noopAuditConfig() {
final Configuration conf = new Configuration(false);
conf.set(
AUDIT_SERVICE_CLASSNAME, NOOP_AUDIT_SERVICE);
conf.setBoolean(AUDIT_ENABLED, true);
return conf;
}

Expand All @@ -88,6 +90,7 @@ public static Configuration loggingAuditConfig() {
*/
public static Configuration enableLoggingAuditor(final Configuration conf) {
conf.set(AUDIT_SERVICE_CLASSNAME, LOGGING_AUDIT_SERVICE);
conf.setBoolean(AUDIT_ENABLED, true);
conf.setBoolean(REJECT_OUT_OF_SPAN_OPERATIONS, true);
return conf;
}
Expand Down Expand Up @@ -117,7 +120,8 @@ public static Configuration resetAuditOptions(Configuration conf) {
REFERRER_HEADER_ENABLED,
REJECT_OUT_OF_SPAN_OPERATIONS,
AUDIT_REQUEST_HANDLERS,
AUDIT_SERVICE_CLASSNAME);
AUDIT_SERVICE_CLASSNAME,
AUDIT_ENABLED);
return conf;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@
import static org.apache.hadoop.fs.s3a.Statistic.AUDIT_REQUEST_EXECUTION;
import static org.apache.hadoop.fs.s3a.Statistic.INVOCATION_ACCESS;
import static org.apache.hadoop.fs.s3a.Statistic.STORE_IO_REQUEST;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_SERVICE_CLASSNAME;
import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.resetAuditOptions;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_ENABLED;
import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_SERVICE_CLASSNAME;
import static org.apache.hadoop.fs.s3a.performance.OperationCost.FILE_STATUS_ALL_PROBES;
import static org.apache.hadoop.fs.s3a.performance.OperationCost.FILE_STATUS_FILE_PROBE;
import static org.apache.hadoop.fs.s3a.performance.OperationCost.ROOT_FILE_STATUS_PROBE;
Expand Down Expand Up @@ -67,6 +68,7 @@ public Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
resetAuditOptions(conf);
conf.set(AUDIT_SERVICE_CLASSNAME, AccessCheckingAuditor.CLASS);
conf.setBoolean(AUDIT_ENABLED, true);
return conf;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.s3a.audit;

import org.assertj.core.api.Assertions;
import org.junit.Test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.audit.impl.NoopAuditManagerS3A;
import org.apache.hadoop.fs.s3a.performance.AbstractS3ACostTest;

import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.NOOP_SPAN;
import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.resetAuditOptions;

/**
 * Checks that a filesystem created with all audit options reset
 * has auditing switched off.
 */
public class ITestAuditManagerDisabled extends AbstractS3ACostTest {

  public ITestAuditManagerDisabled() {
    super(true);
  }

  /**
   * Build the test configuration, clearing the audit options
   * so that no audit setting from the base configuration is retained.
   * @return the configuration used to create the test filesystem.
   */
  @Override
  public Configuration createConfiguration() {
    final Configuration unauditedConf = super.createConfiguration();
    resetAuditOptions(unauditedConf);
    return unauditedConf;
  }

  /**
   * The audit manager of the filesystem must be the no-op audit manager.
   */
  @Test
  public void testAuditorDisabled() {
    Assertions.assertThat(getFileSystem().getAuditManager())
        .isInstanceOf(NoopAuditManagerS3A.class);
  }

  /**
   * Every span created by the filesystem must be the one shared no-op span:
   * the first span is checked against it, the second against the first.
   */
  @Test
  public void testAuditSpansAreAllTheSame() throws Throwable {
    final S3AFileSystem filesystem = getFileSystem();

    // create both spans before asserting on either of them
    final AuditSpanS3A first = filesystem.createSpan("span1", null, null);
    final AuditSpanS3A second = filesystem.createSpan("span2", null, null);

    Assertions.assertThat(first)
        .describedAs("audit span 1")
        .isSameAs(NOOP_SPAN);
    Assertions.assertThat(second)
        .describedAs("audit span 2")
        .isSameAs(first);
  }
}
Loading