From 1ad898bde29820a8ff41267457a4370395406ae5 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 21 Mar 2018 15:36:54 -0700 Subject: [PATCH 01/67] Use the official aws-sdk instead of jet3t (#5382) * Use the official aws-sdk instead of jet3t * fix compile and serde tests * address comments and fix test * add http version string * remove redundant dependencies, fix potential NPE, and fix test * resolve TODOs * fix build * downgrade jackson version to 2.6.7 * fix test * resolve the last TODO * support proxy and endpoint configurations * fix build * remove debugging log * downgrade hadoop version to 2.8.3 * fix tests * remove unused log * fix it test * revert KerberosAuthenticator change * change hadoop-aws scope to provided in hdfs-storage * address comments * address comments --- .travis.yml | 1 + .../io/druid/guice/JsonConfigProvider.java | 4 +- .../input/impl/InputRowParserSerdeTest.java | 8 +- .../data/input/impl/JSONParseSpecTest.java | 4 +- .../input/impl/JavaScriptParseSpecTest.java | 4 +- .../data/input/impl/RegexParseSpecTest.java | 4 +- aws-common/pom.xml | 2 +- .../druid/common/aws/AWSEndpointConfig.java | 52 +++++ .../io/druid/common/aws/AWSProxyConfig.java | 61 +++++ common/pom.xml | 16 +- .../content/development/extensions-core/s3.md | 18 +- .../kerberos/KerberosAuthenticator.java | 25 +- extensions-core/hdfs-storage/pom.xml | 11 +- .../storage/hdfs/HdfsDataSegmentFinder.java | 2 +- extensions-core/s3-extensions/pom.xml | 137 ++++++----- .../firehose/s3/StaticS3FirehoseFactory.java | 127 +++++------ .../s3/AWSSessionCredentialsAdapter.java | 70 ------ .../storage/s3/S3DataSegmentArchiver.java | 4 +- .../druid/storage/s3/S3DataSegmentFinder.java | 48 ++-- .../druid/storage/s3/S3DataSegmentKiller.java | 14 +- .../druid/storage/s3/S3DataSegmentMover.java | 70 +++--- .../druid/storage/s3/S3DataSegmentPuller.java | 73 +++--- .../druid/storage/s3/S3DataSegmentPusher.java | 86 +++---- .../storage/s3/S3StorageDruidModule.java | 58 ++++- .../java/io/druid/storage/s3/S3TaskLogs.java | 50 ++-- .../s3/S3TimestampVersionedDataFinder.java | 31 ++- .../java/io/druid/storage/s3/S3Utils.java | 215 +++++++++++------- .../s3/StaticS3FirehoseFactoryTest.java | 42 +++- .../storage/s3/S3DataSegmentArchiverTest.java | 4 +- .../storage/s3/S3DataSegmentFinderTest.java | 181 ++++++++------- .../storage/s3/S3DataSegmentMoverTest.java | 133 ++++++++--- .../storage/s3/S3DataSegmentPullerTest.java | 101 ++++---- .../storage/s3/S3DataSegmentPusherTest.java | 40 +++- .../S3TimestampVersionedDataFinderTest.java | 89 ++++---- .../s3/TestAWSCredentialsProvider.java | 6 +- indexing-hadoop/pom.xml | 32 ++- .../indexer/DetermineHashedPartitionsJob.java | 2 +- .../indexing/common/config/TaskConfig.java | 2 +- .../indexing/common/task/HadoopTask.java | 2 - .../autoscaling/EC2AutoScalerSerdeTest.java | 6 +- .../JavaScriptWorkerSelectStrategyTest.java | 4 +- pom.xml | 93 +------- .../groupby/orderby/DefaultLimitSpecTest.java | 30 +-- .../topn/AlphaNumericTopNMetricSpecTest.java | 4 +- .../topn/DimensionTopNMetricSpecTest.java | 16 +- server/pom.xml | 4 + .../main/java/io/druid/guice/AWSModule.java | 4 + .../firehose/HttpFirehoseFactory.java | 2 +- .../server/AsyncQueryForwardingServlet.java | 5 +- .../jetty/JettyServerModule.java | 4 +- .../dimension/LookupDimensionSpecTest.java | 4 +- 51 files changed, 1113 insertions(+), 892 deletions(-) create mode 100644 aws-common/src/main/java/io/druid/common/aws/AWSEndpointConfig.java create mode 100644 aws-common/src/main/java/io/druid/common/aws/AWSProxyConfig.java delete 
mode 100644 extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/AWSSessionCredentialsAdapter.java diff --git a/.travis.yml b/.travis.yml index 4fb64a7bf9af..4ec30507b4f4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -47,6 +47,7 @@ matrix: - sudo: false env: - NAME="other modules test" + - AWS_REGION=us-east-1 # set a aws region for unit tests install: echo "MAVEN_OPTS='-Xmx3000m'" > ~/.mavenrc && mvn install -q -ff -DskipTests -B before_script: - unset _JAVA_OPTIONS diff --git a/api/src/main/java/io/druid/guice/JsonConfigProvider.java b/api/src/main/java/io/druid/guice/JsonConfigProvider.java index c3a9cfd64d80..609567b2dd7d 100644 --- a/api/src/main/java/io/druid/guice/JsonConfigProvider.java +++ b/api/src/main/java/io/druid/guice/JsonConfigProvider.java @@ -130,8 +130,8 @@ public static void bind( Key> supplierKey ) { - binder.bind(supplierKey).toProvider((Provider) of(propertyBase, clazz)).in(LazySingleton.class); - binder.bind(instanceKey).toProvider(new SupplierProvider(supplierKey)); + binder.bind(supplierKey).toProvider(of(propertyBase, clazz)).in(LazySingleton.class); + binder.bind(instanceKey).toProvider(new SupplierProvider<>(supplierKey)); } @SuppressWarnings("unchecked") diff --git a/api/src/test/java/io/druid/data/input/impl/InputRowParserSerdeTest.java b/api/src/test/java/io/druid/data/input/impl/InputRowParserSerdeTest.java index e81696283447..c7cb2df6340c 100644 --- a/api/src/test/java/io/druid/data/input/impl/InputRowParserSerdeTest.java +++ b/api/src/test/java/io/druid/data/input/impl/InputRowParserSerdeTest.java @@ -101,9 +101,9 @@ public void testMapInputRowParserSerde() throws Exception null ) ); - final MapInputRowParser parser2 = jsonMapper.readValue( + final MapInputRowParser parser2 = (MapInputRowParser) jsonMapper.readValue( jsonMapper.writeValueAsBytes(parser), - MapInputRowParser.class + InputRowParser.class ); final InputRow parsed = parser2.parseBatch( ImmutableMap.of( @@ -134,9 +134,9 @@ public void testMapInputRowParserNumbersSerde() throws Exception null ) ); - final MapInputRowParser parser2 = jsonMapper.readValue( + final MapInputRowParser parser2 = (MapInputRowParser) jsonMapper.readValue( jsonMapper.writeValueAsBytes(parser), - MapInputRowParser.class + InputRowParser.class ); final InputRow parsed = parser2.parseBatch( ImmutableMap.of( diff --git a/api/src/test/java/io/druid/data/input/impl/JSONParseSpecTest.java b/api/src/test/java/io/druid/data/input/impl/JSONParseSpecTest.java index de2814eda9a7..c7c73802f73d 100644 --- a/api/src/test/java/io/druid/data/input/impl/JSONParseSpecTest.java +++ b/api/src/test/java/io/druid/data/input/impl/JSONParseSpecTest.java @@ -91,9 +91,9 @@ public void testSerde() throws IOException feature ); - final JSONParseSpec serde = jsonMapper.readValue( + final JSONParseSpec serde = (JSONParseSpec) jsonMapper.readValue( jsonMapper.writeValueAsString(spec), - JSONParseSpec.class + ParseSpec.class ); Assert.assertEquals("timestamp", serde.getTimestampSpec().getTimestampColumn()); Assert.assertEquals("iso", serde.getTimestampSpec().getTimestampFormat()); diff --git a/api/src/test/java/io/druid/data/input/impl/JavaScriptParseSpecTest.java b/api/src/test/java/io/druid/data/input/impl/JavaScriptParseSpecTest.java index 805e019b1b08..b63caf43ff33 100644 --- a/api/src/test/java/io/druid/data/input/impl/JavaScriptParseSpecTest.java +++ b/api/src/test/java/io/druid/data/input/impl/JavaScriptParseSpecTest.java @@ -58,9 +58,9 @@ public void testSerde() throws IOException "abc", JavaScriptConfig.getEnabledInstance() ); - 
final JavaScriptParseSpec serde = jsonMapper.readValue( + final JavaScriptParseSpec serde = (JavaScriptParseSpec) jsonMapper.readValue( jsonMapper.writeValueAsString(spec), - JavaScriptParseSpec.class + ParseSpec.class ); Assert.assertEquals("abc", serde.getTimestampSpec().getTimestampColumn()); Assert.assertEquals("iso", serde.getTimestampSpec().getTimestampFormat()); diff --git a/api/src/test/java/io/druid/data/input/impl/RegexParseSpecTest.java b/api/src/test/java/io/druid/data/input/impl/RegexParseSpecTest.java index 68930ea6269d..5468ae0302f9 100644 --- a/api/src/test/java/io/druid/data/input/impl/RegexParseSpecTest.java +++ b/api/src/test/java/io/druid/data/input/impl/RegexParseSpecTest.java @@ -43,9 +43,9 @@ public void testSerde() throws IOException Collections.singletonList("abc"), "abc" ); - final RegexParseSpec serde = jsonMapper.readValue( + final RegexParseSpec serde = (RegexParseSpec) jsonMapper.readValue( jsonMapper.writeValueAsString(spec), - RegexParseSpec.class + ParseSpec.class ); Assert.assertEquals("abc", serde.getTimestampSpec().getTimestampColumn()); Assert.assertEquals("iso", serde.getTimestampSpec().getTimestampFormat()); diff --git a/aws-common/pom.xml b/aws-common/pom.xml index 9ce40e3951ea..c6e69b099f66 100644 --- a/aws-common/pom.xml +++ b/aws-common/pom.xml @@ -37,7 +37,7 @@ com.amazonaws - aws-java-sdk-ec2 + aws-java-sdk-bundle diff --git a/aws-common/src/main/java/io/druid/common/aws/AWSEndpointConfig.java b/aws-common/src/main/java/io/druid/common/aws/AWSEndpointConfig.java new file mode 100644 index 000000000000..773a2ab15013 --- /dev/null +++ b/aws-common/src/main/java/io/druid/common/aws/AWSEndpointConfig.java @@ -0,0 +1,52 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.common.aws; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class AWSEndpointConfig +{ + @JsonProperty + private String url; + + @JsonProperty + private String serviceName; + + @JsonProperty + private String signingRegion; + + @JsonProperty + public String getUrl() + { + return url; + } + + @JsonProperty + public String getServiceName() + { + return serviceName; + } + + @JsonProperty + public String getSigningRegion() + { + return signingRegion; + } +} diff --git a/aws-common/src/main/java/io/druid/common/aws/AWSProxyConfig.java b/aws-common/src/main/java/io/druid/common/aws/AWSProxyConfig.java new file mode 100644 index 000000000000..eda04bb37152 --- /dev/null +++ b/aws-common/src/main/java/io/druid/common/aws/AWSProxyConfig.java @@ -0,0 +1,61 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.common.aws; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class AWSProxyConfig +{ + @JsonProperty + private String host; + + @JsonProperty + private int port = -1; // AWS's default proxy port is -1 + + @JsonProperty + private String username; + + @JsonProperty + private String password; + + @JsonProperty + public String getHost() + { + return host; + } + + @JsonProperty + public int getPort() + { + return port; + } + + @JsonProperty + public String getUsername() + { + return username; + } + + @JsonProperty + public String getPassword() + { + return password; + } +} diff --git a/common/pom.xml b/common/pom.xml index 234f21af5bb2..0f07a4105ab8 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -157,21 +157,7 @@ com.lmax disruptor - - - net.java.dev.jets3t - jets3t - 0.9.4 - + org.antlr antlr4-runtime diff --git a/docs/content/development/extensions-core/s3.md b/docs/content/development/extensions-core/s3.md index f24a406946b6..3d32c9de8953 100644 --- a/docs/content/development/extensions-core/s3.md +++ b/docs/content/development/extensions-core/s3.md @@ -12,12 +12,18 @@ S3-compatible deep storage is basically either S3 or something like Google Stora ### Configuration -|Property|Possible Values|Description|Default| -|--------|---------------|-----------|-------| -|`druid.s3.accessKey`||S3 access key.|Must be set.| -|`druid.s3.secretKey`||S3 secret key.|Must be set.| -|`druid.storage.bucket`||Bucket to store in.|Must be set.| -|`druid.storage.baseKey`||Base key prefix to use, i.e. what directory.|Must be set.| +|Property|Description|Default| +|--------|-----------|-------| +|`druid.s3.accessKey`|S3 access key.|Must be set.| +|`druid.s3.secretKey`|S3 secret key.|Must be set.| +|`druid.storage.bucket`|Bucket to store in.|Must be set.| +|`druid.storage.baseKey`|Base key prefix to use, i.e. what directory.|Must be set.| +|`druid.s3.endpoint.url`|Service endpoint either with or without the protocol.|None| +|`druid.s3.endpoint.signingRegion`|Region to use for SigV4 signing of requests (e.g. 
us-west-1).|None| +|`druid.s3.proxy.host`|Proxy host to connect through.|None| +|`druid.s3.proxy.port`|Port on the proxy host to connect through.|None| +|`druid.s3.proxy.username`|User name to use when connecting through a proxy.|None| +|`druid.s3.proxy.password`|Password to use when connecting through a proxy.|None| ## StaticS3Firehose diff --git a/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/KerberosAuthenticator.java b/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/KerberosAuthenticator.java index fbdfb066c8e4..215de9caa4fb 100644 --- a/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/KerberosAuthenticator.java +++ b/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/KerberosAuthenticator.java @@ -334,13 +334,14 @@ public Principal getUserPrincipal() }; if (newToken && !token.isExpired() && token != AuthenticationToken.ANONYMOUS) { String signedToken = mySigner.sign(token.toString()); - tokenToAuthCookie(httpResponse, - signedToken, - getCookieDomain(), - getCookiePath(), - token.getExpires(), - !token.isExpired() && token.getExpires() > 0, - isHttps + tokenToAuthCookie( + httpResponse, + signedToken, + getCookieDomain(), + getCookiePath(), + token.getExpires(), + !token.isExpired() && token.getExpires() > 0, + isHttps ); } doFilter(filterChain, httpRequest, httpResponse); @@ -361,8 +362,14 @@ public Principal getUserPrincipal() } if (unauthorizedResponse) { if (!httpResponse.isCommitted()) { - tokenToAuthCookie(httpResponse, "", getCookieDomain(), - getCookiePath(), 0, false, isHttps + tokenToAuthCookie( + httpResponse, + "", + getCookieDomain(), + getCookiePath(), + 0, + false, + isHttps ); // If response code is 401. Then WWW-Authenticate Header should be // present.. reset to 403 if not found.. 
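[Editor's note, not part of the patch] The s3.md table above documents the new `druid.s3.endpoint.*` and `druid.s3.proxy.*` properties added by this change. As a minimal sketch of how such settings map onto the AWS SDK v1 client, mirroring the `S3StorageDruidModule#getAmazonS3Client` changes later in this diff: all endpoint, proxy, and credential values below are placeholders, and the class/method names are illustrative only.

```java
// Sketch only: how druid.s3.* style settings translate to an AWS SDK v1 S3 client.
// Values are placeholders; this is not code from the patch.
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.S3ClientOptions;

public class S3ClientWiringSketch
{
  public static AmazonS3 buildClient()
  {
    // druid.s3.accessKey / druid.s3.secretKey
    final AWSStaticCredentialsProvider credentials =
        new AWSStaticCredentialsProvider(new BasicAWSCredentials("access-key", "secret-key"));

    // druid.s3.proxy.host / druid.s3.proxy.port (username/password are set the same way)
    final ClientConfiguration clientConfig = new ClientConfiguration();
    clientConfig.setProxyHost("proxy.example.com");
    clientConfig.setProxyPort(8080);

    final AmazonS3Client client = new AmazonS3Client(credentials, clientConfig);

    // druid.s3.endpoint.url; serviceName/signingRegion can additionally be passed
    // via the three-argument setEndpoint overload for SigV4 signing
    client.setEndpoint("s3.us-west-1.amazonaws.com");
    client.setS3ClientOptions(S3ClientOptions.builder().enableForceGlobalBucketAccess().build());
    return client;
  }
}
```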
diff --git a/extensions-core/hdfs-storage/pom.xml b/extensions-core/hdfs-storage/pom.xml index f737da39c3e1..4c1a853744fb 100644 --- a/extensions-core/hdfs-storage/pom.xml +++ b/extensions-core/hdfs-storage/pom.xml @@ -145,16 +145,7 @@ org.apache.hadoop hadoop-aws ${hadoop.compile.version} - - - com.amazonaws - aws-java-sdk - - - - - com.amazonaws - aws-java-sdk-s3 + provided commons-io diff --git a/extensions-core/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsDataSegmentFinder.java b/extensions-core/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsDataSegmentFinder.java index 6fba009cf865..c960541e634e 100644 --- a/extensions-core/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsDataSegmentFinder.java +++ b/extensions-core/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsDataSegmentFinder.java @@ -22,11 +22,11 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Sets; import com.google.inject.Inject; +import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.logger.Logger; import io.druid.segment.loading.DataSegmentFinder; import io.druid.segment.loading.SegmentLoadingException; import io.druid.timeline.DataSegment; -import io.druid.java.util.common.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; diff --git a/extensions-core/s3-extensions/pom.xml b/extensions-core/s3-extensions/pom.xml index 3d389eab4d3f..487d33782542 100644 --- a/extensions-core/s3-extensions/pom.xml +++ b/extensions-core/s3-extensions/pom.xml @@ -18,80 +18,75 @@ - 4.0.0 + 4.0.0 - io.druid.extensions - druid-s3-extensions - druid-s3-extensions - druid-s3-extensions + io.druid.extensions + druid-s3-extensions + druid-s3-extensions + druid-s3-extensions - - io.druid - druid - 0.13.0-SNAPSHOT - ../../pom.xml - + + io.druid + druid + 0.13.0-SNAPSHOT + ../../pom.xml + - - - io.druid - druid-api - ${project.parent.version} - provided - - - io.druid - druid-aws-common - ${project.parent.version} - provided - - - io.druid - java-util - ${project.parent.version} - provided - - - net.java.dev.jets3t - jets3t - provided - - - commons-io - commons-io - provided - - - com.fasterxml.jackson.module - jackson-module-guice - ${jackson.version} - provided - + + + io.druid + druid-api + ${project.parent.version} + provided + + + io.druid + druid-aws-common + ${project.parent.version} + provided + + + io.druid + java-util + ${project.parent.version} + provided + + + commons-io + commons-io + provided + + + com.fasterxml.jackson.module + jackson-module-guice + ${jackson.version} + provided + - - - io.druid - druid-server - ${project.parent.version} - test - - - io.druid - druid-processing - ${project.parent.version} - test-jar - test - - - junit - junit - test - - - org.easymock - easymock - test - - + + + io.druid + druid-server + ${project.parent.version} + test + + + io.druid + druid-processing + ${project.parent.version} + test-jar + test + + + junit + junit + test + + + org.easymock + easymock + test + + diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java b/extensions-core/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java index fa649dc495c7..8827fc9ae31d 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java @@ -19,28 +19,32 @@ package 
io.druid.firehose.s3; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.S3ObjectSummary; import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; +import com.google.common.collect.Lists; import io.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; import io.druid.java.util.common.CompressionUtils; import io.druid.java.util.common.IAE; +import io.druid.java.util.common.IOE; +import io.druid.java.util.common.ISE; import io.druid.java.util.common.logger.Logger; import io.druid.storage.s3.S3Utils; -import org.jets3t.service.S3ServiceException; -import org.jets3t.service.ServiceException; -import org.jets3t.service.StorageObjectsChunk; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; +import java.util.Iterator; import java.util.List; import java.util.Objects; import java.util.stream.Collectors; @@ -48,18 +52,18 @@ /** * Builds firehoses that read from a predefined list of S3 objects and then dry up. */ -public class StaticS3FirehoseFactory extends PrefetchableTextFilesFirehoseFactory +public class StaticS3FirehoseFactory extends PrefetchableTextFilesFirehoseFactory { private static final Logger log = new Logger(StaticS3FirehoseFactory.class); - private static final long MAX_LISTING_LENGTH = 1024; + private static final int MAX_LISTING_LENGTH = 1024; - private final RestS3Service s3Client; + private final AmazonS3 s3Client; private final List uris; private final List prefixes; @JsonCreator public StaticS3FirehoseFactory( - @JacksonInject("s3Client") RestS3Service s3Client, + @JacksonInject("s3Client") AmazonS3 s3Client, @JsonProperty("uris") List uris, @JsonProperty("prefixes") List prefixes, @JsonProperty("maxCacheCapacityBytes") Long maxCacheCapacityBytes, @@ -70,7 +74,7 @@ public StaticS3FirehoseFactory( ) { super(maxCacheCapacityBytes, maxFetchCapacityBytes, prefetchTriggerBytes, fetchTimeout, maxFetchRetry); - this.s3Client = Preconditions.checkNotNull(s3Client, "null s3Client"); + this.s3Client = Preconditions.checkNotNull(s3Client, "s3Client"); this.uris = uris == null ? new ArrayList<>() : uris; this.prefixes = prefixes == null ? new ArrayList<>() : prefixes; @@ -104,7 +108,7 @@ public List getPrefixes() } @Override - protected Collection initObjects() throws IOException + protected Collection initObjects() throws IOException { // Here, the returned s3 objects contain minimal information without data. // Getting data is deferred until openObjectStream() is called for each object. 
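[Editor's note, not part of the patch] The comment above points out that `initObjects()` now returns lightweight `S3ObjectSummary` entries and defers the actual GET until `openObjectStream()` is called per object. A standalone sketch of that list-then-deferred-fetch pattern using the v2 listing API follows; the helper class and bucket/prefix arguments are hypothetical, not part of this patch.

```java
// Sketch of the deferred-fetch pattern: listing returns metadata-only summaries,
// and object content is downloaded only when a stream is actually opened.
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.S3ObjectSummary;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

class DeferredS3FetchSketch
{
  static List<S3ObjectSummary> listSummaries(AmazonS3 s3, String bucket, String prefix)
  {
    final List<S3ObjectSummary> summaries = new ArrayList<>();
    final ListObjectsV2Request request = new ListObjectsV2Request()
        .withBucketName(bucket)
        .withPrefix(prefix)
        .withMaxKeys(1024);
    ListObjectsV2Result result;
    do {
      result = s3.listObjectsV2(request);            // cheap: metadata only, no data transfer
      summaries.addAll(result.getObjectSummaries());
      request.setContinuationToken(result.getNextContinuationToken());
    } while (result.isTruncated());
    return summaries;
  }

  static InputStream open(AmazonS3 s3, S3ObjectSummary summary)
  {
    // the full GET happens only here, per object, when the caller actually reads it
    return s3.getObject(summary.getBucketName(), summary.getKey()).getObjectContent();
  }
}
```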
@@ -113,53 +117,49 @@ protected Collection initObjects() throws IOException .map( uri -> { final String s3Bucket = uri.getAuthority(); - final S3Object s3Object = new S3Object(extractS3Key(uri)); - s3Object.setBucketName(s3Bucket); - return s3Object; + final String key = S3Utils.extractS3Key(uri); + return S3Utils.getSingleObjectSummary(s3Client, s3Bucket, key); } ) .collect(Collectors.toList()); } else { - final List objects = new ArrayList<>(); + final List objects = new ArrayList<>(); for (URI uri : prefixes) { final String bucket = uri.getAuthority(); - final String prefix = extractS3Key(uri); + final String prefix = S3Utils.extractS3Key(uri); + try { - String lastKey = null; - StorageObjectsChunk objectsChunk; - do { - objectsChunk = s3Client.listObjectsChunked( - bucket, - prefix, - null, - MAX_LISTING_LENGTH, - lastKey - ); - Arrays.stream(objectsChunk.getObjects()) - .filter(storageObject -> !storageObject.isDirectoryPlaceholder()) - .forEach(storageObject -> objects.add((S3Object) storageObject)); - lastKey = objectsChunk.getPriorLastKey(); - } while (!objectsChunk.isListingComplete()); + final Iterator objectSummaryIterator = S3Utils.objectSummaryIterator( + s3Client, + bucket, + prefix, + MAX_LISTING_LENGTH + ); + objects.addAll(Lists.newArrayList(objectSummaryIterator)); } - catch (ServiceException outerException) { + catch (AmazonS3Exception outerException) { log.error(outerException, "Exception while listing on %s", uri); - if (outerException.getResponseCode() == 403) { + if (outerException.getStatusCode() == 403) { // The "Access Denied" means users might not have a proper permission for listing on the given uri. // Usually this is not a problem, but the uris might be the full paths to input objects instead of prefixes. // In this case, users should be able to get objects if they have a proper permission for GetObject. log.warn("Access denied for %s. Try to get the object from the uri without listing", uri); try { - final S3Object s3Object = s3Client.getObject(bucket, prefix); - if (!s3Object.isDirectoryPlaceholder()) { - objects.add(s3Object); + final ObjectMetadata objectMetadata = s3Client.getObjectMetadata(bucket, prefix); + + if (!S3Utils.isDirectoryPlaceholder(prefix, objectMetadata)) { + objects.add(S3Utils.getSingleObjectSummary(s3Client, bucket, prefix)); } else { - throw new IOException(uri + " is a directory placeholder, " - + "but failed to get the object list under the directory due to permission"); + throw new IOE( + "[%s] is a directory placeholder, " + + "but failed to get the object list under the directory due to permission", + uri + ); } } - catch (S3ServiceException innerException) { + catch (AmazonS3Exception innerException) { throw new IOException(innerException); } } else { @@ -171,49 +171,46 @@ protected Collection initObjects() throws IOException } } - private static String extractS3Key(URI uri) - { - return uri.getPath().startsWith("/") - ? 
uri.getPath().substring(1) - : uri.getPath(); - } - @Override - protected InputStream openObjectStream(S3Object object) throws IOException + protected InputStream openObjectStream(S3ObjectSummary object) throws IOException { try { // Get data of the given object and open an input stream - return s3Client.getObject(object.getBucketName(), object.getKey()).getDataInputStream(); + final S3Object s3Object = s3Client.getObject(object.getBucketName(), object.getKey()); + if (s3Object == null) { + throw new ISE("Failed to get an s3 object for bucket[%s] and key[%s]", object.getBucketName(), object.getKey()); + } + return s3Object.getObjectContent(); } - catch (ServiceException e) { + catch (AmazonS3Exception e) { throw new IOException(e); } } @Override - protected InputStream openObjectStream(S3Object object, long start) throws IOException + protected InputStream openObjectStream(S3ObjectSummary object, long start) throws IOException { + final GetObjectRequest request = new GetObjectRequest(object.getBucketName(), object.getKey()); + request.setRange(start); try { - final S3Object result = s3Client.getObject( - object.getBucketName(), - object.getKey(), - null, - null, - null, - null, - start, - null - ); - - return result.getDataInputStream(); + final S3Object s3Object = s3Client.getObject(request); + if (s3Object == null) { + throw new ISE( + "Failed to get an s3 object for bucket[%s], key[%s], and start[%d]", + object.getBucketName(), + object.getKey(), + start + ); + } + return s3Object.getObjectContent(); } - catch (ServiceException e) { + catch (AmazonS3Exception e) { throw new IOException(e); } } @Override - protected InputStream wrapObjectStream(S3Object object, InputStream stream) throws IOException + protected InputStream wrapObjectStream(S3ObjectSummary object, InputStream stream) throws IOException { return object.getKey().endsWith(".gz") ? CompressionUtils.gzipInputStream(stream) : stream; } diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/AWSSessionCredentialsAdapter.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/AWSSessionCredentialsAdapter.java deleted file mode 100644 index 7a64a81e7c96..000000000000 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/AWSSessionCredentialsAdapter.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to Metamarkets Group Inc. (Metamarkets) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Metamarkets licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package io.druid.storage.s3; - -import com.amazonaws.auth.AWSCredentialsProvider; -import org.jets3t.service.security.AWSSessionCredentials; - -public class AWSSessionCredentialsAdapter extends AWSSessionCredentials -{ - private final AWSCredentialsProvider provider; - - public AWSSessionCredentialsAdapter(AWSCredentialsProvider provider) - { - super(null, null, null); - if (provider.getCredentials() instanceof com.amazonaws.auth.AWSSessionCredentials) { - this.provider = provider; - } else { - throw new IllegalArgumentException("provider does not contain session credentials"); - } - } - - @Override - protected String getTypeName() - { - return "AWSSessionCredentialsAdapter"; - } - - @Override - public String getVersionPrefix() - { - return "AWSSessionCredentialsAdapter, version: "; - } - - @Override - public String getAccessKey() - { - return provider.getCredentials().getAWSAccessKeyId(); - } - - @Override - public String getSecretKey() - { - return provider.getCredentials().getAWSSecretKey(); - } - - @Override - public String getSessionToken() - { - com.amazonaws.auth.AWSSessionCredentials sessionCredentials = - (com.amazonaws.auth.AWSSessionCredentials) provider.getCredentials(); - return sessionCredentials.getSessionToken(); - } -} diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiver.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiver.java index d7bc1b2d4912..42eef5ce819e 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiver.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentArchiver.java @@ -19,6 +19,7 @@ package io.druid.storage.s3; +import com.amazonaws.services.s3.AmazonS3; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Objects; import com.google.common.collect.ImmutableMap; @@ -28,7 +29,6 @@ import io.druid.segment.loading.LoadSpec; import io.druid.segment.loading.SegmentLoadingException; import io.druid.timeline.DataSegment; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; public class S3DataSegmentArchiver extends S3DataSegmentMover implements DataSegmentArchiver @@ -40,7 +40,7 @@ public class S3DataSegmentArchiver extends S3DataSegmentMover implements DataSeg @Inject public S3DataSegmentArchiver( @Json ObjectMapper mapper, - RestS3Service s3Client, + AmazonS3 s3Client, S3DataSegmentArchiverConfig archiveConfig, S3DataSegmentPusherConfig restoreConfig ) diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentFinder.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentFinder.java index d6d773640e8e..649554e7564b 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentFinder.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentFinder.java @@ -19,22 +19,24 @@ package io.druid.storage.s3; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.S3ObjectInputStream; +import com.amazonaws.services.s3.model.S3ObjectSummary; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Throwables; import com.google.common.collect.Sets; import com.google.inject.Inject; - +import io.druid.java.util.common.StringUtils; import 
io.druid.java.util.common.logger.Logger; import io.druid.segment.loading.DataSegmentFinder; import io.druid.segment.loading.SegmentLoadingException; import io.druid.timeline.DataSegment; -import org.jets3t.service.ServiceException; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; -import org.jets3t.service.model.StorageObject; +import java.io.ByteArrayInputStream; import java.io.IOException; -import java.io.InputStream; import java.util.Iterator; import java.util.Map; import java.util.Set; @@ -43,13 +45,13 @@ public class S3DataSegmentFinder implements DataSegmentFinder { private static final Logger log = new Logger(S3DataSegmentFinder.class); - private final RestS3Service s3Client; + private final AmazonS3 s3Client; private final ObjectMapper jsonMapper; private final S3DataSegmentPusherConfig config; @Inject public S3DataSegmentFinder( - RestS3Service s3Client, + AmazonS3 s3Client, S3DataSegmentPusherConfig config, ObjectMapper jsonMapper ) @@ -65,24 +67,24 @@ public Set findSegments(String workingDirPath, boolean updateDescri final Set segments = Sets.newHashSet(); try { - Iterator objectsIterator = S3Utils.storageObjectsIterator( + final Iterator objectSummaryIterator = S3Utils.objectSummaryIterator( s3Client, config.getBucket(), workingDirPath.length() == 0 ? config.getBaseKey() : workingDirPath, - config.getMaxListingLength()); + config.getMaxListingLength() + ); - while (objectsIterator.hasNext()) { - StorageObject storageObject = objectsIterator.next(); - storageObject.closeDataInputStream(); + while (objectSummaryIterator.hasNext()) { + final S3ObjectSummary objectSummary = objectSummaryIterator.next(); - if (S3Utils.toFilename(storageObject.getKey()).equals("descriptor.json")) { - final String descriptorJson = storageObject.getKey(); + if (S3Utils.toFilename(objectSummary.getKey()).equals("descriptor.json")) { + final String descriptorJson = objectSummary.getKey(); String indexZip = S3Utils.indexZipForSegmentPath(descriptorJson); - if (S3Utils.isObjectInBucket(s3Client, config.getBucket(), indexZip)) { - S3Object indexObject = s3Client.getObject(config.getBucket(), descriptorJson); - - try (InputStream is = indexObject.getDataInputStream()) { + if (S3Utils.isObjectInBucketIgnoringPermission(s3Client, config.getBucket(), indexZip)) { + try (S3Object indexObject = s3Client.getObject(config.getBucket(), descriptorJson); + S3ObjectInputStream is = indexObject.getObjectContent()) { + final ObjectMetadata objectMetadata = indexObject.getObjectMetadata(); final DataSegment dataSegment = jsonMapper.readValue(is, DataSegment.class); log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip); @@ -99,8 +101,10 @@ public Set findSegments(String workingDirPath, boolean updateDescri descriptorJson, indexObject ); - S3Object newDescJsonObject = new S3Object(descriptorJson, jsonMapper.writeValueAsString(dataSegment)); - s3Client.putObject(config.getBucket(), newDescJsonObject); + final ByteArrayInputStream bais = new ByteArrayInputStream( + StringUtils.toUtf8(jsonMapper.writeValueAsString(dataSegment)) + ); + s3Client.putObject(config.getBucket(), descriptorJson, bais, objectMetadata); } } segments.add(dataSegment); @@ -114,7 +118,7 @@ public Set findSegments(String workingDirPath, boolean updateDescri } } } - catch (ServiceException e) { + catch (AmazonServiceException e) { throw new SegmentLoadingException(e, "Problem interacting with S3"); } catch (IOException e) { diff --git 
a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentKiller.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentKiller.java index b1503e42b75f..4053fdd6056a 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentKiller.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentKiller.java @@ -19,14 +19,14 @@ package io.druid.storage.s3; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.AmazonS3; import com.google.inject.Inject; import io.druid.java.util.common.MapUtils; import io.druid.java.util.common.logger.Logger; import io.druid.segment.loading.DataSegmentKiller; import io.druid.segment.loading.SegmentLoadingException; import io.druid.timeline.DataSegment; -import org.jets3t.service.ServiceException; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; import java.util.Map; @@ -36,11 +36,11 @@ public class S3DataSegmentKiller implements DataSegmentKiller { private static final Logger log = new Logger(S3DataSegmentKiller.class); - private final RestS3Service s3Client; + private final AmazonS3 s3Client; @Inject public S3DataSegmentKiller( - RestS3Service s3Client + AmazonS3 s3Client ) { this.s3Client = s3Client; @@ -55,16 +55,16 @@ public void kill(DataSegment segment) throws SegmentLoadingException String s3Path = MapUtils.getString(loadSpec, "key"); String s3DescriptorPath = S3Utils.descriptorPathForSegmentPath(s3Path); - if (s3Client.isObjectInBucket(s3Bucket, s3Path)) { + if (s3Client.doesObjectExist(s3Bucket, s3Path)) { log.info("Removing index file[s3://%s/%s] from s3!", s3Bucket, s3Path); s3Client.deleteObject(s3Bucket, s3Path); } - if (s3Client.isObjectInBucket(s3Bucket, s3DescriptorPath)) { + if (s3Client.doesObjectExist(s3Bucket, s3DescriptorPath)) { log.info("Removing descriptor file[s3://%s/%s] from s3!", s3Bucket, s3DescriptorPath); s3Client.deleteObject(s3Bucket, s3DescriptorPath); } } - catch (ServiceException e) { + catch (AmazonServiceException e) { throw new SegmentLoadingException(e, "Couldn't kill segment[%s]: [%s]", segment.getIdentifier(), e); } } diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentMover.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentMover.java index 1c22418e2b4c..e50ea2cca700 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentMover.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentMover.java @@ -19,6 +19,13 @@ package io.druid.storage.s3; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.CopyObjectRequest; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; +import com.amazonaws.services.s3.model.S3ObjectSummary; +import com.amazonaws.services.s3.model.StorageClass; import com.google.common.base.Predicate; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; @@ -34,10 +41,6 @@ import io.druid.segment.loading.DataSegmentPusher; import io.druid.segment.loading.SegmentLoadingException; import io.druid.timeline.DataSegment; -import org.jets3t.service.ServiceException; -import org.jets3t.service.acl.gs.GSAccessControlList; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; import java.io.IOException; import 
java.util.Map; @@ -46,12 +49,12 @@ public class S3DataSegmentMover implements DataSegmentMover { private static final Logger log = new Logger(S3DataSegmentMover.class); - private final RestS3Service s3Client; + private final AmazonS3 s3Client; private final S3DataSegmentPusherConfig config; @Inject public S3DataSegmentMover( - RestS3Service s3Client, + AmazonS3 s3Client, S3DataSegmentPusherConfig config ) { @@ -103,7 +106,7 @@ public boolean apply(String input) .build() ); } - catch (ServiceException e) { + catch (AmazonServiceException e) { throw new SegmentLoadingException(e, "Unable to move segment[%s]: [%s]", segment.getIdentifier(), e); } } @@ -113,7 +116,7 @@ private void safeMove( final String s3Path, final String targetS3Bucket, final String targetS3Path - ) throws ServiceException, SegmentLoadingException + ) throws SegmentLoadingException { try { S3Utils.retryS3Operation( @@ -129,7 +132,7 @@ private void safeMove( selfCheckingMove(s3Bucket, targetS3Bucket, s3Path, targetS3Path, copyMsg); return null; } - catch (ServiceException | IOException | SegmentLoadingException e) { + catch (AmazonServiceException | IOException | SegmentLoadingException e) { log.info(e, "Error while trying to move " + copyMsg); throw e; } @@ -137,7 +140,7 @@ private void safeMove( ); } catch (Exception e) { - Throwables.propagateIfInstanceOf(e, ServiceException.class); + Throwables.propagateIfInstanceOf(e, AmazonServiceException.class); Throwables.propagateIfInstanceOf(e, SegmentLoadingException.class); throw Throwables.propagate(e); } @@ -155,40 +158,41 @@ private void selfCheckingMove( String s3Path, String targetS3Path, String copyMsg - ) throws ServiceException, IOException, SegmentLoadingException + ) throws IOException, SegmentLoadingException { if (s3Bucket.equals(targetS3Bucket) && s3Path.equals(targetS3Path)) { log.info("No need to move file[s3://%s/%s] onto itself", s3Bucket, s3Path); return; } - if (s3Client.isObjectInBucket(s3Bucket, s3Path)) { - final S3Object[] list = s3Client.listObjects(s3Bucket, s3Path, ""); - if (list.length == 0) { + if (s3Client.doesObjectExist(s3Bucket, s3Path)) { + final ListObjectsV2Result listResult = s3Client.listObjectsV2( + new ListObjectsV2Request() + .withBucketName(s3Bucket) + .withPrefix(s3Path) + .withMaxKeys(1) + ); + if (listResult.getKeyCount() == 0) { // should never happen throw new ISE("Unable to list object [s3://%s/%s]", s3Bucket, s3Path); } - final S3Object s3Object = list[0]; - if (s3Object.getStorageClass() != null && - s3Object.getStorageClass().equals(S3Object.STORAGE_CLASS_GLACIER)) { - throw new ServiceException(StringUtils.format( - "Cannot move file[s3://%s/%s] of storage class glacier, skipping.", - s3Bucket, - s3Path - )); + final S3ObjectSummary objectSummary = listResult.getObjectSummaries().get(0); + if (objectSummary.getStorageClass() != null && + StorageClass.fromValue(StringUtils.toUpperCase(objectSummary.getStorageClass())).equals(StorageClass.Glacier)) { + throw new AmazonServiceException( + StringUtils.format( + "Cannot move file[s3://%s/%s] of storage class glacier, skipping.", + s3Bucket, + s3Path + ) + ); } else { log.info("Moving file %s", copyMsg); - final S3Object target = new S3Object(targetS3Path); + final CopyObjectRequest copyRequest = new CopyObjectRequest(s3Bucket, s3Path, targetS3Bucket, targetS3Path); if (!config.getDisableAcl()) { - target.setAcl(GSAccessControlList.REST_CANNED_BUCKET_OWNER_FULL_CONTROL); + copyRequest.setAccessControlList(S3Utils.grantFullControlToBucketOwner(s3Client, targetS3Bucket)); } - 
s3Client.copyObject( - s3Bucket, - s3Path, - targetS3Bucket, - target, - false - ); - if (!s3Client.isObjectInBucket(targetS3Bucket, targetS3Path)) { + s3Client.copyObject(copyRequest); + if (!s3Client.doesObjectExist(targetS3Bucket, targetS3Path)) { throw new IOE( "After copy was reported as successful the file doesn't exist in the target location [%s]", copyMsg @@ -199,7 +203,7 @@ private void selfCheckingMove( } } else { // ensure object exists in target location - if (s3Client.isObjectInBucket(targetS3Bucket, targetS3Path)) { + if (s3Client.doesObjectExist(targetS3Bucket, targetS3Path)) { log.info( "Not moving file [s3://%s/%s], already present in target location [s3://%s/%s]", s3Bucket, s3Path, diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPuller.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPuller.java index 1b2765af2094..55a00a76b8b8 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPuller.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPuller.java @@ -19,6 +19,11 @@ package io.druid.storage.s3; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.S3ObjectSummary; import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.base.Throwables; @@ -33,17 +38,15 @@ import io.druid.java.util.common.RE; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.UOE; +import io.druid.java.util.common.io.Closer; import io.druid.java.util.common.logger.Logger; import io.druid.segment.loading.SegmentLoadingException; import io.druid.segment.loading.URIDataPuller; import io.druid.timeline.DataSegment; -import org.jets3t.service.S3ServiceException; -import org.jets3t.service.ServiceException; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.StorageObject; import javax.tools.FileObject; import java.io.File; +import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -59,17 +62,15 @@ public class S3DataSegmentPuller implements URIDataPuller { public static final int DEFAULT_RETRY_COUNT = 3; - public static FileObject buildFileObject(final URI uri, final RestS3Service s3Client) throws ServiceException + private static FileObject buildFileObject(final URI uri, final AmazonS3 s3Client) throws AmazonServiceException { final S3Coords coords = new S3Coords(checkURI(uri)); - final StorageObject s3Obj = s3Client.getObjectDetails(coords.bucket, coords.path); + final S3ObjectSummary objectSummary = S3Utils.getSingleObjectSummary(s3Client, coords.bucket, coords.path); final String path = uri.getPath(); return new FileObject() { - final Object inputStreamOpener = new Object(); - volatile boolean streamAcquired = false; - volatile StorageObject storageObject = s3Obj; + S3Object s3Object = null; @Override public URI toUri() @@ -84,22 +85,33 @@ public String getName() return Files.getNameWithoutExtension(path) + (Strings.isNullOrEmpty(ext) ? "" : ("." + ext)); } + /** + * Returns an input stream for a s3 object. The returned input stream is not thread-safe. 
+ */ @Override public InputStream openInputStream() throws IOException { try { - synchronized (inputStreamOpener) { - if (streamAcquired) { - return storageObject.getDataInputStream(); - } + if (s3Object == null) { // lazily promote to full GET - storageObject = s3Client.getObject(s3Obj.getBucketName(), s3Obj.getKey()); - final InputStream stream = storageObject.getDataInputStream(); - streamAcquired = true; - return stream; + s3Object = s3Client.getObject(objectSummary.getBucketName(), objectSummary.getKey()); } + + final InputStream in = s3Object.getObjectContent(); + final Closer closer = Closer.create(); + closer.register(in); + closer.register(s3Object); + + return new FilterInputStream(in) + { + @Override + public void close() throws IOException + { + closer.close(); + } + }; } - catch (ServiceException e) { + catch (AmazonServiceException e) { throw new IOE(e, "Could not load S3 URI [%s]", uri); } } @@ -131,7 +143,7 @@ public Writer openWriter() @Override public long getLastModified() { - return s3Obj.getLastModifiedDate().getTime(); + return objectSummary.getLastModified().getTime(); } @Override @@ -149,11 +161,11 @@ public boolean delete() protected static final String BUCKET = "bucket"; protected static final String KEY = "key"; - protected final RestS3Service s3Client; + protected final AmazonS3 s3Client; @Inject public S3DataSegmentPuller( - RestS3Service s3Client + AmazonS3 s3Client ) { this.s3Client = s3Client; @@ -180,7 +192,7 @@ public InputStream openStream() throws IOException try { return buildFileObject(uri, s3Client).openInputStream(); } - catch (ServiceException e) { + catch (AmazonServiceException e) { if (e.getCause() != null) { if (S3Utils.S3RETRY.apply(e)) { throw new IOException("Recoverable exception", e); @@ -242,7 +254,7 @@ public InputStream getInputStream(URI uri) throws IOException try { return buildFileObject(uri, s3Client).openInputStream(); } - catch (ServiceException e) { + catch (AmazonServiceException e) { throw new IOE(e, "Could not load URI [%s]", uri); } } @@ -259,8 +271,8 @@ public boolean apply(Throwable e) if (e == null) { return false; } - if (e instanceof ServiceException) { - return S3Utils.isServiceExceptionRecoverable((ServiceException) e); + if (e instanceof AmazonServiceException) { + return S3Utils.isServiceExceptionRecoverable((AmazonServiceException) e); } if (S3Utils.S3RETRY.apply(e)) { return true; @@ -284,10 +296,11 @@ public boolean apply(Throwable e) public String getVersion(URI uri) throws IOException { try { - final FileObject object = buildFileObject(uri, s3Client); - return StringUtils.format("%d", object.getLastModified()); + final S3Coords coords = new S3Coords(checkURI(uri)); + final S3ObjectSummary objectSummary = S3Utils.getSingleObjectSummary(s3Client, coords.bucket, coords.path); + return StringUtils.format("%d", objectSummary.getLastModified().getTime()); } - catch (ServiceException e) { + catch (AmazonServiceException e) { if (S3Utils.isServiceExceptionRecoverable(e)) { // The recoverable logic is always true for IOException, so we want to only pass IOException if it is recoverable throw new IOE(e, "Could not fetch last modified timestamp from URI [%s]", uri); @@ -301,10 +314,10 @@ private boolean isObjectInBucket(final S3Coords coords) throws SegmentLoadingExc { try { return S3Utils.retryS3Operation( - () -> S3Utils.isObjectInBucket(s3Client, coords.bucket, coords.path) + () -> S3Utils.isObjectInBucketIgnoringPermission(s3Client, coords.bucket, coords.path) ); } - catch (S3ServiceException | IOException e) { + catch 
(AmazonS3Exception | IOException e) { throw new SegmentLoadingException(e, "S3 fail! Key[%s]", coords); } catch (Exception e) { diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPusher.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPusher.java index 985121bef622..981d24a7ef5e 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPusher.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3DataSegmentPusher.java @@ -19,21 +19,20 @@ package io.druid.storage.s3; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.PutObjectRequest; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.inject.Inject; -import io.druid.java.util.emitter.EmittingLogger; import io.druid.java.util.common.CompressionUtils; import io.druid.java.util.common.StringUtils; +import io.druid.java.util.emitter.EmittingLogger; import io.druid.segment.SegmentUtils; import io.druid.segment.loading.DataSegmentPusher; import io.druid.timeline.DataSegment; -import org.jets3t.service.ServiceException; -import org.jets3t.service.acl.gs.GSAccessControlList; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; import java.io.File; import java.io.IOException; @@ -46,13 +45,13 @@ public class S3DataSegmentPusher implements DataSegmentPusher { private static final EmittingLogger log = new EmittingLogger(S3DataSegmentPusher.class); - private final RestS3Service s3Client; + private final AmazonS3 s3Client; private final S3DataSegmentPusherConfig config; private final ObjectMapper jsonMapper; @Inject public S3DataSegmentPusher( - RestS3Service s3Client, + AmazonS3 s3Client, S3DataSegmentPusherConfig config, ObjectMapper jsonMapper ) @@ -97,45 +96,43 @@ public DataSegment push(final File indexFilesDir, final DataSegment inSegment, f final File zipOutFile = File.createTempFile("druid", "index.zip"); final long indexSize = CompressionUtils.zip(indexFilesDir, zipOutFile); + final DataSegment outSegment = inSegment.withSize(indexSize) + .withLoadSpec(makeLoadSpec(config.getBucket(), s3Path)) + .withBinaryVersion(SegmentUtils.getVersionFromDir(indexFilesDir)); + + final File descriptorFile = File.createTempFile("druid", "descriptor.json"); + // Avoid using Guava in DataSegmentPushers because they might be used with very diverse Guava versions in + // runtime, and because Guava deletes methods over time, that causes incompatibilities. + Files.write(descriptorFile.toPath(), jsonMapper.writeValueAsBytes(outSegment)); + try { return S3Utils.retryS3Operation( () -> { - S3Object toPush = new S3Object(zipOutFile); - putObject(config.getBucket(), s3Path, toPush, replaceExisting); - - final DataSegment outSegment = inSegment.withSize(indexSize) - .withLoadSpec(makeLoadSpec(config.getBucket(), toPush.getKey())) - .withBinaryVersion(SegmentUtils.getVersionFromDir(indexFilesDir)); - - File descriptorFile = File.createTempFile("druid", "descriptor.json"); - // Avoid using Guava in DataSegmentPushers because they might be used with very diverse Guava versions in - // runtime, and because Guava deletes methods over time, that causes incompatibilities. 
- Files.write(descriptorFile.toPath(), jsonMapper.writeValueAsBytes(outSegment)); - S3Object descriptorObject = new S3Object(descriptorFile); - - putObject( + uploadFileIfPossible(s3Client, config.getBucket(), s3Path, zipOutFile, replaceExisting); + uploadFileIfPossible( + s3Client, config.getBucket(), S3Utils.descriptorPathForSegmentPath(s3Path), - descriptorObject, + descriptorFile, replaceExisting ); - log.info("Deleting zipped index File[%s]", zipOutFile); - zipOutFile.delete(); - - log.info("Deleting descriptor file[%s]", descriptorFile); - descriptorFile.delete(); - return outSegment; } ); } - catch (ServiceException e) { + catch (AmazonServiceException e) { throw new IOException(e); } catch (Exception e) { throw Throwables.propagate(e); } + finally { + log.info("Deleting temporary cached index.zip"); + zipOutFile.delete(); + log.info("Deleting temporary cached descriptor.json"); + descriptorFile.delete(); + } } @Override @@ -163,21 +160,26 @@ private Map makeLoadSpec(String bucket, String key) ); } - private void putObject(String bucketName, String path, S3Object object, boolean replaceExisting) - throws ServiceException + private void uploadFileIfPossible( + AmazonS3 s3Client, + String bucket, + String key, + File file, + boolean replaceExisting + ) { - object.setBucketName(bucketName); - object.setKey(path); - if (!config.getDisableAcl()) { - object.setAcl(GSAccessControlList.REST_CANNED_BUCKET_OWNER_FULL_CONTROL); - } - - log.info("Pushing %s.", object); - - if (!replaceExisting && S3Utils.isObjectInBucket(s3Client, bucketName, object.getKey())) { - log.info("Skipping push because key [%s] exists && replaceExisting == false", object.getKey()); + if (!replaceExisting && S3Utils.isObjectInBucketIgnoringPermission(s3Client, bucket, key)) { + log.info("Skipping push because key [%s] exists && replaceExisting == false", key); } else { - s3Client.putObject(bucketName, object); + final PutObjectRequest indexFilePutRequest = new PutObjectRequest(bucket, key, file); + + if (!config.getDisableAcl()) { + indexFilePutRequest.setAccessControlList( + S3Utils.grantFullControlToBucketOwner(s3Client, bucket) + ); + } + log.info("Pushing [%s] to bucket[%s] and key[%s].", file, bucket, key); + s3Client.putObject(indexFilePutRequest); } } } diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3StorageDruidModule.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3StorageDruidModule.java index 8b9b3fdd7cc4..ac7839f43631 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3StorageDruidModule.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3StorageDruidModule.java @@ -19,8 +19,12 @@ package io.druid.storage.s3; +import com.amazonaws.ClientConfiguration; +import com.amazonaws.ClientConfigurationFactory; import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSSessionCredentials; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.S3ClientOptions; import com.fasterxml.jackson.core.Version; import com.fasterxml.jackson.databind.Module; import com.google.common.collect.ImmutableList; @@ -28,13 +32,14 @@ import com.google.inject.Provides; import com.google.inject.multibindings.MapBinder; import io.druid.common.aws.AWSCredentialsConfig; +import io.druid.common.aws.AWSEndpointConfig; +import io.druid.common.aws.AWSProxyConfig; import io.druid.data.SearchableVersionedDataFinder; import io.druid.guice.Binders; 
import io.druid.guice.JsonConfigProvider; import io.druid.guice.LazySingleton; import io.druid.initialization.DruidModule; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.security.AWSCredentials; +import org.apache.commons.lang.StringUtils; import java.util.List; @@ -75,6 +80,8 @@ public void setupModule(SetupContext context) public void configure(Binder binder) { JsonConfigProvider.bind(binder, "druid.s3", AWSCredentialsConfig.class); + JsonConfigProvider.bind(binder, "druid.s3.proxy", AWSProxyConfig.class); + JsonConfigProvider.bind(binder, "druid.s3.endpoint", AWSEndpointConfig.class); MapBinder.newMapBinder(binder, String.class, SearchableVersionedDataFinder.class) .addBinding("s3") .to(S3TimestampVersionedDataFinder.class) @@ -101,15 +108,44 @@ public void configure(Binder binder) @Provides @LazySingleton - public RestS3Service getRestS3Service(AWSCredentialsProvider provider) + public AmazonS3 getAmazonS3Client( + AWSCredentialsProvider provider, + AWSProxyConfig proxyConfig, + AWSEndpointConfig endpointConfig + ) { - if (provider.getCredentials() instanceof AWSSessionCredentials) { - return new RestS3Service(new AWSSessionCredentialsAdapter(provider)); - } else { - return new RestS3Service(new AWSCredentials( - provider.getCredentials().getAWSAccessKeyId(), - provider.getCredentials().getAWSSecretKey() - )); + // AmazonS3ClientBuilder can't be used because it makes integration tests failed + final ClientConfiguration configuration = new ClientConfigurationFactory().getConfig(); + final AmazonS3Client client = new AmazonS3Client(provider, setProxyConfig(configuration, proxyConfig)); + + if (StringUtils.isNotEmpty(endpointConfig.getUrl())) { + if (StringUtils.isNotEmpty(endpointConfig.getServiceName()) && + StringUtils.isNotEmpty(endpointConfig.getSigningRegion())) { + client.setEndpoint(endpointConfig.getUrl(), endpointConfig.getServiceName(), endpointConfig.getSigningRegion()); + } else { + client.setEndpoint(endpointConfig.getUrl()); + } + } + + client.setS3ClientOptions(S3ClientOptions.builder().enableForceGlobalBucketAccess().build()); + + return client; + } + + private static ClientConfiguration setProxyConfig(ClientConfiguration conf, AWSProxyConfig proxyConfig) + { + if (StringUtils.isNotEmpty(proxyConfig.getHost())) { + conf.setProxyHost(proxyConfig.getHost()); + } + if (proxyConfig.getPort() != -1) { + conf.setProxyPort(proxyConfig.getPort()); + } + if (StringUtils.isNotEmpty(proxyConfig.getUsername())) { + conf.setProxyUsername(proxyConfig.getUsername()); + } + if (StringUtils.isNotEmpty(proxyConfig.getPassword())) { + conf.setProxyPassword(proxyConfig.getPassword()); } + return conf; } } diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TaskLogs.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TaskLogs.java index 426221f508b3..afef97a8892c 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TaskLogs.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TaskLogs.java @@ -19,6 +19,11 @@ package io.druid.storage.s3; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.ObjectMetadata; import com.google.common.base.Optional; import com.google.common.base.Throwables; import com.google.common.io.ByteSource; @@ -27,10 +32,6 @@ import 
io.druid.java.util.common.StringUtils; import io.druid.java.util.common.logger.Logger; import io.druid.tasklogs.TaskLogs; -import org.jets3t.service.ServiceException; -import org.jets3t.service.StorageService; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.StorageObject; import java.io.File; import java.io.IOException; @@ -43,11 +44,11 @@ public class S3TaskLogs implements TaskLogs { private static final Logger log = new Logger(S3TaskLogs.class); - private final StorageService service; + private final AmazonS3 service; private final S3TaskLogsConfig config; @Inject - public S3TaskLogs(S3TaskLogsConfig config, RestS3Service service) + public S3TaskLogs(S3TaskLogsConfig config, AmazonS3 service) { this.config = config; this.service = service; @@ -59,9 +60,9 @@ public Optional streamTaskLog(final String taskid, final long offset final String taskKey = getTaskLogKey(taskid); try { - final StorageObject objectDetails = service.getObjectDetails(config.getS3Bucket(), taskKey, null, null, null, null); + final ObjectMetadata objectMetadata = service.getObjectMetadata(config.getS3Bucket(), taskKey); - return Optional.of( + return Optional.of( new ByteSource() { @Override @@ -69,36 +70,31 @@ public InputStream openStream() throws IOException { try { final long start; - final long end = objectDetails.getContentLength() - 1; + final long end = objectMetadata.getContentLength() - 1; - if (offset > 0 && offset < objectDetails.getContentLength()) { + if (offset > 0 && offset < objectMetadata.getContentLength()) { start = offset; - } else if (offset < 0 && (-1 * offset) < objectDetails.getContentLength()) { - start = objectDetails.getContentLength() + offset; + } else if (offset < 0 && (-1 * offset) < objectMetadata.getContentLength()) { + start = objectMetadata.getContentLength() + offset; } else { start = 0; } - return service.getObject( - config.getS3Bucket(), - taskKey, - null, - null, - new String[]{objectDetails.getETag()}, - null, - start, - end - ).getDataInputStream(); + final GetObjectRequest request = new GetObjectRequest(config.getS3Bucket(), taskKey) + .withMatchingETagConstraint(objectMetadata.getETag()) + .withRange(start, end); + + return service.getObject(request).getObjectContent(); } - catch (ServiceException e) { + catch (AmazonServiceException e) { throw new IOException(e); } } } ); } - catch (ServiceException e) { - if (404 == e.getResponseCode() + catch (AmazonS3Exception e) { + if (404 == e.getStatusCode() || "NoSuchKey".equals(e.getErrorCode()) || "NoSuchBucket".equals(e.getErrorCode())) { return Optional.absent(); @@ -117,9 +113,7 @@ public void pushTaskLog(final String taskid, final File logFile) throws IOExcept try { S3Utils.retryS3Operation( () -> { - final StorageObject object = new StorageObject(logFile); - object.setKey(taskKey); - service.putObject(config.getS3Bucket(), object); + service.putObject(config.getS3Bucket(), taskKey, logFile); return null; } ); diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TimestampVersionedDataFinder.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TimestampVersionedDataFinder.java index 8014ec8ac88d..2d4724851b7c 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TimestampVersionedDataFinder.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TimestampVersionedDataFinder.java @@ -19,16 +19,17 @@ package io.druid.storage.s3; +import com.amazonaws.services.s3.AmazonS3; +import 
com.amazonaws.services.s3.model.S3ObjectSummary; import com.google.common.base.Throwables; import com.google.inject.Inject; import io.druid.data.SearchableVersionedDataFinder; import io.druid.java.util.common.RetryUtils; import io.druid.java.util.common.StringUtils; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; import javax.annotation.Nullable; import java.net.URI; +import java.util.Iterator; import java.util.regex.Pattern; /** @@ -37,8 +38,10 @@ */ public class S3TimestampVersionedDataFinder extends S3DataSegmentPuller implements SearchableVersionedDataFinder { + private static final int MAX_LISTING_KEYS = 1000; + @Inject - public S3TimestampVersionedDataFinder(RestS3Service s3Client) + public S3TimestampVersionedDataFinder(AmazonS3 s3Client) { super(s3Client); } @@ -65,23 +68,27 @@ public URI getLatestVersion(final URI uri, final @Nullable Pattern pattern) final S3Coords coords = new S3Coords(checkURI(uri)); long mostRecent = Long.MIN_VALUE; URI latest = null; - S3Object[] objects = s3Client.listObjects(coords.bucket, coords.path, null); - if (objects == null) { - return null; - } - for (S3Object storageObject : objects) { - storageObject.closeDataInputStream(); - String keyString = storageObject.getKey().substring(coords.path.length()); + final Iterator objectSummaryIterator = S3Utils.objectSummaryIterator( + s3Client, + coords.bucket, + coords.path, + MAX_LISTING_KEYS + ); + while (objectSummaryIterator.hasNext()) { + final S3ObjectSummary objectSummary = objectSummaryIterator.next(); + String keyString = objectSummary.getKey().substring(coords.path.length()); if (keyString.startsWith("/")) { keyString = keyString.substring(1); } if (pattern != null && !pattern.matcher(keyString).matches()) { continue; } - final long latestModified = storageObject.getLastModifiedDate().getTime(); + final long latestModified = objectSummary.getLastModified().getTime(); if (latestModified >= mostRecent) { mostRecent = latestModified; - latest = new URI(StringUtils.format("s3://%s/%s", storageObject.getBucketName(), storageObject.getKey())); + latest = new URI( + StringUtils.format("s3://%s/%s", objectSummary.getBucketName(), objectSummary.getKey()) + ); } } return latest; diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3Utils.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3Utils.java index 2a9372e96d08..c4fa15761066 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3Utils.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3Utils.java @@ -19,19 +19,27 @@ package io.druid.storage.s3; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.AccessControlList; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.CanonicalGrantee; +import com.amazonaws.services.s3.model.Grant; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.Permission; +import com.amazonaws.services.s3.model.S3ObjectSummary; import com.google.common.base.Joiner; import com.google.common.base.Predicate; -import com.google.common.base.Throwables; +import io.druid.java.util.common.ISE; import io.druid.java.util.common.RetryUtils; import io.druid.java.util.common.RetryUtils.Task; -import 
org.jets3t.service.ServiceException; -import org.jets3t.service.StorageObjectsChunk; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; -import org.jets3t.service.model.StorageObject; import java.io.IOException; +import java.net.URI; import java.util.Iterator; +import java.util.NoSuchElementException; /** * @@ -39,25 +47,12 @@ public class S3Utils { private static final Joiner JOINER = Joiner.on("/").skipNulls(); + private static final String MIMETYPE_JETS3T_DIRECTORY = "application/x-directory"; - public static void closeStreamsQuietly(S3Object s3Obj) - { - if (s3Obj == null) { - return; - } - - try { - s3Obj.closeDataInputStream(); - } - catch (IOException e) { - - } - } - - public static boolean isServiceExceptionRecoverable(ServiceException ex) + static boolean isServiceExceptionRecoverable(AmazonServiceException ex) { final boolean isIOException = ex.getCause() instanceof IOException; - final boolean isTimeout = "RequestTimeout".equals(((ServiceException) ex).getErrorCode()); + final boolean isTimeout = "RequestTimeout".equals(ex.getErrorCode()); return isIOException || isTimeout; } @@ -70,8 +65,8 @@ public boolean apply(Throwable e) return false; } else if (e instanceof IOException) { return true; - } else if (e instanceof ServiceException) { - return isServiceExceptionRecoverable((ServiceException) e); + } else if (e instanceof AmazonServiceException) { + return isServiceExceptionRecoverable((AmazonServiceException) e); } else { return apply(e.getCause()); } @@ -88,91 +83,81 @@ public static T retryS3Operation(Task f) throws Exception return RetryUtils.retry(f, S3RETRY, maxTries); } - public static boolean isObjectInBucket(RestS3Service s3Client, String bucketName, String objectKey) - throws ServiceException + static boolean isObjectInBucketIgnoringPermission(AmazonS3 s3Client, String bucketName, String objectKey) { try { - s3Client.getObjectDetails(bucketName, objectKey); + return s3Client.doesObjectExist(bucketName, objectKey); } - catch (ServiceException e) { - if (404 == e.getResponseCode() - || "NoSuchKey".equals(e.getErrorCode()) - || "NoSuchBucket".equals(e.getErrorCode())) { - return false; - } - if ("AccessDenied".equals(e.getErrorCode())) { + catch (AmazonS3Exception e) { + if (e.getStatusCode() == 404) { // Object is inaccessible to current user, but does exist. 
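        // (Hence the "IgnoringPermission" suffix in the method name: the object is still reported as
        // present even though the current credentials cannot read it.)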
return true; } // Something else has gone wrong throw e; } - return true; } - public static Iterator storageObjectsIterator( - final RestS3Service s3Client, + public static Iterator objectSummaryIterator( + final AmazonS3 s3Client, final String bucket, final String prefix, - final long maxListingLength + final int numMaxKeys ) { - return new Iterator() + final ListObjectsV2Request request = new ListObjectsV2Request() + .withBucketName(bucket) + .withPrefix(prefix) + .withMaxKeys(numMaxKeys); + + return new Iterator() { - private StorageObjectsChunk objectsChunk; - private int objectsChunkOffset; + private ListObjectsV2Result result; + private Iterator objectSummaryIterator; - @Override - public boolean hasNext() { - if (objectsChunk == null) { - objectsChunk = listObjectsChunkedAfter(""); - objectsChunkOffset = 0; - } - - if (objectsChunk.getObjects().length <= objectsChunkOffset) { - if (objectsChunk.isListingComplete()) { - return false; - } else { - objectsChunk = listObjectsChunkedAfter(objectsChunk.getPriorLastKey()); - objectsChunkOffset = 0; - } - } + fetchNextBatch(); + } - return true; + private void fetchNextBatch() + { + result = s3Client.listObjectsV2(request); + objectSummaryIterator = result.getObjectSummaries().iterator(); + request.setContinuationToken(result.getContinuationToken()); } - private StorageObjectsChunk listObjectsChunkedAfter(final String priorLastKey) + @Override + public boolean hasNext() { - try { - return retryS3Operation( - () -> s3Client.listObjectsChunked(bucket, prefix, null, maxListingLength, priorLastKey) - ); - } - catch (Exception e) { - throw Throwables.propagate(e); - } + return objectSummaryIterator.hasNext() || result.isTruncated(); } @Override - public StorageObject next() + public S3ObjectSummary next() { if (!hasNext()) { - throw new IllegalStateException(); + throw new NoSuchElementException(); } - StorageObject storageObject = objectsChunk.getObjects()[objectsChunkOffset]; - objectsChunkOffset++; - return storageObject; - } + if (objectSummaryIterator.hasNext()) { + return objectSummaryIterator.next(); + } - @Override - public void remove() - { - throw new UnsupportedOperationException(); - } + if (result.isTruncated()) { + fetchNextBatch(); + } + if (!objectSummaryIterator.hasNext()) { + throw new ISE( + "Failed to further iterate on bucket[%s] and prefix[%s]. 
The last continuationToken was [%s]", + bucket, + prefix, + result.getContinuationToken() + ); + } + return objectSummaryIterator.next(); + } }; } @@ -184,25 +169,93 @@ public static String constructSegmentPath(String baseKey, String storageDir) ) + "/index.zip"; } - public static String descriptorPathForSegmentPath(String s3Path) + static String descriptorPathForSegmentPath(String s3Path) { return s3Path.substring(0, s3Path.lastIndexOf("/")) + "/descriptor.json"; } - public static String indexZipForSegmentPath(String s3Path) + static String indexZipForSegmentPath(String s3Path) { return s3Path.substring(0, s3Path.lastIndexOf("/")) + "/index.zip"; } - public static String toFilename(String key) + static String toFilename(String key) { return toFilename(key, ""); } - public static String toFilename(String key, final String suffix) + static String toFilename(String key, final String suffix) { String filename = key.substring(key.lastIndexOf("/") + 1); // characters after last '/' filename = filename.substring(0, filename.length() - suffix.length()); // remove the suffix from the end return filename; } + + static AccessControlList grantFullControlToBucketOwner(AmazonS3 s3Client, String bucket) + { + final AccessControlList acl = s3Client.getBucketAcl(bucket); + acl.grantAllPermissions(new Grant(new CanonicalGrantee(acl.getOwner().getId()), Permission.FullControl)); + return acl; + } + + public static String extractS3Key(URI uri) + { + return uri.getPath().startsWith("/") ? uri.getPath().substring(1) : uri.getPath(); + } + + // Copied from org.jets3t.service.model.StorageObject.isDirectoryPlaceholder() + public static boolean isDirectoryPlaceholder(String key, ObjectMetadata objectMetadata) + { + // Recognize "standard" directory place-holder indications used by + // Amazon's AWS Console and Panic's Transmit. + if (key.endsWith("/") && objectMetadata.getContentLength() == 0) { + return true; + } + // Recognize s3sync.rb directory placeholders by MD5/ETag value. + if ("d66759af42f282e1ba19144df2d405d0".equals(objectMetadata.getETag())) { + return true; + } + // Recognize place-holder objects created by the Google Storage console + // or S3 Organizer Firefox extension. + if (key.endsWith("_$folder$") && objectMetadata.getContentLength() == 0) { + return true; + } + + // We don't use JetS3t APIs anymore, but the below check is still needed for backward compatibility. + + // Recognize legacy JetS3t directory place-holder objects, only gives + // accurate results if an object's metadata is populated. + if (objectMetadata.getContentLength() == 0 && MIMETYPE_JETS3T_DIRECTORY.equals(objectMetadata.getContentType())) { + return true; + } + return false; + } + + /** + * Gets a single {@link S3ObjectSummary} from s3. Since this method might return a wrong object if there are multiple + * objects that match the given key, this method should be used only when it's guaranteed that the given key is unique + * in the given bucket. 
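   * <p>The lookup below issues a {@code ListObjectsV2} request with the key as the prefix and
   * {@code maxKeys = 1}, so an object whose key merely starts with the given key could come back instead;
   * the bucket/key equality check throws an ISE rather than silently return such a neighbor.
   * <p>A minimal usage sketch (hypothetical caller and names, not taken from this patch):
   * <pre>
   *   final S3ObjectSummary summary = S3Utils.getSingleObjectSummary(s3Client, "my-bucket", "segments/index.zip");
   *   final long sizeInBytes = summary.getSize();
   * </pre>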
+ * + * @param s3Client s3 client + * @param bucket s3 bucket + * @param key unique key for the object to be retrieved + */ + public static S3ObjectSummary getSingleObjectSummary(AmazonS3 s3Client, String bucket, String key) + { + final ListObjectsV2Request request = new ListObjectsV2Request() + .withBucketName(bucket) + .withPrefix(key) + .withMaxKeys(1); + final ListObjectsV2Result result = s3Client.listObjectsV2(request); + + if (result.getKeyCount() == 0) { + throw new ISE("Cannot find object for bucket[%s] and key[%s]", bucket, key); + } + final S3ObjectSummary objectSummary = result.getObjectSummaries().get(0); + if (!objectSummary.getBucketName().equals(bucket) || !objectSummary.getKey().equals(key)) { + throw new ISE("Wrong object[%s] for bucket[%s] and key[%s]", objectSummary, bucket, key); + } + + return objectSummary; + } } diff --git a/extensions-core/s3-extensions/src/test/java/io/druid/firehose/s3/StaticS3FirehoseFactoryTest.java b/extensions-core/s3-extensions/src/test/java/io/druid/firehose/s3/StaticS3FirehoseFactoryTest.java index 3a5c94471cda..e60210126943 100644 --- a/extensions-core/s3-extensions/src/test/java/io/druid/firehose/s3/StaticS3FirehoseFactoryTest.java +++ b/extensions-core/s3-extensions/src/test/java/io/druid/firehose/s3/StaticS3FirehoseFactoryTest.java @@ -19,8 +19,13 @@ package io.druid.firehose.s3; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import com.fasterxml.jackson.databind.module.SimpleModule; import com.fasterxml.jackson.module.guice.ObjectMapperModule; import com.google.common.collect.ImmutableList; @@ -29,8 +34,6 @@ import com.google.inject.Injector; import com.google.inject.Provides; import io.druid.initialization.DruidModule; -import io.druid.jackson.DefaultObjectMapper; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; import org.junit.Assert; import org.junit.Test; @@ -42,7 +45,7 @@ */ public class StaticS3FirehoseFactoryTest { - private static final RestS3Service SERVICE = new RestS3Service(null); + private static final AmazonS3Client SERVICE = new AmazonS3Client(); @Test public void testSerde() throws Exception @@ -75,14 +78,14 @@ public void testSerde() throws Exception private static ObjectMapper createObjectMapper(DruidModule baseModule) { - final ObjectMapper baseMapper = new DefaultObjectMapper(); - baseModule.getJacksonModules().forEach(baseMapper::registerModule); - final Injector injector = Guice.createInjector( new ObjectMapperModule(), baseModule ); - return injector.getInstance(ObjectMapper.class); + final ObjectMapper baseMapper = injector.getInstance(ObjectMapper.class); + + baseModule.getJacksonModules().forEach(baseMapper::registerModule); + return baseMapper; } private static class TestS3Module implements DruidModule @@ -90,7 +93,9 @@ private static class TestS3Module implements DruidModule @Override public List getJacksonModules() { - return ImmutableList.of(new SimpleModule()); + // Deserializer is need for AmazonS3Client even though it is injected. + // See https://github.com/FasterXML/jackson-databind/issues/962. 
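      // The ItemDeserializer registered on the next line only satisfies Jackson's requirement that some
      // deserializer exist for the AmazonS3 type; it is never expected to be invoked, because the actual
      // instance comes from Guice injection, which is why its deserialize() simply throws
      // UnsupportedOperationException.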
+ return ImmutableList.of(new SimpleModule().addDeserializer(AmazonS3.class, new ItemDeserializer())); } @Override @@ -100,9 +105,28 @@ public void configure(Binder binder) } @Provides - public RestS3Service getRestS3Service() + public AmazonS3 getAmazonS3Client() { return SERVICE; } } + + public static class ItemDeserializer extends StdDeserializer + { + public ItemDeserializer() + { + this(null); + } + + public ItemDeserializer(Class vc) + { + super(vc); + } + + @Override + public AmazonS3 deserialize(JsonParser jp, DeserializationContext ctxt) + { + throw new UnsupportedOperationException(); + } + } } diff --git a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentArchiverTest.java b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentArchiverTest.java index d28d5f933895..d93dfc08c553 100644 --- a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentArchiverTest.java +++ b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentArchiverTest.java @@ -19,6 +19,7 @@ package io.druid.storage.s3; +import com.amazonaws.services.s3.AmazonS3Client; import com.fasterxml.jackson.databind.BeanProperty; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.InjectableValues; @@ -30,7 +31,6 @@ import io.druid.java.util.common.Intervals; import io.druid.timeline.DataSegment; import org.easymock.EasyMock; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -65,7 +65,7 @@ public String getArchiveBaseKey() } }; private static final S3DataSegmentPusherConfig PUSHER_CONFIG = new S3DataSegmentPusherConfig(); - private static final RestS3Service S3_SERVICE = EasyMock.createStrictMock(RestS3Service.class); + private static final AmazonS3Client S3_SERVICE = EasyMock.createStrictMock(AmazonS3Client.class); private static final S3DataSegmentPuller PULLER = new S3DataSegmentPuller(S3_SERVICE); private static final DataSegment SOURCE_SEGMENT = DataSegment .builder() diff --git a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentFinderTest.java b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentFinderTest.java index 12f76126bb4e..5c449faf2e10 100644 --- a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentFinderTest.java +++ b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentFinderTest.java @@ -19,6 +19,15 @@ package io.druid.storage.s3; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectResult; +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.S3ObjectSummary; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.jsontype.NamedType; import com.google.common.base.Predicate; @@ -31,17 +40,13 @@ import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import io.druid.java.util.common.Intervals; +import io.druid.java.util.common.StringUtils; import io.druid.segment.TestHelper; import io.druid.segment.loading.SegmentLoadingException; import 
io.druid.timeline.DataSegment; import io.druid.timeline.partition.NumberedShardSpec; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; -import org.jets3t.service.ServiceException; -import org.jets3t.service.StorageObjectsChunk; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; -import org.jets3t.service.model.StorageObject; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; @@ -50,8 +55,12 @@ import org.junit.rules.TemporaryFolder; import javax.annotation.Nullable; +import java.io.ByteArrayInputStream; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.InputStream; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; @@ -105,7 +114,7 @@ public class S3DataSegmentFinderTest @Rule public final TemporaryFolder temporaryFolder = new TemporaryFolder(); - RestS3Service mockS3Client; + MockAmazonS3Client mockS3Client; S3DataSegmentPusherConfig config; private String bucket; @@ -122,8 +131,6 @@ public class S3DataSegmentFinderTest private String indexZip4_0; private String indexZip4_1; - - @BeforeClass public static void setUpStatic() { @@ -140,7 +147,7 @@ public void setUp() throws Exception config.setBucket(bucket); config.setBaseKey(baseKey); - mockS3Client = new MockStorageService(temporaryFolder.newFolder()); + mockS3Client = new MockAmazonS3Client(temporaryFolder.newFolder()); descriptor1 = S3Utils.descriptorPathForSegmentPath(baseKey + "/interval1/v1/0/"); @@ -154,17 +161,17 @@ public void setUp() throws Exception indexZip4_0 = S3Utils.indexZipForSegmentPath(descriptor4_0); indexZip4_1 = S3Utils.indexZipForSegmentPath(descriptor4_1); - mockS3Client.putObject(bucket, new S3Object(descriptor1, mapper.writeValueAsString(SEGMENT_1))); - mockS3Client.putObject(bucket, new S3Object(descriptor2, mapper.writeValueAsString(SEGMENT_2))); - mockS3Client.putObject(bucket, new S3Object(descriptor3, mapper.writeValueAsString(SEGMENT_3))); - mockS3Client.putObject(bucket, new S3Object(descriptor4_0, mapper.writeValueAsString(SEGMENT_4_0))); - mockS3Client.putObject(bucket, new S3Object(descriptor4_1, mapper.writeValueAsString(SEGMENT_4_1))); - - mockS3Client.putObject(bucket, new S3Object(indexZip1, "dummy")); - mockS3Client.putObject(bucket, new S3Object(indexZip2, "dummy")); - mockS3Client.putObject(bucket, new S3Object(indexZip3, "dummy")); - mockS3Client.putObject(bucket, new S3Object(indexZip4_0, "dummy")); - mockS3Client.putObject(bucket, new S3Object(indexZip4_1, "dummy")); + mockS3Client.putObject(bucket, descriptor1, mapper.writeValueAsString(SEGMENT_1)); + mockS3Client.putObject(bucket, descriptor2, mapper.writeValueAsString(SEGMENT_2)); + mockS3Client.putObject(bucket, descriptor3, mapper.writeValueAsString(SEGMENT_3)); + mockS3Client.putObject(bucket, descriptor4_0, mapper.writeValueAsString(SEGMENT_4_0)); + mockS3Client.putObject(bucket, descriptor4_1, mapper.writeValueAsString(SEGMENT_4_1)); + + mockS3Client.putObject(bucket, indexZip1, "dummy"); + mockS3Client.putObject(bucket, indexZip2, "dummy"); + mockS3Client.putObject(bucket, indexZip3, "dummy"); + mockS3Client.putObject(bucket, indexZip4_0, "dummy"); + mockS3Client.putObject(bucket, indexZip4_1, "dummy"); } @Test @@ -210,34 +217,34 @@ public void testFindSegments() throws Exception final String serializedSegment4_1 = mapper.writeValueAsString(updatedSegment4_1); Assert.assertNotEquals(serializedSegment1, - 
IOUtils.toString(mockS3Client.getObject(bucket, descriptor1).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor1).getObjectContent())); Assert.assertNotEquals(serializedSegment2, - IOUtils.toString(mockS3Client.getObject(bucket, descriptor2).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor2).getObjectContent())); Assert.assertNotEquals(serializedSegment3, - IOUtils.toString(mockS3Client.getObject(bucket, descriptor3).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor3).getObjectContent())); Assert.assertNotEquals(serializedSegment4_0, - IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_0).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_0).getObjectContent())); Assert.assertNotEquals(serializedSegment4_1, - IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_1).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_1).getObjectContent())); final Set segments2 = s3DataSegmentFinder.findSegments("", true); Assert.assertEquals(segments, segments2); Assert.assertEquals(serializedSegment1, - IOUtils.toString(mockS3Client.getObject(bucket, descriptor1).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor1).getObjectContent())); Assert.assertEquals(serializedSegment2, - IOUtils.toString(mockS3Client.getObject(bucket, descriptor2).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor2).getObjectContent())); Assert.assertEquals(serializedSegment3, - IOUtils.toString(mockS3Client.getObject(bucket, descriptor3).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor3).getObjectContent())); Assert.assertEquals(serializedSegment4_0, - IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_0).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_0).getObjectContent())); Assert.assertEquals(serializedSegment4_1, - IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_1).getDataInputStream())); + IOUtils.toString(mockS3Client.getObject(bucket, descriptor4_1).getObjectContent())); } @Test(expected = SegmentLoadingException.class) - public void testFindSegmentsFail() throws SegmentLoadingException, ServiceException + public void testFindSegmentsFail() throws SegmentLoadingException { mockS3Client.deleteObject(bucket, indexZip4_1); @@ -275,21 +282,8 @@ public void testFindSegmentsUpdateLoadSpec() throws Exception final String descriptorPath = S3Utils.descriptorPathForSegmentPath(segmentPath); final String indexPath = S3Utils.indexZipForSegmentPath(segmentPath); - mockS3Client.putObject( - config.getBucket(), - new S3Object( - descriptorPath, - mapper.writeValueAsString(segmentMissingLoadSpec) - ) - ); - - mockS3Client.putObject( - config.getBucket(), - new S3Object( - indexPath, - "dummy" - ) - ); + mockS3Client.putObject(config.getBucket(), descriptorPath, mapper.writeValueAsString(segmentMissingLoadSpec)); + mockS3Client.putObject(config.getBucket(), indexPath, "dummy"); Set segments = s3DataSegmentFinder.findSegments(segmentPath, false); Assert.assertEquals(1, segments.size()); @@ -308,24 +302,34 @@ private String getDescriptorPath(DataSegment segment) return S3Utils.descriptorPathForSegmentPath(String.valueOf(segment.getLoadSpec().get("key"))); } - private static class MockStorageService extends RestS3Service + private static class MockAmazonS3Client extends AmazonS3Client { private 
final File baseDir; private final Map> storage = Maps.newHashMap(); - public MockStorageService(File baseDir) + public MockAmazonS3Client(File baseDir) { - super(null); + super(); this.baseDir = baseDir; } @Override - public StorageObjectsChunk listObjectsChunked( - final String bucketName, final String prefix, final String delimiter, - final long maxListingLength, final String priorLastKey - ) throws ServiceException + public boolean doesObjectExist(String bucketName, String objectName) + { + final Set keys = storage.get(bucketName); + if (keys != null) { + return keys.contains(objectName); + } + return false; + } + + @Override + public ListObjectsV2Result listObjectsV2(ListObjectsV2Request listObjectsV2Request) { - List keysOrigin = Lists.newArrayList(storage.get(bucketName)); + final String bucketName = listObjectsV2Request.getBucketName(); + final String prefix = listObjectsV2Request.getPrefix(); + + final List keysOrigin = Lists.newArrayList(storage.get(bucketName)); Predicate prefixFilter = new Predicate() { @@ -341,11 +345,11 @@ public boolean apply(@Nullable String input) ); int startOffset = 0; - if (priorLastKey != null) { - startOffset = keys.indexOf(priorLastKey) + 1; + if (listObjectsV2Request.getContinuationToken() != null) { + startOffset = keys.indexOf(listObjectsV2Request.getContinuationToken()) + 1; } - int endOffset = startOffset + (int) maxListingLength; // exclusive + int endOffset = startOffset + listObjectsV2Request.getMaxKeys(); // exclusive if (endOffset > keys.size()) { endOffset = keys.size(); } @@ -355,64 +359,73 @@ public boolean apply(@Nullable String input) newPriorLastkey = null; } - List objects = Lists.newArrayList(); + List objects = new ArrayList<>(); for (String objectKey : keys.subList(startOffset, endOffset)) { - objects.add(getObjectDetails(bucketName, objectKey)); + final S3ObjectSummary objectSummary = new S3ObjectSummary(); + objectSummary.setBucketName(bucketName); + objectSummary.setKey(objectKey); + objects.add(objectSummary); } - return new StorageObjectsChunk( - prefix, delimiter, objects.toArray(new StorageObject[]{}), null, newPriorLastkey); + final ListObjectsV2Result result = new ListObjectsV2Result(); + result.setBucketName(bucketName); + result.setKeyCount(objects.size()); + result.getObjectSummaries().addAll(objects); + result.setContinuationToken(newPriorLastkey); + result.setTruncated(newPriorLastkey != null); + + return result; } @Override - public StorageObject getObjectDetails(String bucketName, String objectKey) throws ServiceException + public S3Object getObject(String bucketName, String objectKey) { - if (!storage.containsKey(bucketName)) { - ServiceException ex = new ServiceException(); - ex.setResponseCode(404); + AmazonServiceException ex = new AmazonS3Exception("S3DataSegmentFinderTest"); + ex.setStatusCode(404); ex.setErrorCode("NoSuchBucket"); throw ex; } if (!storage.get(bucketName).contains(objectKey)) { - ServiceException ex = new ServiceException(); - ex.setResponseCode(404); + AmazonServiceException ex = new AmazonS3Exception("S3DataSegmentFinderTest"); + ex.setStatusCode(404); ex.setErrorCode("NoSuchKey"); throw ex; } final File objectPath = new File(baseDir, objectKey); - StorageObject storageObject = new StorageObject(); + S3Object storageObject = new S3Object(); storageObject.setBucketName(bucketName); storageObject.setKey(objectKey); - storageObject.setDataInputFile(objectPath); + try { + storageObject.setObjectContent(new FileInputStream(objectPath)); + } + catch (FileNotFoundException e) { + 
AmazonServiceException ex = new AmazonS3Exception("S3DataSegmentFinderTest", e); + ex.setStatusCode(500); + ex.setErrorCode("InternalError"); + throw ex; + } return storageObject; } @Override - public S3Object getObject(String bucketName, String objectKey) + public PutObjectResult putObject(String bucketName, String key, String data) { - final File objectPath = new File(baseDir, objectKey); - S3Object s3Object = new S3Object(); - s3Object.setBucketName(bucketName); - s3Object.setKey(objectKey); - s3Object.setDataInputFile(objectPath); - - return s3Object; - + return putObject(bucketName, key, new ByteArrayInputStream(StringUtils.toUtf8(data)), null); } @Override - public S3Object putObject(final String bucketName, final S3Object object) + public PutObjectResult putObject(String bucketName, String key, InputStream input, ObjectMetadata metadata) { if (!storage.containsKey(bucketName)) { - storage.put(bucketName, Sets.newHashSet()); + storage.put(bucketName, Sets.newHashSet()); } - storage.get(bucketName).add(object.getKey()); + storage.get(bucketName).add(key); - final File objectPath = new File(baseDir, object.getKey()); + final File objectPath = new File(baseDir, key); if (!objectPath.getParentFile().exists()) { objectPath.getParentFile().mkdirs(); @@ -420,7 +433,7 @@ public S3Object putObject(final String bucketName, final S3Object object) try { try ( - InputStream in = object.getDataInputStream() + InputStream in = input ) { FileUtils.copyInputStreamToFile(in, objectPath); } @@ -429,7 +442,7 @@ public S3Object putObject(final String bucketName, final S3Object object) throw Throwables.propagate(e); } - return object; + return new PutObjectResult(); } @Override diff --git a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentMoverTest.java b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentMoverTest.java index 95ddcdfb7dd9..a848eb180822 100644 --- a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentMoverTest.java +++ b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentMoverTest.java @@ -19,6 +19,22 @@ package io.druid.storage.s3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.model.AccessControlList; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.CanonicalGrantee; +import com.amazonaws.services.s3.model.CopyObjectRequest; +import com.amazonaws.services.s3.model.CopyObjectResult; +import com.amazonaws.services.s3.model.GetObjectMetadataRequest; +import com.amazonaws.services.s3.model.Grant; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.Owner; +import com.amazonaws.services.s3.model.Permission; +import com.amazonaws.services.s3.model.PutObjectResult; +import com.amazonaws.services.s3.model.S3ObjectSummary; +import com.amazonaws.services.s3.model.StorageClass; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; @@ -28,12 +44,11 @@ import io.druid.segment.loading.SegmentLoadingException; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NoneShardSpec; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; -import org.jets3t.service.model.StorageObject; import org.junit.Assert; 
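// The MockAmazonS3Client at the bottom of this test emulates only the AmazonS3 calls that
// S3DataSegmentMover exercises (getBucketAcl, getObjectMetadata, doesObjectExist, listObjectsV2,
// copyObject, deleteObject, putObject) on top of an in-memory map of bucket -> keys, and tracks the
// copied / deletedOld flags so the tests can verify that a move both copied the segment files and
// removed the originals.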
import org.junit.Test; +import java.io.File; +import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -59,11 +74,17 @@ public class S3DataSegmentMoverTest @Test public void testMove() throws Exception { - MockStorageService mockS3Client = new MockStorageService(); + MockAmazonS3Client mockS3Client = new MockAmazonS3Client(); S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client, new S3DataSegmentPusherConfig()); - mockS3Client.putObject("main", new S3Object("baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip")); - mockS3Client.putObject("main", new S3Object("baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/descriptor.json")); + mockS3Client.putObject( + "main", + "baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip" + ); + mockS3Client.putObject( + "main", + "baseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/descriptor.json" + ); DataSegment movedSegment = mover.move( sourceSegment, @@ -79,11 +100,17 @@ public void testMove() throws Exception @Test public void testMoveNoop() throws Exception { - MockStorageService mockS3Client = new MockStorageService(); + MockAmazonS3Client mockS3Client = new MockAmazonS3Client(); S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client, new S3DataSegmentPusherConfig()); - mockS3Client.putObject("archive", new S3Object("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip")); - mockS3Client.putObject("archive", new S3Object("targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/descriptor.json")); + mockS3Client.putObject( + "archive", + "targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/index.zip" + ); + mockS3Client.putObject( + "archive", + "targetBaseKey/test/2013-01-01T00:00:00.000Z_2013-01-02T00:00:00.000Z/1/0/descriptor.json" + ); DataSegment movedSegment = mover.move( sourceSegment, @@ -100,7 +127,7 @@ public void testMoveNoop() throws Exception @Test(expected = SegmentLoadingException.class) public void testMoveException() throws Exception { - MockStorageService mockS3Client = new MockStorageService(); + MockAmazonS3Client mockS3Client = new MockAmazonS3Client(); S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client, new S3DataSegmentPusherConfig()); mover.move( @@ -112,7 +139,7 @@ public void testMoveException() throws Exception @Test public void testIgnoresGoneButAlreadyMoved() throws Exception { - MockStorageService mockS3Client = new MockStorageService(); + MockAmazonS3Client mockS3Client = new MockAmazonS3Client(); S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client, new S3DataSegmentPusherConfig()); mover.move(new DataSegment( "test", @@ -135,7 +162,7 @@ public void testIgnoresGoneButAlreadyMoved() throws Exception @Test(expected = SegmentLoadingException.class) public void testFailsToMoveMissing() throws Exception { - MockStorageService mockS3Client = new MockStorageService(); + MockAmazonS3Client mockS3Client = new MockAmazonS3Client(); S3DataSegmentMover mover = new S3DataSegmentMover(mockS3Client, new S3DataSegmentPusherConfig()); mover.move(new DataSegment( "test", @@ -155,15 +182,15 @@ public void testFailsToMoveMissing() throws Exception ), ImmutableMap.of("bucket", "DOES NOT EXIST", "baseKey", "baseKey2")); } - private static class MockStorageService extends RestS3Service + private static class MockAmazonS3Client extends AmazonS3Client { Map> storage = Maps.newHashMap(); boolean copied = false; boolean deletedOld = false; - 
private MockStorageService() + private MockAmazonS3Client() { - super(null); + super(); } public boolean didMove() @@ -172,37 +199,68 @@ public boolean didMove() } @Override - public boolean isObjectInBucket(String bucketName, String objectKey) + public AccessControlList getBucketAcl(String bucketName) + { + final AccessControlList acl = new AccessControlList(); + acl.setOwner(new Owner("ownerId", "owner")); + acl.grantAllPermissions(new Grant(new CanonicalGrantee(acl.getOwner().getId()), Permission.FullControl)); + return acl; + } + + @Override + public ObjectMetadata getObjectMetadata(GetObjectMetadataRequest getObjectMetadataRequest) + { + return new ObjectMetadata(); + } + + @Override + public boolean doesObjectExist(String bucketName, String objectKey) { Set objects = storage.get(bucketName); return (objects != null && objects.contains(objectKey)); } @Override - public S3Object[] listObjects(String bucketName, String objectKey, String separator) + public ListObjectsV2Result listObjectsV2(ListObjectsV2Request listObjectsV2Request) { - if (isObjectInBucket(bucketName, objectKey)) { - final S3Object object = new S3Object(objectKey); - object.setStorageClass(S3Object.STORAGE_CLASS_STANDARD); - return new S3Object[]{object}; + final String bucketName = listObjectsV2Request.getBucketName(); + final String objectKey = listObjectsV2Request.getPrefix(); + if (doesObjectExist(bucketName, objectKey)) { + final S3ObjectSummary objectSummary = new S3ObjectSummary(); + objectSummary.setBucketName(bucketName); + objectSummary.setKey(objectKey); + objectSummary.setStorageClass(StorageClass.Standard.name()); + + final ListObjectsV2Result result = new ListObjectsV2Result(); + result.setBucketName(bucketName); + result.setPrefix(objectKey); + result.setKeyCount(1); + result.getObjectSummaries().add(objectSummary); + result.setTruncated(true); + return result; + } else { + return new ListObjectsV2Result(); } - return new S3Object[]{}; } @Override - public Map copyObject( - String sourceBucketName, - String sourceObjectKey, - String destinationBucketName, - StorageObject destinationObject, - boolean replaceMetadata - ) + public CopyObjectResult copyObject(CopyObjectRequest copyObjectRequest) { + final String sourceBucketName = copyObjectRequest.getSourceBucketName(); + final String sourceObjectKey = copyObjectRequest.getSourceKey(); + final String destinationBucketName = copyObjectRequest.getDestinationBucketName(); + final String destinationObjectKey = copyObjectRequest.getDestinationKey(); copied = true; - if (isObjectInBucket(sourceBucketName, sourceObjectKey)) { - this.putObject(destinationBucketName, new S3Object(destinationObject.getKey())); + if (doesObjectExist(sourceBucketName, sourceObjectKey)) { + storage.computeIfAbsent(destinationBucketName, k -> new HashSet<>()) + .add(destinationObjectKey); + return new CopyObjectResult(); + } else { + final AmazonS3Exception exception = new AmazonS3Exception("S3DataSegmentMoverTest"); + exception.setErrorCode("NoSuchKey"); + exception.setStatusCode(404); + throw exception; } - return null; } @Override @@ -212,14 +270,19 @@ public void deleteObject(String bucket, String objectKey) storage.get(bucket).remove(objectKey); } + public PutObjectResult putObject(String bucketName, String key) + { + return putObject(bucketName, key, (File) null); + } + @Override - public S3Object putObject(String bucketName, S3Object object) + public PutObjectResult putObject(String bucketName, String key, File file) { if (!storage.containsKey(bucketName)) { storage.put(bucketName, 
Sets.newHashSet()); } - storage.get(bucketName).add(object.getKey()); - return object; + storage.get(bucketName).add(key); + return new PutObjectResult(); } } } diff --git a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentPullerTest.java b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentPullerTest.java index 303bf657d6e5..8bc028a64f0a 100644 --- a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentPullerTest.java +++ b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentPullerTest.java @@ -19,9 +19,21 @@ package io.druid.storage.s3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.S3ObjectSummary; import io.druid.java.util.common.FileUtils; import io.druid.java.util.common.StringUtils; import io.druid.segment.loading.SegmentLoadingException; +import org.easymock.EasyMock; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -30,15 +42,6 @@ import java.net.URI; import java.util.Date; import java.util.zip.GZIPOutputStream; -import org.easymock.EasyMock; -import org.jets3t.service.S3ServiceException; -import org.jets3t.service.ServiceException; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; /** * @@ -50,26 +53,29 @@ public class S3DataSegmentPullerTest public TemporaryFolder temporaryFolder = new TemporaryFolder(); @Test - public void testSimpleGetVersion() throws ServiceException, IOException + public void testSimpleGetVersion() throws IOException { String bucket = "bucket"; String keyPrefix = "prefix/dir/0"; - RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class); + AmazonS3Client s3Client = EasyMock.createStrictMock(AmazonS3Client.class); - S3Object object0 = new S3Object(); + final S3ObjectSummary objectSummary = new S3ObjectSummary(); + objectSummary.setBucketName(bucket); + objectSummary.setKey(keyPrefix + "/renames-0.gz"); + objectSummary.setLastModified(new Date(0)); - object0.setBucketName(bucket); - object0.setKey(keyPrefix + "/renames-0.gz"); - object0.setLastModifiedDate(new Date(0)); + final ListObjectsV2Result result = new ListObjectsV2Result(); + result.setKeyCount(1); + result.getObjectSummaries().add(objectSummary); - EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(bucket), EasyMock.eq(object0.getKey()))) - .andReturn(object0) + EasyMock.expect(s3Client.listObjectsV2(EasyMock.anyObject(ListObjectsV2Request.class))) + .andReturn(result) .once(); S3DataSegmentPuller puller = new S3DataSegmentPuller(s3Client); EasyMock.replay(s3Client); - String version = puller.getVersion(URI.create(StringUtils.format("s3://%s/%s", bucket, object0.getKey()))); + String version = puller.getVersion(URI.create(StringUtils.format("s3://%s/%s", bucket, objectSummary.getKey()))); EasyMock.verify(s3Client); @@ -77,11 +83,11 @@ public void testSimpleGetVersion() throws ServiceException, IOException } @Test - public void testGZUncompress() throws ServiceException, IOException, 
SegmentLoadingException + public void testGZUncompress() throws IOException, SegmentLoadingException { final String bucket = "bucket"; final String keyPrefix = "prefix/dir/0"; - final RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class); + final AmazonS3Client s3Client = EasyMock.createStrictMock(AmazonS3Client.class); final byte[] value = bucket.getBytes("utf8"); final File tmpFile = temporaryFolder.newFile("gzTest.gz"); @@ -91,19 +97,27 @@ public void testGZUncompress() throws ServiceException, IOException, SegmentLoad } final S3Object object0 = new S3Object(); - object0.setBucketName(bucket); object0.setKey(keyPrefix + "/renames-0.gz"); - object0.setLastModifiedDate(new Date(0)); - object0.setDataInputStream(new FileInputStream(tmpFile)); + object0.getObjectMetadata().setLastModified(new Date(0)); + object0.setObjectContent(new FileInputStream(tmpFile)); + + final S3ObjectSummary objectSummary = new S3ObjectSummary(); + objectSummary.setBucketName(bucket); + objectSummary.setKey(keyPrefix + "/renames-0.gz"); + objectSummary.setLastModified(new Date(0)); + + final ListObjectsV2Result listObjectsResult = new ListObjectsV2Result(); + listObjectsResult.setKeyCount(1); + listObjectsResult.getObjectSummaries().add(objectSummary); final File tmpDir = temporaryFolder.newFolder("gzTestDir"); - EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey()))) - .andReturn(null) + EasyMock.expect(s3Client.doesObjectExist(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey()))) + .andReturn(true) .once(); - EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey()))) - .andReturn(object0) + EasyMock.expect(s3Client.listObjectsV2(EasyMock.anyObject(ListObjectsV2Request.class))) + .andReturn(listObjectsResult) .once(); EasyMock.expect(s3Client.getObject(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey()))) .andReturn(object0) @@ -126,11 +140,11 @@ public void testGZUncompress() throws ServiceException, IOException, SegmentLoad } @Test - public void testGZUncompressRetries() throws ServiceException, IOException, SegmentLoadingException + public void testGZUncompressRetries() throws IOException, SegmentLoadingException { final String bucket = "bucket"; final String keyPrefix = "prefix/dir/0"; - final RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class); + final AmazonS3Client s3Client = EasyMock.createStrictMock(AmazonS3Client.class); final byte[] value = bucket.getBytes("utf8"); final File tmpFile = temporaryFolder.newFile("gzTest.gz"); @@ -143,25 +157,34 @@ public void testGZUncompressRetries() throws ServiceException, IOException, Segm object0.setBucketName(bucket); object0.setKey(keyPrefix + "/renames-0.gz"); - object0.setLastModifiedDate(new Date(0)); - object0.setDataInputStream(new FileInputStream(tmpFile)); + object0.getObjectMetadata().setLastModified(new Date(0)); + object0.setObjectContent(new FileInputStream(tmpFile)); + + final S3ObjectSummary objectSummary = new S3ObjectSummary(); + objectSummary.setBucketName(bucket); + objectSummary.setKey(keyPrefix + "/renames-0.gz"); + objectSummary.setLastModified(new Date(0)); + + final ListObjectsV2Result listObjectsResult = new ListObjectsV2Result(); + listObjectsResult.setKeyCount(1); + listObjectsResult.getObjectSummaries().add(objectSummary); File tmpDir = temporaryFolder.newFolder("gzTestDir"); - S3ServiceException exception = new S3ServiceException(); + AmazonS3Exception exception 
= new AmazonS3Exception("S3DataSegmentPullerTest"); exception.setErrorCode("NoSuchKey"); - exception.setResponseCode(404); - EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey()))) - .andReturn(null) + exception.setStatusCode(404); + EasyMock.expect(s3Client.doesObjectExist(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey()))) + .andReturn(true) .once(); - EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey()))) - .andReturn(object0) + EasyMock.expect(s3Client.listObjectsV2(EasyMock.anyObject(ListObjectsV2Request.class))) + .andReturn(listObjectsResult) .once(); EasyMock.expect(s3Client.getObject(EasyMock.eq(bucket), EasyMock.eq(object0.getKey()))) .andThrow(exception) .once(); - EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey()))) - .andReturn(object0) + EasyMock.expect(s3Client.listObjectsV2(EasyMock.anyObject(ListObjectsV2Request.class))) + .andReturn(listObjectsResult) .once(); EasyMock.expect(s3Client.getObject(EasyMock.eq(bucket), EasyMock.eq(object0.getKey()))) .andReturn(object0) diff --git a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentPusherTest.java b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentPusherTest.java index f26bd1610398..c787ed7d900c 100644 --- a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentPusherTest.java +++ b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3DataSegmentPusherTest.java @@ -19,6 +19,14 @@ package io.druid.storage.s3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.model.AccessControlList; +import com.amazonaws.services.s3.model.CanonicalGrantee; +import com.amazonaws.services.s3.model.Grant; +import com.amazonaws.services.s3.model.Owner; +import com.amazonaws.services.s3.model.Permission; +import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.model.PutObjectResult; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -31,14 +39,13 @@ import org.easymock.Capture; import org.easymock.EasyMock; import org.easymock.IAnswer; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.io.File; +import java.io.FileInputStream; /** */ @@ -65,27 +72,38 @@ public void setValue(T value) @Test public void testPush() throws Exception { - RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class); + AmazonS3Client s3Client = EasyMock.createStrictMock(AmazonS3Client.class); - Capture capturedS3Object = Capture.newInstance(); + final AccessControlList acl = new AccessControlList(); + acl.setOwner(new Owner("ownerId", "owner")); + acl.grantAllPermissions(new Grant(new CanonicalGrantee(acl.getOwner().getId()), Permission.FullControl)); + EasyMock.expect(s3Client.getBucketAcl(EasyMock.eq("bucket"))).andReturn(acl).once(); + + EasyMock.expect(s3Client.putObject(EasyMock.anyObject())) + .andReturn(new PutObjectResult()) + .once(); + + EasyMock.expect(s3Client.getBucketAcl(EasyMock.eq("bucket"))).andReturn(acl).once(); + + Capture capturedPutRequest = Capture.newInstance(); ValueContainer capturedS3SegmentJson = new ValueContainer<>(); - 
EasyMock.expect(s3Client.putObject(EasyMock.anyString(), EasyMock.capture(capturedS3Object))) + EasyMock.expect(s3Client.putObject(EasyMock.capture(capturedPutRequest))) .andAnswer( - new IAnswer() + new IAnswer() { @Override - public S3Object answer() throws Throwable + public PutObjectResult answer() throws Throwable { capturedS3SegmentJson.setValue( - IOUtils.toString(capturedS3Object.getValue().getDataInputStream(), "utf-8") + IOUtils.toString(new FileInputStream(capturedPutRequest.getValue().getFile()), "utf-8") ); - return null; + return new PutObjectResult(); } } ) - .atLeastOnce(); - EasyMock.replay(s3Client); + .once(); + EasyMock.replay(s3Client); S3DataSegmentPusherConfig config = new S3DataSegmentPusherConfig(); config.setBucket("bucket"); diff --git a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3TimestampVersionedDataFinderTest.java b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3TimestampVersionedDataFinderTest.java index c2bdc347334d..ce19c9b10fc7 100644 --- a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3TimestampVersionedDataFinderTest.java +++ b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/S3TimestampVersionedDataFinderTest.java @@ -19,11 +19,12 @@ package io.druid.storage.s3; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; +import com.amazonaws.services.s3.model.S3ObjectSummary; import io.druid.java.util.common.StringUtils; import org.easymock.EasyMock; -import org.jets3t.service.S3ServiceException; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.S3Object; import org.junit.Assert; import org.junit.Test; @@ -35,25 +36,31 @@ public class S3TimestampVersionedDataFinderTest { @Test - public void testSimpleLatestVersion() throws S3ServiceException + public void testSimpleLatestVersion() { String bucket = "bucket"; String keyPrefix = "prefix/dir/0"; - RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class); + AmazonS3Client s3Client = EasyMock.createStrictMock(AmazonS3Client.class); - S3Object object0 = new S3Object(), object1 = new S3Object(); + S3ObjectSummary object0 = new S3ObjectSummary(), object1 = new S3ObjectSummary(); object0.setBucketName(bucket); object0.setKey(keyPrefix + "/renames-0.gz"); - object0.setLastModifiedDate(new Date(0)); + object0.setLastModified(new Date(0)); object1.setBucketName(bucket); object1.setKey(keyPrefix + "/renames-1.gz"); - object1.setLastModifiedDate(new Date(1)); + object1.setLastModified(new Date(1)); - EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.isNull())).andReturn( - new S3Object[]{object0, object1} - ).once(); + final ListObjectsV2Result result = new ListObjectsV2Result(); + result.getObjectSummaries().add(object0); + result.getObjectSummaries().add(object1); + result.setKeyCount(2); + result.setTruncated(false); + + EasyMock.expect(s3Client.listObjectsV2(EasyMock.anyObject(ListObjectsV2Request.class))) + .andReturn(result) + .once(); S3TimestampVersionedDataFinder finder = new S3TimestampVersionedDataFinder(s3Client); Pattern pattern = Pattern.compile("renames-[0-9]*\\.gz"); @@ -71,25 +78,19 @@ public void testSimpleLatestVersion() throws S3ServiceException } @Test - public void testMissing() throws S3ServiceException + public void testMissing() { String bucket = "bucket"; String keyPrefix = "prefix/dir/0"; - RestS3Service 
s3Client = EasyMock.createStrictMock(RestS3Service.class); + AmazonS3Client s3Client = EasyMock.createStrictMock(AmazonS3Client.class); - S3Object object0 = new S3Object(), object1 = new S3Object(); + final ListObjectsV2Result result = new ListObjectsV2Result(); + result.setKeyCount(0); + result.setTruncated(false); - object0.setBucketName(bucket); - object0.setKey(keyPrefix + "/renames-0.gz"); - object0.setLastModifiedDate(new Date(0)); - - object1.setBucketName(bucket); - object1.setKey(keyPrefix + "/renames-1.gz"); - object1.setLastModifiedDate(new Date(1)); - - EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.isNull())).andReturn( - null - ).once(); + EasyMock.expect(s3Client.listObjectsV2(EasyMock.anyObject(ListObjectsV2Request.class))) + .andReturn(result) + .once(); S3TimestampVersionedDataFinder finder = new S3TimestampVersionedDataFinder(s3Client); Pattern pattern = Pattern.compile("renames-[0-9]*\\.gz"); @@ -105,21 +106,26 @@ public void testMissing() throws S3ServiceException } @Test - public void testFindSelf() throws S3ServiceException + public void testFindSelf() { String bucket = "bucket"; String keyPrefix = "prefix/dir/0"; - RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class); + AmazonS3Client s3Client = EasyMock.createStrictMock(AmazonS3Client.class); - S3Object object0 = new S3Object(); + S3ObjectSummary object0 = new S3ObjectSummary(); object0.setBucketName(bucket); object0.setKey(keyPrefix + "/renames-0.gz"); - object0.setLastModifiedDate(new Date(0)); + object0.setLastModified(new Date(0)); - EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.isNull())).andReturn( - new S3Object[]{object0} - ).once(); + final ListObjectsV2Result result = new ListObjectsV2Result(); + result.getObjectSummaries().add(object0); + result.setKeyCount(1); + result.setTruncated(false); + + EasyMock.expect(s3Client.listObjectsV2(EasyMock.anyObject(ListObjectsV2Request.class))) + .andReturn(result) + .once(); S3TimestampVersionedDataFinder finder = new S3TimestampVersionedDataFinder(s3Client); Pattern pattern = Pattern.compile("renames-[0-9]*\\.gz"); @@ -137,21 +143,26 @@ public void testFindSelf() throws S3ServiceException } @Test - public void testFindExact() throws S3ServiceException + public void testFindExact() { String bucket = "bucket"; String keyPrefix = "prefix/dir/0"; - RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class); + AmazonS3Client s3Client = EasyMock.createStrictMock(AmazonS3Client.class); - S3Object object0 = new S3Object(); + S3ObjectSummary object0 = new S3ObjectSummary(); object0.setBucketName(bucket); object0.setKey(keyPrefix + "/renames-0.gz"); - object0.setLastModifiedDate(new Date(0)); + object0.setLastModified(new Date(0)); + + final ListObjectsV2Result result = new ListObjectsV2Result(); + result.getObjectSummaries().add(object0); + result.setKeyCount(1); + result.setTruncated(false); - EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.isNull())).andReturn( - new S3Object[]{object0} - ).once(); + EasyMock.expect(s3Client.listObjectsV2(EasyMock.anyObject(ListObjectsV2Request.class))) + .andReturn(result) + .once(); S3TimestampVersionedDataFinder finder = new S3TimestampVersionedDataFinder(s3Client); diff --git a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/TestAWSCredentialsProvider.java b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/TestAWSCredentialsProvider.java index 
a3bf27a40b47..a7716e2a5ed8 100644 --- a/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/TestAWSCredentialsProvider.java +++ b/extensions-core/s3-extensions/src/test/java/io/druid/storage/s3/TestAWSCredentialsProvider.java @@ -24,6 +24,8 @@ import com.amazonaws.auth.AWSSessionCredentials; import com.google.common.io.Files; import io.druid.common.aws.AWSCredentialsConfig; +import io.druid.common.aws.AWSEndpointConfig; +import io.druid.common.aws.AWSProxyConfig; import io.druid.guice.AWSModule; import io.druid.metadata.DefaultPasswordProvider; import org.easymock.EasyMock; @@ -58,7 +60,7 @@ public void testWithFixedAWSKeys() assertEquals(credentials.getAWSSecretKey(), "secretKeySample"); // try to create - s3Module.getRestS3Service(provider); + s3Module.getAmazonS3Client(provider, new AWSProxyConfig(), new AWSEndpointConfig()); } @Rule @@ -86,6 +88,6 @@ public void testWithFileSessionCredentials() throws IOException assertEquals(sessionCredentials.getSessionToken(), "sessionTokenSample"); // try to create - s3Module.getRestS3Service(provider); + s3Module.getAmazonS3Client(provider, new AWSProxyConfig(), new AWSEndpointConfig()); } } diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml index e2bf002c3d55..3a9c7740a033 100644 --- a/indexing-hadoop/pom.xml +++ b/indexing-hadoop/pom.xml @@ -56,23 +56,6 @@ com.google.guava guava - - - net.java.dev.jets3t - jets3t - test - - - - org.apache.httpcomponents - httpclient - test - - - org.apache.httpcomponents - httpcore - test - org.apache.hadoop hadoop-client @@ -100,6 +83,21 @@ + + com.amazonaws + aws-java-sdk-bundle + test + + + org.apache.httpcomponents + httpclient + test + + + org.apache.httpcomponents + httpcore + test + junit junit diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java index 9a6c5d9ecc96..44e75805d77d 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java @@ -395,7 +395,7 @@ public static class DetermineHashedPartitionsPartitioner public int getPartition(LongWritable interval, BytesWritable text, int numPartitions) { - if (config.get("mapred.job.tracker").equals("local") || determineIntervals) { + if ("local".equals(config.get("mapred.job.tracker")) || determineIntervals) { return 0; } else { return reducerLookup.get(interval); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/config/TaskConfig.java b/indexing-service/src/main/java/io/druid/indexing/common/config/TaskConfig.java index 6a9370324d0a..9152c8732109 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/config/TaskConfig.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/config/TaskConfig.java @@ -31,7 +31,7 @@ public class TaskConfig { public static final List DEFAULT_DEFAULT_HADOOP_COORDINATES = ImmutableList.of( - "org.apache.hadoop:hadoop-client:2.7.3" + "org.apache.hadoop:hadoop-client:2.8.3" ); private static final Period DEFAULT_DIRECTORY_LOCK_TIMEOUT = new Period("PT10M"); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java index 199aacdd8bb5..8963559e3123 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java 
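For context on the `getAmazonS3Client(provider, new AWSProxyConfig(), new AWSEndpointConfig())` calls in the test above, which replace the old jets3t `getRestS3Service(provider)` factory: the sketch below is a minimal, hedged illustration of how the AWS SDK v1 combines credentials, proxy, and endpoint settings into a single client. It is not the module's actual provider method; the proxy host, port, and endpoint values are placeholder assumptions standing in for whatever `AWSProxyConfig` (`druid.s3.proxy.*`) and `AWSEndpointConfig` (`druid.s3.endpoint.*`) supply.

```java
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;

public class S3ClientSketch
{
  // Minimal sketch only: builds an S3 client from a credentials provider plus
  // proxy and endpoint settings. Placeholder values are assumptions, not Druid defaults.
  public static AmazonS3 buildClient(AWSCredentialsProvider credentialsProvider)
  {
    final ClientConfiguration clientConfiguration = new ClientConfiguration();
    clientConfiguration.setProxyHost("proxy.example.com"); // hypothetical proxy host
    clientConfiguration.setProxyPort(8888);                // hypothetical proxy port

    final AwsClientBuilder.EndpointConfiguration endpoint =
        new AwsClientBuilder.EndpointConfiguration("s3.us-east-1.amazonaws.com", "us-east-1");

    return AmazonS3ClientBuilder
        .standard()
        .withCredentials(credentialsProvider)
        .withClientConfiguration(clientConfiguration)
        .withEndpointConfiguration(endpoint)
        .build();
  }
}
```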
@@ -35,7 +35,6 @@ import java.io.File; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; import java.net.URLClassLoader; @@ -127,7 +126,6 @@ public boolean apply(@Nullable URL input) * * @param toolbox The toolbox to pull the default coordinates from if not present in the task * @return An isolated URLClassLoader not tied by parent chain to the ApplicationClassLoader - * @throws MalformedURLException from Initialization.getClassLoaderForExtension */ protected ClassLoader buildClassLoader(final TaskToolbox toolbox) { diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/autoscaling/EC2AutoScalerSerdeTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/autoscaling/EC2AutoScalerSerdeTest.java index 5d891d62e641..18a46365edb6 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/autoscaling/EC2AutoScalerSerdeTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/autoscaling/EC2AutoScalerSerdeTest.java @@ -74,12 +74,12 @@ public Object findInjectableValue( } ); - final EC2AutoScaler autoScaler = objectMapper.readValue(json, EC2AutoScaler.class); + final EC2AutoScaler autoScaler = (EC2AutoScaler) objectMapper.readValue(json, AutoScaler.class); verifyAutoScaler(autoScaler); - final EC2AutoScaler roundTripAutoScaler = objectMapper.readValue( + final EC2AutoScaler roundTripAutoScaler = (EC2AutoScaler) objectMapper.readValue( objectMapper.writeValueAsBytes(autoScaler), - EC2AutoScaler.class + AutoScaler.class ); verifyAutoScaler(roundTripAutoScaler); diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java index 1eb7028a5da4..0569d07f2b11 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/setup/JavaScriptWorkerSelectStrategyTest.java @@ -86,7 +86,7 @@ public void testSerde() throws Exception STRATEGY, mapper.readValue( mapper.writeValueAsString(STRATEGY), - JavaScriptWorkerSelectStrategy.class + WorkerSelectStrategy.class ) ); } @@ -108,7 +108,7 @@ public void testDisabled() throws Exception expectedException.expectCause(CoreMatchers.instanceOf(IllegalStateException.class)); expectedException.expectMessage("JavaScript is disabled"); - mapper.readValue(strategyString, JavaScriptWorkerSelectStrategy.class); + mapper.readValue(strategyString, WorkerSelectStrategy.class); } @Test diff --git a/pom.xml b/pom.xml index 095d58e2e2b1..870fe78d0467 100644 --- a/pom.xml +++ b/pom.xml @@ -68,8 +68,8 @@ 4.1.0 9.3.19.v20170502 1.19.3 - - 2.4.6 + + 2.6.7 2.5 3.10.6.Final @@ -78,12 +78,10 @@ 4.0.52.Final 1.7.12 - 2.7.3 + 2.8.3 2.0.0 1.6.6 - - 1.10.77 + 1.11.199 2.5.5 3.4.11 @@ -189,49 +187,8 @@ com.amazonaws - aws-java-sdk-ec2 - ${aws.sdk.version} - - - javax.mail - mail - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.core - jackson-annotations - - - commons-codec - commons-codec - - - - - com.amazonaws - aws-java-sdk-s3 - ${aws.sdk.version} - - - javax.mail - mail - - - com.fasterxml.jackson.core - jackson-databind - - - com.fasterxml.jackson.core - jackson-annotations - - - commons-codec - commons-codec - - + aws-java-sdk-bundle + ${aws.sdk.bundle.version} com.ning @@ -612,49 +569,15 @@ 
aether-api 0.9.0.M2 - - net.java.dev.jets3t - jets3t - 0.9.4 - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - org.codehaus.jackson - jackson-core-asl - - - org.codehaus.jackson - jackson-mapper-asl - - - - - org.apache.httpcomponents httpclient - 4.5.1 + 4.5.3 org.apache.httpcomponents httpcore - 4.4.3 + 4.4.4 org.apache.hadoop diff --git a/processing/src/test/java/io/druid/query/groupby/orderby/DefaultLimitSpecTest.java b/processing/src/test/java/io/druid/query/groupby/orderby/DefaultLimitSpecTest.java index 64d354e99fd7..3da0488d6790 100644 --- a/processing/src/test/java/io/druid/query/groupby/orderby/DefaultLimitSpecTest.java +++ b/processing/src/test/java/io/druid/query/groupby/orderby/DefaultLimitSpecTest.java @@ -74,9 +74,9 @@ public void testSerde() throws Exception //defaults String json = "{\"type\": \"default\"}"; - DefaultLimitSpec spec = mapper.readValue( - mapper.writeValueAsString(mapper.readValue(json, DefaultLimitSpec.class)), - DefaultLimitSpec.class + DefaultLimitSpec spec = (DefaultLimitSpec) mapper.readValue( + mapper.writeValueAsString(mapper.readValue(json, LimitSpec.class)), + LimitSpec.class ); Assert.assertEquals( @@ -90,9 +90,9 @@ public void testSerde() throws Exception + " \"columns\":[{\"dimension\":\"d\",\"direction\":\"DESCENDING\", \"dimensionOrder\":\"numeric\"}],\n" + " \"limit\":10\n" + "}"; - spec = mapper.readValue( - mapper.writeValueAsString(mapper.readValue(json, DefaultLimitSpec.class)), - DefaultLimitSpec.class + spec = (DefaultLimitSpec) mapper.readValue( + mapper.writeValueAsString(mapper.readValue(json, LimitSpec.class)), + LimitSpec.class ); Assert.assertEquals( new DefaultLimitSpec(ImmutableList.of(new OrderByColumnSpec("d", OrderByColumnSpec.Direction.DESCENDING, @@ -106,9 +106,9 @@ public void testSerde() throws Exception + " \"limit\":10\n" + "}"; - spec = mapper.readValue( - mapper.writeValueAsString(mapper.readValue(json, DefaultLimitSpec.class)), - DefaultLimitSpec.class + spec = (DefaultLimitSpec) mapper.readValue( + mapper.writeValueAsString(mapper.readValue(json, LimitSpec.class)), + LimitSpec.class ); Assert.assertEquals( @@ -122,9 +122,9 @@ public void testSerde() throws Exception + " \"columns\":[{\"dimension\":\"d\"}],\n" + " \"limit\":10\n" + "}"; - spec = mapper.readValue( - mapper.writeValueAsString(mapper.readValue(json, DefaultLimitSpec.class)), - DefaultLimitSpec.class + spec = (DefaultLimitSpec) mapper.readValue( + mapper.writeValueAsString(mapper.readValue(json, LimitSpec.class)), + LimitSpec.class ); Assert.assertEquals( new DefaultLimitSpec(ImmutableList.of(new OrderByColumnSpec("d", OrderByColumnSpec.Direction.ASCENDING, @@ -137,9 +137,9 @@ public void testSerde() throws Exception + " \"columns\":[\"d\"],\n" + " \"limit\":10\n" + "}"; - spec = mapper.readValue( - mapper.writeValueAsString(mapper.readValue(json, DefaultLimitSpec.class)), - DefaultLimitSpec.class + spec = (DefaultLimitSpec) mapper.readValue( + mapper.writeValueAsString(mapper.readValue(json, LimitSpec.class)), + LimitSpec.class ); Assert.assertEquals( new DefaultLimitSpec(ImmutableList.of(new OrderByColumnSpec("d", OrderByColumnSpec.Direction.ASCENDING, diff --git a/processing/src/test/java/io/druid/query/topn/AlphaNumericTopNMetricSpecTest.java b/processing/src/test/java/io/druid/query/topn/AlphaNumericTopNMetricSpecTest.java index 877d431dd8bf..36562c3388aa 100644 --- 
a/processing/src/test/java/io/druid/query/topn/AlphaNumericTopNMetricSpecTest.java +++ b/processing/src/test/java/io/druid/query/topn/AlphaNumericTopNMetricSpecTest.java @@ -104,8 +104,8 @@ public void testSerdeAlphaNumericTopNMetricSpec() throws IOException + " \"previousStop\": \"test\"\n" + "}"; ObjectMapper jsonMapper = TestHelper.makeJsonMapper(); - TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), AlphaNumericTopNMetricSpec.class); - TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), AlphaNumericTopNMetricSpec.class); + TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), TopNMetricSpec.class); + TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), TopNMetricSpec.class); Assert.assertEquals(expectedMetricSpec, actualMetricSpec); Assert.assertEquals(expectedMetricSpec1, actualMetricSpec1); } diff --git a/processing/src/test/java/io/druid/query/topn/DimensionTopNMetricSpecTest.java b/processing/src/test/java/io/druid/query/topn/DimensionTopNMetricSpecTest.java index 292618ac821c..7091ef5ee93a 100644 --- a/processing/src/test/java/io/druid/query/topn/DimensionTopNMetricSpecTest.java +++ b/processing/src/test/java/io/druid/query/topn/DimensionTopNMetricSpecTest.java @@ -44,8 +44,8 @@ public void testSerdeAlphaNumericDimensionTopNMetricSpec() throws IOException + " \"previousStop\": \"test\"\n" + "}"; ObjectMapper jsonMapper = TestHelper.makeJsonMapper(); - TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), DimensionTopNMetricSpec.class); - TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), DimensionTopNMetricSpec.class); + TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), TopNMetricSpec.class); + TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), TopNMetricSpec.class); Assert.assertEquals(expectedMetricSpec, actualMetricSpec); Assert.assertEquals(expectedMetricSpec1, actualMetricSpec1); } @@ -65,8 +65,8 @@ public void testSerdeLexicographicDimensionTopNMetricSpec() throws IOException + " \"previousStop\": \"test\"\n" + "}"; ObjectMapper jsonMapper = TestHelper.makeJsonMapper(); - TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), DimensionTopNMetricSpec.class); - TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), DimensionTopNMetricSpec.class); + TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), TopNMetricSpec.class); + TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), TopNMetricSpec.class); Assert.assertEquals(expectedMetricSpec, actualMetricSpec); Assert.assertEquals(expectedMetricSpec1, actualMetricSpec1); } @@ -86,8 +86,8 @@ 
public void testSerdeStrlenDimensionTopNMetricSpec() throws IOException + " \"previousStop\": \"test\"\n" + "}"; ObjectMapper jsonMapper = TestHelper.makeJsonMapper(); - TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), DimensionTopNMetricSpec.class); - TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), DimensionTopNMetricSpec.class); + TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), TopNMetricSpec.class); + TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), TopNMetricSpec.class); Assert.assertEquals(expectedMetricSpec, actualMetricSpec); Assert.assertEquals(expectedMetricSpec1, actualMetricSpec1); } @@ -107,8 +107,8 @@ public void testSerdeNumericDimensionTopNMetricSpec() throws IOException + " \"previousStop\": \"test\"\n" + "}"; ObjectMapper jsonMapper = TestHelper.makeJsonMapper(); - TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), DimensionTopNMetricSpec.class); - TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), DimensionTopNMetricSpec.class); + TopNMetricSpec actualMetricSpec = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec, TopNMetricSpec.class)), TopNMetricSpec.class); + TopNMetricSpec actualMetricSpec1 = jsonMapper.readValue(jsonMapper.writeValueAsString(jsonMapper.readValue(jsonSpec1, TopNMetricSpec.class)), TopNMetricSpec.class); Assert.assertEquals(expectedMetricSpec, actualMetricSpec); Assert.assertEquals(expectedMetricSpec1, actualMetricSpec1); } diff --git a/server/pom.xml b/server/pom.xml index 0b9a07a3b9ef..4173650b1b0b 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -70,6 +70,10 @@ org.apache.zookeeper zookeeper + + org.apache.httpcomponents + httpclient + org.apache.curator curator-framework diff --git a/server/src/main/java/io/druid/guice/AWSModule.java b/server/src/main/java/io/druid/guice/AWSModule.java index 15925816a28c..4127f3cb2000 100644 --- a/server/src/main/java/io/druid/guice/AWSModule.java +++ b/server/src/main/java/io/druid/guice/AWSModule.java @@ -27,6 +27,8 @@ import com.google.inject.Provides; import io.druid.common.aws.AWSCredentialsConfig; import io.druid.common.aws.AWSCredentialsUtils; +import io.druid.common.aws.AWSEndpointConfig; +import io.druid.common.aws.AWSProxyConfig; /** */ @@ -36,6 +38,8 @@ public class AWSModule implements Module public void configure(Binder binder) { JsonConfigProvider.bind(binder, "druid.s3", AWSCredentialsConfig.class); + JsonConfigProvider.bind(binder, "druid.s3.proxy", AWSProxyConfig.class); + JsonConfigProvider.bind(binder, "druid.s3.endpoint", AWSEndpointConfig.class); } @Provides diff --git a/server/src/main/java/io/druid/segment/realtime/firehose/HttpFirehoseFactory.java b/server/src/main/java/io/druid/segment/realtime/firehose/HttpFirehoseFactory.java index cf3ecea6fba2..aaab6f9dae55 100644 --- a/server/src/main/java/io/druid/segment/realtime/firehose/HttpFirehoseFactory.java +++ b/server/src/main/java/io/druid/segment/realtime/firehose/HttpFirehoseFactory.java @@ -23,11 +23,11 @@ import com.fasterxml.jackson.annotation.JsonProperty; import 
com.google.common.base.Preconditions; import com.google.common.base.Predicate; +import com.google.common.net.HttpHeaders; import io.druid.data.input.impl.prefetch.PrefetchableTextFilesFirehoseFactory; import io.druid.java.util.common.CompressionUtils; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.logger.Logger; -import org.apache.http.HttpHeaders; import java.io.IOException; import java.io.InputStream; diff --git a/server/src/main/java/io/druid/server/AsyncQueryForwardingServlet.java b/server/src/main/java/io/druid/server/AsyncQueryForwardingServlet.java index 6f67ce06d76b..d10880fe83e5 100644 --- a/server/src/main/java/io/druid/server/AsyncQueryForwardingServlet.java +++ b/server/src/main/java/io/druid/server/AsyncQueryForwardingServlet.java @@ -58,6 +58,7 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response.Status; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; @@ -465,13 +466,11 @@ public void onComplete(Result result) TimeUnit.NANOSECONDS.toMillis(requestTimeNs), "success", success - && result.getResponse().getStatus() == javax.ws.rs.core.Response.Status.OK.getStatusCode() + && result.getResponse().getStatus() == Status.OK.getStatusCode() ) ) ) ); - - } catch (Exception e) { log.error(e, "Unable to log query [%s]!", query); diff --git a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java index 94a776e457f9..1b1508b55584 100644 --- a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java +++ b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java @@ -60,7 +60,6 @@ import io.druid.server.metrics.DataSourceTaskIdHolder; import io.druid.server.metrics.MetricsModule; import io.druid.server.metrics.MonitorsConfig; -import org.apache.http.HttpVersion; import org.eclipse.jetty.server.ConnectionFactory; import org.eclipse.jetty.server.Handler; import org.eclipse.jetty.server.HttpConfiguration; @@ -90,6 +89,7 @@ public class JettyServerModule extends JerseyServletModule private static final Logger log = new Logger(JettyServerModule.class); private static final AtomicInteger activeConnections = new AtomicInteger(); + private static final String HTTP_1_1_STRING = "HTTP/1.1"; @Override protected void configureServlets() @@ -268,7 +268,7 @@ static Server makeAndInitializeServer( httpsConfiguration.setRequestHeaderSize(config.getMaxRequestHeaderSize()); final ServerConnector connector = new ServerConnector( server, - new SslConnectionFactory(sslContextFactory, HttpVersion.HTTP_1_1.toString()), + new SslConnectionFactory(sslContextFactory, HTTP_1_1_STRING), new HttpConnectionFactory(httpsConfiguration) ); connector.setPort(node.getTlsPort()); diff --git a/server/src/test/java/io/druid/query/dimension/LookupDimensionSpecTest.java b/server/src/test/java/io/druid/query/dimension/LookupDimensionSpecTest.java index 7e1ef423f959..02c44c297253 100644 --- a/server/src/test/java/io/druid/query/dimension/LookupDimensionSpecTest.java +++ b/server/src/test/java/io/druid/query/dimension/LookupDimensionSpecTest.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.jsontype.NamedType; import com.google.common.base.Strings; import 
com.google.common.collect.ImmutableMap; import io.druid.jackson.DefaultObjectMapper; @@ -70,12 +71,13 @@ public class LookupDimensionSpecTest public void testSerDesr(DimensionSpec lookupDimSpec) throws IOException { ObjectMapper mapper = new DefaultObjectMapper(); + mapper.registerSubtypes(new NamedType(LookupDimensionSpec.class, "lookup")); InjectableValues injectableValues = new InjectableValues.Std().addValue( LookupReferencesManager.class, LOOKUP_REF_MANAGER ); String serLookup = mapper.writeValueAsString(lookupDimSpec); - Assert.assertEquals(lookupDimSpec, mapper.reader(LookupDimensionSpec.class).with(injectableValues).readValue(serLookup)); + Assert.assertEquals(lookupDimSpec, mapper.reader(DimensionSpec.class).with(injectableValues).readValue(serLookup)); } private Object[] parametersForTestSerDesr() From 0851f2206c129f3dabdd87b48b7531cdb5ba2e37 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 21 Mar 2018 18:19:27 -0700 Subject: [PATCH 02/67] Expanded documentation for DataSketches aggregators. (#5513) Originally written by @AlexanderSaydakov in druid-io/druid-io.github.io#448. I also added redirects and updated links to point to the new datasketches-extension.html landing page for the extension, rather than to the old page about theta sketches. --- docs/_redirects.json | 3 +- .../extensions-core/datasketches-extension.md | 19 +++ .../extensions-core/datasketches-quantiles.md | 92 +++++++++++ ...s-aggregators.md => datasketches-theta.md} | 10 +- .../extensions-core/datasketches-tuple.md | 155 ++++++++++++++++++ docs/content/development/extensions.md | 2 +- docs/content/querying/aggregations.md | 2 +- docs/content/querying/sql.md | 2 +- docs/content/toc.md | 2 +- 9 files changed, 277 insertions(+), 10 deletions(-) create mode 100644 docs/content/development/extensions-core/datasketches-extension.md create mode 100644 docs/content/development/extensions-core/datasketches-quantiles.md rename docs/content/development/extensions-core/{datasketches-aggregators.md => datasketches-theta.md} (91%) create mode 100644 docs/content/development/extensions-core/datasketches-tuple.md diff --git a/docs/_redirects.json b/docs/_redirects.json index 0e6ef6ff9868..b0e619927792 100644 --- a/docs/_redirects.json +++ b/docs/_redirects.json @@ -94,7 +94,8 @@ {"source": "configuration/simple-cluster.html", "target": "../tutorials/cluster.html"}, {"source": "design/concepts-and-terminology.html", "target": "index.html"}, {"source": "development/approximate-histograms.html", "target": "extensions-core/approximate-histograms.html"}, - {"source": "development/datasketches-aggregators.html", "target": "extensions-core/datasketches-aggregators.html"}, + {"source": "development/datasketches-aggregators.html", "target": "extensions-core/datasketches-extension.html"}, + {"source": "development/extensions-core/datasketches-aggregators.html", "target": "datasketches-extension.html"}, {"source": "development/libraries.html", "target": "/libraries.html"}, {"source": "development/kafka-simple-consumer-firehose.html", "target": "extensions-contrib/kafka-simple.html"}, {"source": "development/select-query.html", "target": "../querying/select-query.html"}, diff --git a/docs/content/development/extensions-core/datasketches-extension.md b/docs/content/development/extensions-core/datasketches-extension.md new file mode 100644 index 000000000000..a8a1d8738b82 --- /dev/null +++ b/docs/content/development/extensions-core/datasketches-extension.md @@ -0,0 +1,19 @@ +--- +layout: doc_page +--- + +## DataSketches extension + 
+Druid aggregators based on [datasketches](http://datasketches.github.io/) library. Sketches are data structures implementing approximate streaming mergeable algorithms. Sketches can be ingested from the outside of Druid or built from raw data at ingestion time. Sketches can be stored in Druid segments as additive metrics. + +To use the datasketch aggregators, make sure you [include](../../operations/including-extensions.html) the extension in your config file: + +``` +druid.extensions.loadList=["druid-datasketches"] +``` + +The following aggregators are available: + +1. [Theta sketch](datasketches-theta.html), useful for approximate set counting, and supporting union, intersection, and difference operations. +2. [Quantiles sketch](datasketches-quantiles.html). +3. [Tuple sketch](datasketches-tuple.html). diff --git a/docs/content/development/extensions-core/datasketches-quantiles.md b/docs/content/development/extensions-core/datasketches-quantiles.md new file mode 100644 index 000000000000..c9eeb84c1777 --- /dev/null +++ b/docs/content/development/extensions-core/datasketches-quantiles.md @@ -0,0 +1,92 @@ +--- +layout: doc_page +--- + +## DataSketches Quantiles Sketch module + +This module provides Druid aggregators based on numeric quantiles DoublesSketch from [datasketches](http://datasketches.github.io/) library. Quantiles sketch is a mergeable streaming algorithm to estimate the distribution of values, and approximately answer queries about the rank of a value, probability mass function of the distribution (PMF) or histogram, cummulative distribution function (CDF), and quantiles (median, min, max, 95th percentile and such). See [Quantiles Sketch Overview](https://datasketches.github.io/docs/Quantiles/QuantilesOverview.html). + +There are three major modes of operation: + +1. Ingesting sketches built outside of Druid (say, with Pig or Hive) +2. Building sketches from raw data during ingestion +3. Building sketches from raw data at query time + +To use this aggregator, make sure you [include](../../operations/including-extensions.html) the extension in your config file: + +``` +druid.extensions.loadList=["druid-datasketches"] +``` + +### Aggregator + +The result of the aggregation is a DoublesSketch that is the union of all sketches either built from raw data or read from the segments. + +```json +{ + "type" : "quantilesDoublesSketch", + "name" : , + "fieldName" : , + "k": + } +``` + +|property|description|required?| +|--------|-----------|---------| +|type|This String should always be "quantilesDoublesSketch"|yes| +|name|A String for the output (result) name of the calculation.|yes| +|fieldName|A String for the name of the input field (can contain sketches or raw numeric values).|yes| +|k|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Must be a power of 2 from 2 to 32768. See the [Quantiles Accuracy](https://datasketches.github.io/docs/Quantiles/QuantilesAccuracy.html) for details. |no, defaults to 128| + +### Post Aggregators + +#### Quantile + +This returns an approximation to the value that would be preceded by a given fraction of a hypothetical sorted version of the input stream. 
+ +```json +{ + "type" : "quantilesDoublesSketchToQuantile", + "name": , + "field" : , + "fraction" : +} +``` + +#### Quantiles + +This returns an array of quantiles corresponding to a given array of fractions + +```json +{ + "type" : "quantilesDoublesSketchToQuantiles", + "name": , + "field" : , + "fractions" : +} +``` + +#### Histogram + +This returns an approximation to the histogram given an array of split points that define the histogram bins. An array of m unique, monotonically increasing split points divide the real number line into m+1 consecutive disjoint intervals. The definition of an interval is inclusive of the left split point and exclusive of the right split point. + +```json +{ + "type" : "quantilesDoublesSketchToHistogram", + "name": , + "field" : , + "splitPoints" : +} +``` + +#### Sketch Summary + +This returns a summary of the sketch that can be used for debugging. This is the result of calling toString() method. + +```json +{ + "type" : "quantilesDoublesSketchToString", + "name": , + "field" : +} +``` diff --git a/docs/content/development/extensions-core/datasketches-aggregators.md b/docs/content/development/extensions-core/datasketches-theta.md similarity index 91% rename from docs/content/development/extensions-core/datasketches-aggregators.md rename to docs/content/development/extensions-core/datasketches-theta.md index bf7e4a601f39..07d179555bef 100644 --- a/docs/content/development/extensions-core/datasketches-aggregators.md +++ b/docs/content/development/extensions-core/datasketches-theta.md @@ -2,13 +2,13 @@ layout: doc_page --- -## DataSketches aggregator +## DataSketches Theta Sketch module -Druid aggregators based on [datasketches](http://datasketches.github.io/) library. Note that sketch algorithms are approximate; see details in the "Accuracy" section of the datasketches doc. -At ingestion time, this aggregator creates the theta sketch objects which get stored in Druid segments. Logically speaking, a theta sketch object can be thought of as a Set data structure. At query time, sketches are read and aggregated (set unioned) together. In the end, by default, you receive the estimate of the number of unique entries in the sketch object. Also, you can use post aggregators to do union, intersection or difference on sketch columns in the same row. -Note that you can use `thetaSketch` aggregator on columns which were not ingested using same, it will return estimated cardinality of the column. It is recommended to use it at ingestion time as well to make querying faster. +This module provides Druid aggregators based on Theta sketch from [datasketches](http://datasketches.github.io/) library. Note that sketch algorithms are approximate; see details in the "Accuracy" section of the datasketches doc. +At ingestion time, this aggregator creates the Theta sketch objects which get stored in Druid segments. Logically speaking, a Theta sketch object can be thought of as a Set data structure. At query time, sketches are read and aggregated (set unioned) together. In the end, by default, you receive the estimate of the number of unique entries in the sketch object. Also, you can use post aggregators to do union, intersection or difference on sketch columns in the same row. +Note that you can use `thetaSketch` aggregator on columns which were not ingested using the same. It will return estimated cardinality of the column. It is recommended to use it at ingestion time as well to make querying faster. 
-To use the datasketch aggregators, make sure you [include](../../operations/including-extensions.html) the extension in your config file: +To use this aggregator, make sure you [include](../../operations/including-extensions.html) the extension in your config file: ``` druid.extensions.loadList=["druid-datasketches"] diff --git a/docs/content/development/extensions-core/datasketches-tuple.md b/docs/content/development/extensions-core/datasketches-tuple.md new file mode 100644 index 000000000000..37ed8ead4004 --- /dev/null +++ b/docs/content/development/extensions-core/datasketches-tuple.md @@ -0,0 +1,155 @@ +--- +layout: doc_page +--- + +## DataSketches Tuple Sketch module + +This module provides Druid aggregators based on Tuple sketch from [datasketches](http://datasketches.github.io/) library. ArrayOfDoublesSketch sketches extend the functionality of the count-distinct Theta sketches by adding arrays of double values associated with unique keys. + +To use this aggregator, make sure you [include](../../operations/including-extensions.html) the extension in your config file: + +``` +druid.extensions.loadList=["druid-datasketches"] +``` + +### Aggregators + +```json +{ + "type" : "arrayOfDoublesSketch", + "name" : , + "fieldName" : , + "nominalEntries": , + "numberOfValues" : , + "metricColumns" : + } +``` + +|property|description|required?| +|--------|-----------|---------| +|type|This String should always be "arrayOfDoublesSketch"|yes| +|name|A String for the output (result) name of the calculation.|yes| +|fieldName|A String for the name of the input field.|yes| +|nominalEntries|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Must be a power of 2. See the [Theta sketch accuracy](https://datasketches.github.io/docs/Theta/ThetaErrorTable.html) for details. |no, defaults to 16384| +|numberOfValues|Number of values associated with each distinct key. |no, defaults to 1| +|metricCoulumns|If building sketches from raw data, an array of names of the input columns containing numeric vaues to be associated with each distinct key.|no, defaults to empty array| + +### Post Aggregators + +#### Estimate of the number of distinct keys + +Returns a distinct count estimate from a given ArrayOfDoublesSketch. + +```json +{ + "type" : "arrayOfDoublesSketchToEstimate", + "name": , + "field" : +} +``` + +#### Estimate of the number of distinct keys with error bounds + +Returns a distinct count estimate and error bounds from a given ArrayOfDoublesSketch. The result will be three double values: estimate of the number of distinct keys, lower bound and upper bound. The bounds are provided at the given number of standard deviations (optional, defaults to 1). This must be an integer value of 1, 2 or 3 corresponding to approximately 68.3%, 95.4% and 99.7% confidence intervals. + +```json +{ + "type" : "arrayOfDoublesSketchToEstimateAndBounds", + "name": , + "field" : , + "numStdDevs", +} +``` + +#### Number of retained entries + +Returns the number of retained entries from a given ArrayOfDoublesSketch. + +```json +{ + "type" : "arrayOfDoublesSketchToNumEntries", + "name": , + "field" : +} +``` + +#### Mean values for each column + +Returns a list of mean values from a given ArrayOfDoublesSketch. The result will be N double values, where N is the number of double values kept in the sketch per key. 
+ +```json +{ + "type" : "arrayOfDoublesSketchToMeans", + "name": , + "field" : +} +``` + +#### Variance values for each column + +Returns a list of variance values from a given ArrayOfDoublesSketch. The result will be N double values, where N is the number of double values kept in the sketch per key. + +```json +{ + "type" : "arrayOfDoublesSketchToVariances", + "name": , + "field" : +} +``` + +#### Quantiles sketch from a column + +Returns a quanitles DoublesSketch constructed from a given column of values from a given ArrayOfDoublesSketch using optional parameter k that determines the accuracy and size of the quantiles sketch. See [Quantiles Sketch Module](datasketches-quantiles.html) + +* The column number is 1-based and is optional (the default is 1). +* The parameter k is optional (the default is defined in the sketch library). +* The result is a quantiles sketch. + +```json +{ + "type" : "arrayOfDoublesSketchToQuantilesSketch", + "name": , + "field" : , + "column" : , + "k" : +} +``` + +#### Set Operations + +Returns a result of a specified set operation on the given array of sketches. Supported operations are: union, intersection and set difference (UNION, INTERSECT, NOT). + +```json +{ + "type" : "arrayOfDoublesSketchSetOp", + "name": , + "func": <"UNION"|"INTERSECT"|"NOT">, + "fields" : , + "nominalEntries" : , + "numberOfValues" : +} +``` + +#### Student's t-test + +Performs Student's t-test and returns a list of p-values given two instances of ArrayOfDoublesSketch. The result will be N double values, where N is the number of double values kept in the sketch per key. See [t-test documentation](http://commons.apache.org/proper/commons-math/javadocs/api-3.4/org/apache/commons/math3/stat/inference/TTest.html). + +```json +{ + "type" : "arrayOfDoublesSketchTTest", + "name": , + "fields" : , +} +``` + +#### Sketch summary + +Returns a human-readable summary of a given ArrayOfDoublesSketch. This is a string returned by toString() method of the sketch. This can be useful for debugging. + +```json +{ + "type" : "arrayOfDoublesSketchToString", + "name": , + "field" : +} +``` diff --git a/docs/content/development/extensions.md b/docs/content/development/extensions.md index 4e2bea613fad..41c725ec91b4 100644 --- a/docs/content/development/extensions.md +++ b/docs/content/development/extensions.md @@ -24,7 +24,7 @@ Core extensions are maintained by Druid committers. 
|druid-avro-extensions|Support for data in Apache Avro data format.|[link](../development/extensions-core/avro.html)| |druid-basic-security|Support for Basic HTTP authentication and role-based access control.|[link](../development/extensions-core/druid-basic-security.html)| |druid-caffeine-cache|A local cache implementation backed by Caffeine.|[link](../development/extensions-core/caffeine-cache.html)| -|druid-datasketches|Support for approximate counts and set operations with [DataSketches](http://datasketches.github.io/).|[link](../development/extensions-core/datasketches-aggregators.html)| +|druid-datasketches|Support for approximate counts and set operations with [DataSketches](http://datasketches.github.io/).|[link](../development/extensions-core/datasketches-extension.html)| |druid-hdfs-storage|HDFS deep storage.|[link](../development/extensions-core/hdfs.html)| |druid-histogram|Approximate histograms and quantiles aggregator.|[link](../development/extensions-core/approximate-histograms.html)| |druid-kafka-eight|Kafka ingest firehose (high level consumer) for realtime nodes.|[link](../development/extensions-core/kafka-eight-firehose.html)| diff --git a/docs/content/querying/aggregations.md b/docs/content/querying/aggregations.md index 420daf97afbb..b0ce5cc24c9d 100644 --- a/docs/content/querying/aggregations.md +++ b/docs/content/querying/aggregations.md @@ -332,7 +332,7 @@ The HyperLogLog algorithm generates decimal estimates with some error. "round" c values to whole numbers. Note that even with rounding, the cardinality is still an estimate. The "round" field only affects query-time behavior, and is ignored at ingestion-time. -For more approximate aggregators, please see [theta sketches](../development/extensions-core/datasketches-aggregators.html). +For more approximate aggregators, check out the [DataSketches extension](../development/extensions-core/datasketches-extension.html). ## Miscellaneous Aggregations diff --git a/docs/content/querying/sql.md b/docs/content/querying/sql.md index 5796a66701c9..a993a76f9539 100644 --- a/docs/content/querying/sql.md +++ b/docs/content/querying/sql.md @@ -220,7 +220,7 @@ Druid does not support all SQL features, including: Additionally, some Druid features are not supported by the SQL language. Some unsupported Druid features include: - [Multi-value dimensions](multi-value-dimensions.html). -- [DataSketches aggregators](../development/extensions-core/datasketches-aggregators.html). +- [DataSketches aggregators](../development/extensions-core/datasketches-extension.html). - [Spatial filters](../development/geo.html). - [Query cancellation](querying.html#query-cancellation). 
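As a concrete reading of the quantile definition used in the DataSketches documentation above (the value that would be preceded by a given fraction of a hypothetical sorted version of the input stream), the short standalone sketch below computes that quantity exactly from a small in-memory array. It is illustrative only: the sample values and the floor-based rank convention are assumptions, it does not use the DataSketches library, and `quantilesDoublesSketchToQuantile` approximates the same quantity without retaining every value.

```java
import java.util.Arrays;

public class ExactQuantileIllustration
{
  // Exact analogue of the quantile a DoublesSketch approximates: sort a copy of
  // the stream and pick the value at the requested fractional rank.
  // The rank convention here (floor of fraction * (n - 1)) is one common choice and
  // is an assumption; the sketch library documents its own conventions and error bounds.
  static double exactQuantile(double[] stream, double fraction)
  {
    final double[] sorted = stream.clone();
    Arrays.sort(sorted);
    final int rank = (int) Math.floor(fraction * (sorted.length - 1));
    return sorted[rank];
  }

  public static void main(String[] args)
  {
    final double[] latenciesMs = {12, 7, 31, 5, 18, 44, 9, 27}; // made-up sample values
    System.out.println("median (0.5)  = " + exactQuantile(latenciesMs, 0.5));
    System.out.println("p95    (0.95) = " + exactQuantile(latenciesMs, 0.95));
  }
}
```

In a Druid query, the same question would be asked with a `quantilesDoublesSketch` aggregator plus a `quantilesDoublesSketchToQuantile` post-aggregator, as shown in the JSON examples above.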
diff --git a/docs/content/toc.md index e51f95c05d46..addd5ebdc839 100644 --- a/docs/content/toc.md +++ b/docs/content/toc.md @@ -103,7 +103,7 @@ layout: toc * Experimental Features * [Overview](/docs/VERSION/development/experimental.html) * [Approximate Histograms and Quantiles](/docs/VERSION/development/extensions-core/approximate-histograms.html) - * [Datasketches](/docs/VERSION/development/extensions-core/datasketches-aggregators.html) + * [Datasketches](/docs/VERSION/development/extensions-core/datasketches-extension.html) * [Geographic Queries](/docs/VERSION/development/geo.html) * [Router](/docs/VERSION/development/router.html) * [Kafka Indexing Service](/docs/VERSION/development/extensions-core/kafka-ingestion.html) From ef21ce5a647003600a076bf691e0bcdf0ec58987 Mon Sep 17 00:00:00 2001 From: Charles Allen Date: Fri, 23 Mar 2018 09:38:17 -0700 Subject: [PATCH 03/67] Add graceful shutdown timeout for Jetty (#5429) * Add graceful shutdown timeout * Handle interruptedException * Incorporate code review comments * Address code review comments * Poll for activeConnections to be zero * Use statistics handler to get active requests * Use native jetty shutdown gracefully * Move log line back to where it was * Add unannounce wait time * Make the default retain prior behavior * Update docs with new config defaults * Make duration handling on jetty shutdown more consistent * StatisticsHandler is a wrapper * Move jetty lifecycle error logging to error --- docs/content/configuration/broker.md | 2 + docs/content/configuration/historical.md | 2 + .../server/initialization/ServerConfig.java | 46 +++++++++++++++++- .../jetty/JettyServerModule.java | 48 +++++++++++++++++++ .../cli/QueryJettyServerInitializer.java | 26 +++++++--- 5 files changed, 115 insertions(+), 9 deletions(-) diff --git a/docs/content/configuration/broker.md b/docs/content/configuration/broker.md index edddca824db3..f36ae2a95e2d 100644 --- a/docs/content/configuration/broker.md +++ b/docs/content/configuration/broker.md @@ -41,6 +41,8 @@ Druid uses Jetty to serve HTTP requests. |`druid.server.http.enableRequestLimit`|If enabled, no requests would be queued in jetty queue and "HTTP 429 Too Many Requests" error response would be sent. |false| |`druid.server.http.defaultQueryTimeout`|Query timeout in millis, beyond which unfinished queries will be cancelled|300000| |`druid.server.http.maxScatterGatherBytes`|Maximum number of bytes gathered from data nodes such as historicals and realtime processes to execute a query. This is an advance configuration that allows to protect in case broker is under heavy load and not utilizing the data gathered in memory fast enough and leading to OOMs. This limit can be further reduced at query time using `maxScatterGatherBytes` in the context. Note that having large limit is not necessarily bad if broker is never under heavy concurrent load in which case data gathered is processed quickly and freeing up the memory used.|Long.MAX_VALUE| +|`druid.server.http.gracefulShutdownTimeout`|The maximum amount of time Jetty waits after receiving shutdown signal. After this timeout the threads will be forcefully shutdown. This allows any queries that are executing to complete.|`PT0s` (do not wait)| +|`druid.server.http.unannouncePropogationDelay`|How long to wait for zookeeper unannouncements to propagate before shutting down Jetty. 
This is a minimum and `druid.server.http.gracefulShutdownTimeout` does not start counting down until after this period elapses.|`PT0s` (do not wait)| |`druid.broker.http.numConnections`|Size of connection pool for the Broker to connect to historical and real-time processes. If there are more queries than this number that all need to speak to the same node, then they will queue up.|20| |`druid.broker.http.compressionCodec`|Compression codec the Broker uses to communicate with historical and real-time processes. May be "gzip" or "identity".|gzip| |`druid.broker.http.readTimeout`|The timeout for data reads from historical and real-time processes.|PT15M| diff --git a/docs/content/configuration/historical.md b/docs/content/configuration/historical.md index 9290e265d27e..7d2d1ff49873 100644 --- a/docs/content/configuration/historical.md +++ b/docs/content/configuration/historical.md @@ -54,6 +54,8 @@ Druid uses Jetty to serve HTTP requests. |`druid.server.http.maxIdleTime`|The Jetty max idle time for a connection.|PT5m| |`druid.server.http.enableRequestLimit`|If enabled, no requests would be queued in jetty queue and "HTTP 429 Too Many Requests" error response would be sent. |false| |`druid.server.http.defaultQueryTimeout`|Query timeout in millis, beyond which unfinished queries will be cancelled|300000| +|`druid.server.http.gracefulShutdownTimeout`|The maximum amount of time Jetty waits after receiving shutdown signal. After this timeout the threads will be forcefully shutdown. This allows any queries that are executing to complete.|`PT0s` (do not wait)| +|`druid.server.http.unannouncePropogationDelay`|How long to wait for zookeeper unannouncements to propagate before shutting down Jetty. This is a minimum and `druid.server.http.gracefulShutdownTimeout` does not start counting down until after this period elapses.|`PT0s` (do not wait)| |`druid.server.http.maxQueryTimeout`|Maximum allowed value (in milliseconds) for `timeout` parameter. See [query-context](query-context.html) to know more about `timeout`. Query is rejected if the query context `timeout` is greater than this value. |Long.MAX_VALUE| |`druid.server.http.maxRequestHeaderSize`|Maximum size of a request header in bytes. 
Larger headers consume more memory and can make a server more vulnerable to denial of service attacks.|8 * 1024| diff --git a/server/src/main/java/io/druid/server/initialization/ServerConfig.java b/server/src/main/java/io/druid/server/initialization/ServerConfig.java index 9c0db293583d..8fa80d5e50b6 100644 --- a/server/src/main/java/io/druid/server/initialization/ServerConfig.java +++ b/server/src/main/java/io/druid/server/initialization/ServerConfig.java @@ -60,6 +60,14 @@ public class ServerConfig @JsonProperty private int maxRequestHeaderSize = 8 * 1024; + @JsonProperty + @NotNull + private Period gracefulShutdownTimeout = Period.ZERO; + + @JsonProperty + @NotNull + private Period unannouncePropogationDelay = Period.ZERO; + public int getNumThreads() { return numThreads; @@ -100,6 +108,16 @@ public int getMaxRequestHeaderSize() return maxRequestHeaderSize; } + public Period getGracefulShutdownTimeout() + { + return gracefulShutdownTimeout; + } + + public Period getUnannouncePropogationDelay() + { + return unannouncePropogationDelay; + } + @Override public boolean equals(Object o) { @@ -115,13 +133,17 @@ public boolean equals(Object o) enableRequestLimit == that.enableRequestLimit && defaultQueryTimeout == that.defaultQueryTimeout && maxScatterGatherBytes == that.maxScatterGatherBytes && + maxQueryTimeout == that.maxQueryTimeout && + maxRequestHeaderSize == that.maxRequestHeaderSize && Objects.equals(maxIdleTime, that.maxIdleTime) && - maxQueryTimeout == that.maxQueryTimeout; + Objects.equals(gracefulShutdownTimeout, that.gracefulShutdownTimeout) && + Objects.equals(unannouncePropogationDelay, that.unannouncePropogationDelay); } @Override public int hashCode() { + return Objects.hash( numThreads, queueSize, @@ -129,7 +151,27 @@ public int hashCode() maxIdleTime, defaultQueryTimeout, maxScatterGatherBytes, - maxQueryTimeout + maxQueryTimeout, + maxRequestHeaderSize, + gracefulShutdownTimeout, + unannouncePropogationDelay ); } + + @Override + public String toString() + { + return "ServerConfig{" + + "numThreads=" + numThreads + + ", queueSize=" + queueSize + + ", enableRequestLimit=" + enableRequestLimit + + ", maxIdleTime=" + maxIdleTime + + ", defaultQueryTimeout=" + defaultQueryTimeout + + ", maxScatterGatherBytes=" + maxScatterGatherBytes + + ", maxQueryTimeout=" + maxQueryTimeout + + ", maxRequestHeaderSize=" + maxRequestHeaderSize + + ", gracefulShutdownTimeout=" + gracefulShutdownTimeout + + ", unannouncePropogationDelay=" + unannouncePropogationDelay + + '}'; + } } diff --git a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java index 1b1508b55584..b4a5cc44f329 100644 --- a/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java +++ b/server/src/main/java/io/druid/server/initialization/jetty/JettyServerModule.java @@ -68,6 +68,7 @@ import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.ServerConnector; import org.eclipse.jetty.server.SslConnectionFactory; +import org.eclipse.jetty.util.component.LifeCycle; import org.eclipse.jetty.util.ssl.SslContextFactory; import org.eclipse.jetty.util.thread.QueuedThreadPool; import org.eclipse.jetty.util.thread.ScheduledExecutorScheduler; @@ -294,6 +295,42 @@ static Server makeAndInitializeServer( } server.setConnectors(connectors); + final long gracefulStop = config.getGracefulShutdownTimeout().toStandardDuration().getMillis(); + if (gracefulStop > 0) { + server.setStopTimeout(gracefulStop); 
+ } + server.addLifeCycleListener(new LifeCycle.Listener() + { + @Override + public void lifeCycleStarting(LifeCycle event) + { + log.debug("Jetty lifecycle starting [%s]", event.getClass()); + } + + @Override + public void lifeCycleStarted(LifeCycle event) + { + log.debug("Jetty lifeycle started [%s]", event.getClass()); + } + + @Override + public void lifeCycleFailure(LifeCycle event, Throwable cause) + { + log.error(cause, "Jetty lifecycle event failed [%s]", event.getClass()); + } + + @Override + public void lifeCycleStopping(LifeCycle event) + { + log.debug("Jetty lifecycle stopping [%s]", event.getClass()); + } + + @Override + public void lifeCycleStopped(LifeCycle event) + { + log.debug("Jetty lifecycle stopped [%s]", event.getClass()); + } + }); // initialize server JettyServerInitializer initializer = injector.getInstance(JettyServerInitializer.class); @@ -339,9 +376,20 @@ public void start() throws Exception public void stop() { try { + final long unannounceDelay = config.getUnannouncePropogationDelay().toStandardDuration().getMillis(); + if (unannounceDelay > 0) { + log.info("Waiting %s ms for unannouncement to propogate.", unannounceDelay); + Thread.sleep(unannounceDelay); + } else { + log.debug("Skipping unannounce wait."); + } log.info("Stopping Jetty Server..."); server.stop(); } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RE(e, "Interrupted waiting for jetty shutdown."); + } catch (Exception e) { log.warn(e, "Unable to stop Jetty server."); } diff --git a/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java b/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java index 0bc34f17fee9..e9ddfb6f1c63 100644 --- a/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java +++ b/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java @@ -39,6 +39,7 @@ import org.eclipse.jetty.server.Handler; import org.eclipse.jetty.server.Server; import org.eclipse.jetty.server.handler.HandlerList; +import org.eclipse.jetty.server.handler.StatisticsHandler; import org.eclipse.jetty.servlet.DefaultServlet; import org.eclipse.jetty.servlet.FilterHolder; import org.eclipse.jetty.servlet.ServletContextHandler; @@ -111,13 +112,24 @@ public void initialize(Server server, Injector injector) root.addFilter(GuiceFilter.class, "/*", null); final HandlerList handlerList = new HandlerList(); - final Handler[] handlers = new Handler[extensionHandlers.size() + 2]; - handlers[0] = JettyServerInitUtils.getJettyRequestLogHandler(); - handlers[handlers.length - 1] = JettyServerInitUtils.wrapWithDefaultGzipHandler(root); - for (int i = 0; i < extensionHandlers.size(); i++) { - handlers[i + 1] = extensionHandlers.get(i); + // Do not change the order of the handlers that have already been added + for (Handler handler : server.getHandlers()) { + handlerList.addHandler(handler); } - handlerList.setHandlers(handlers); - server.setHandler(handlerList); + + handlerList.addHandler(JettyServerInitUtils.getJettyRequestLogHandler()); + + // Add all extension handlers + for (Handler handler : extensionHandlers) { + handlerList.addHandler(handler); + } + + // Add Gzip handler at the very end + handlerList.addHandler(JettyServerInitUtils.wrapWithDefaultGzipHandler(root)); + + final StatisticsHandler statisticsHandler = new StatisticsHandler(); + statisticsHandler.setHandler(handlerList); + + server.setHandler(statisticsHandler); } } From ec17a44e0983cd6391ba9f60a1c5571a59595b0a Mon Sep 17 00:00:00 2001 From: Atul Mohan Date: Fri, 23 Mar 
2018 21:11:52 -0500 Subject: [PATCH 04/67] Add result level caching to Brokers (#5028) * Add result level caching to Brokers * Minor doc changes * Simplify sequences * Move etag execution * Modify cacheLimit criteria * Fix incorrect etag computation * Fix docs * Add separate query runner for result level caching * Update docs * Add post aggregated results to result level cache * Fix indents * Check byte size for exceeding cache limit * Fix indents * Fix indents * Add flag for result caching * Remove logs * Make cache object generation synchronous * Avoid saving intermediate cache results to list * Fix changes that handle etag based response * Release bytestream after use * Address PR comments * Discard resultcache stream after use * Fix docs * Address comments * Add comment about fluent workflow issue --- docs/content/configuration/broker.md | 3 + docs/content/querying/caching.md | 8 +- docs/content/querying/query-context.md | 2 + .../java/io/druid/query/CacheStrategy.java | 25 +- .../java/io/druid/query/QueryContexts.java | 22 ++ .../groupby/GroupByQueryQueryToolChest.java | 17 +- .../SegmentMetadataQueryQueryToolChest.java | 4 +- .../search/SearchQueryQueryToolChest.java | 4 +- .../select/SelectQueryQueryToolChest.java | 4 +- .../TimeBoundaryQueryQueryToolChest.java | 4 +- .../TimeseriesQueryQueryToolChest.java | 16 +- .../query/topn/TopNQueryQueryToolChest.java | 17 +- ...egmentMetadataQueryQueryToolChestTest.java | 4 +- .../search/SearchQueryQueryToolChestTest.java | 4 +- .../TimeBoundaryQueryQueryToolChestTest.java | 4 +- .../TimeseriesQueryQueryToolChestTest.java | 29 +- .../topn/TopNQueryQueryToolChestTest.java | 37 ++- .../druid/client/CachingClusteredClient.java | 4 +- .../io/druid/client/CachingQueryRunner.java | 4 +- .../io/druid/client/ResultLevelCacheUtil.java | 94 ++++++ .../io/druid/client/cache/CacheConfig.java | 27 +- .../query/ResultLevelCachingQueryRunner.java | 302 ++++++++++++++++++ .../server/ClientQuerySegmentWalker.java | 30 +- .../druid/client/CachingQueryRunnerTest.java | 4 +- 24 files changed, 618 insertions(+), 51 deletions(-) create mode 100644 server/src/main/java/io/druid/client/ResultLevelCacheUtil.java create mode 100644 server/src/main/java/io/druid/query/ResultLevelCachingQueryRunner.java diff --git a/docs/content/configuration/broker.md b/docs/content/configuration/broker.md index f36ae2a95e2d..0d57e843e348 100644 --- a/docs/content/configuration/broker.md +++ b/docs/content/configuration/broker.md @@ -110,6 +110,9 @@ You can optionally only configure caching to be enabled on the broker by setting |--------|---------------|-----------|-------| |`druid.broker.cache.useCache`|true, false|Enable the cache on the broker.|false| |`druid.broker.cache.populateCache`|true, false|Populate the cache on the broker.|false| +|`druid.broker.cache.useResultLevelCache`|true, false|Enable result level caching on the broker.|false| +|`druid.broker.cache.populateResultLevelCache`|true, false|Populate the result level cache on the broker.|false| +|`druid.broker.cache.resultLevelCacheLimit`|positive integer|Maximum size of query response that can be cached.|`Integer.MAX_VALUE`| |`druid.broker.cache.unCacheable`|All druid query types|All query types to not cache.|`["groupBy", "select"]`| |`druid.broker.cache.cacheBulkMergeLimit`|positive integer or 0|Queries with more segments than this number will not attempt to fetch from cache at the broker level, leaving potential caching fetches (and cache result merging) to the historicals|`Integer.MAX_VALUE`| diff --git 
a/docs/content/querying/caching.md b/docs/content/querying/caching.md index 1d6f60af441e..2dd2c88d6b62 100644 --- a/docs/content/querying/caching.md +++ b/docs/content/querying/caching.md @@ -3,9 +3,10 @@ layout: doc_page --- # Query Caching -Druid supports query result caching through an LRU cache. Results are stored on a per segment basis, along with the -parameters of a given query. This allows Druid to return final results based partially on segment results in the cache and partially -on segment results from scanning historical/real-time segments. +Druid supports query result caching through an LRU cache. Results are stored either as a whole or on a per segment basis, along with the +parameters of a given query. Segment level caching allows Druid to return final results based partially on segment results in the cache +and partially on segment results from scanning historical/real-time segments. Result level caching enables Druid to cache the entire +result set, so that query results can be completely retrieved from the cache for identical queries. Segment results can be stored in a local heap cache or in an external distributed key/value store. Segment query caches can be enabled at either the Historical and Broker level (it is not recommended to enable caching on both). @@ -15,6 +16,7 @@ can be enabled at either the Historical and Broker level (it is not recommended Enabling caching on the broker can yield faster results than if query caches were enabled on Historicals for small clusters. This is the recommended setup for smaller production clusters (< 20 servers). Take note that when caching is enabled on the Broker, results from Historicals are returned on a per segment basis, and Historicals will not be able to do any local result merging. +Result level caching is enabled only on the Broker side. ## Query caching on Historicals diff --git a/docs/content/querying/query-context.md b/docs/content/querying/query-context.md index b0effe81cf9d..d4e2be28f123 100644 --- a/docs/content/querying/query-context.md +++ b/docs/content/querying/query-context.md @@ -15,6 +15,8 @@ The query context is used for various query configuration parameters. The follow |queryId | auto-generated | Unique identifier given to this query. If a query ID is set or known, this can be used to cancel the query | |useCache | `true` | Flag indicating whether to leverage the query cache for this query. When set to false, it disables reading from the query cache for this query. When set to true, Druid uses druid.broker.cache.useCache or druid.historical.cache.useCache to determine whether or not to read from the query cache | |populateCache | `true` | Flag indicating whether to save the results of the query to the query cache. Primarily used for debugging. When set to false, it disables saving the results of this query to the query cache. When set to true, Druid uses druid.broker.cache.populateCache or druid.historical.cache.populateCache to determine whether or not to save the results of this query to the query cache | +|useResultLevelCache | `false` | Flag indicating whether to leverage the result level cache for this query. When set to false, it disables reading from the result level cache for this query. When set to true, Druid uses druid.broker.cache.useResultLevelCache to determine whether or not to read from the result level cache | +|populateResultLevelCache | `false` | Flag indicating whether to save the results of the query to the result level cache. Primarily used for debugging.
When set to false, it disables saving the results of this query to the result level cache. When set to true, Druid uses druid.broker.cache.populateResultLevelCache to determine whether or not to save the results of this query to the result level cache | |bySegment | `false` | Return "by segment" results. Primarily used for debugging, setting it to `true` returns results associated with the data segment they came from | |finalize | `true` | Flag indicating whether to "finalize" aggregation results. Primarily used for debugging. For instance, the `hyperUnique` aggregator will return the full HyperLogLog sketch instead of the estimated cardinality when this flag is set to `false` | |chunkPeriod | `P0D` (off) | At the broker node level, long interval queries (of any type) may be broken into shorter interval queries to parallelize merging more than normal. Broken up queries will use a larger share of cluster resources, but may be able to complete faster as a result. Use ISO 8601 periods. For example, if this property is set to `P1M` (one month), then a query covering a year would be broken into 12 smaller queries. The broker uses its query processing executor service to initiate processing for query chunks, so make sure "druid.processing.numThreads" is configured appropriately on the broker. [groupBy queries](groupbyquery.html) do not support chunkPeriod by default, although they do if using the legacy "v1" engine. | diff --git a/processing/src/main/java/io/druid/query/CacheStrategy.java b/processing/src/main/java/io/druid/query/CacheStrategy.java index 95681dd1b067..2c3ec35ed193 100644 --- a/processing/src/main/java/io/druid/query/CacheStrategy.java +++ b/processing/src/main/java/io/druid/query/CacheStrategy.java @@ -26,7 +26,7 @@ import java.util.concurrent.ExecutorService; /** -*/ + */ @ExtensionPoint public interface CacheStrategy> { @@ -37,6 +37,7 @@ public interface CacheStrategy> * @param query the query to be cached * @param willMergeRunners indicates that {@link QueryRunnerFactory#mergeRunners(ExecutorService, Iterable)} will be * called on the cached by-segment results + * * @return true if the query is cacheable, otherwise false. */ boolean isCacheable(QueryType query, boolean willMergeRunners); @@ -45,6 +46,7 @@ public interface CacheStrategy> * Computes the cache key for the given query * * @param query the query to compute a cache key for + * * @return the cache key */ byte[] computeCacheKey(QueryType query); @@ -58,17 +60,32 @@ public interface CacheStrategy> /** * Returns a function that converts from the QueryType's result type to something cacheable. - + *

* The resulting function must be thread-safe. * + * @param isResultLevelCache indicates whether the function is invoked for result-level caching or segment-level caching + * * @return a thread-safe function that converts the QueryType's result type into something cacheable */ - Function prepareForCache(); + Function prepareForCache(boolean isResultLevelCache); /** * A function that does the inverse of the operation that the function prepareForCache returns * + * @param isResultLevelCache indicates whether the function is invoked for result-level caching or segment-level caching + * * @return A function that does the inverse of the operation that the function prepareForCache returns */ - Function pullFromCache(); + Function pullFromCache(boolean isResultLevelCache); + + + default Function prepareForSegmentLevelCache() + { + return prepareForCache(false); + } + + default Function pullFromSegmentLevelCache() + { + return pullFromCache(false); + } } diff --git a/processing/src/main/java/io/druid/query/QueryContexts.java b/processing/src/main/java/io/druid/query/QueryContexts.java index d0a16fd87844..d88a536eb9ec 100644 --- a/processing/src/main/java/io/druid/query/QueryContexts.java +++ b/processing/src/main/java/io/druid/query/QueryContexts.java @@ -37,6 +37,8 @@ public class QueryContexts public static final boolean DEFAULT_BY_SEGMENT = false; public static final boolean DEFAULT_POPULATE_CACHE = true; public static final boolean DEFAULT_USE_CACHE = true; + public static final boolean DEFAULT_POPULATE_RESULTLEVEL_CACHE = true; + public static final boolean DEFAULT_USE_RESULTLEVEL_CACHE = true; public static final int DEFAULT_PRIORITY = 0; public static final int DEFAULT_UNCOVERED_INTERVALS_LIMIT = 0; public static final long DEFAULT_TIMEOUT_MILLIS = 300_000; // 5 minutes @@ -72,6 +74,26 @@ public static boolean isUseCache(Query query, boolean defaultValue) return parseBoolean(query, "useCache", defaultValue); } + public static boolean isPopulateResultLevelCache(Query query) + { + return isPopulateResultLevelCache(query, DEFAULT_POPULATE_RESULTLEVEL_CACHE); + } + + public static boolean isPopulateResultLevelCache(Query query, boolean defaultValue) + { + return parseBoolean(query, "populateResultLevelCache", defaultValue); + } + + public static boolean isUseResultLevelCache(Query query) + { + return isUseResultLevelCache(query, DEFAULT_USE_RESULTLEVEL_CACHE); + } + + public static boolean isUseResultLevelCache(Query query, boolean defaultValue) + { + return parseBoolean(query, "useResultLevelCache", defaultValue); + } + public static boolean isFinalize(Query query, boolean defaultValue) { return parseBoolean(query, "finalize", defaultValue); diff --git a/processing/src/main/java/io/druid/query/groupby/GroupByQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/groupby/GroupByQueryQueryToolChest.java index 28d5acb42de0..1252caa40ddd 100644 --- a/processing/src/main/java/io/druid/query/groupby/GroupByQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/groupby/GroupByQueryQueryToolChest.java @@ -50,6 +50,7 @@ import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.MetricManipulationFn; import io.druid.query.aggregation.MetricManipulatorFns; +import io.druid.query.aggregation.PostAggregator; import io.druid.query.cache.CacheKeyBuilder; import io.druid.query.dimension.DefaultDimensionSpec; import io.druid.query.dimension.DimensionSpec; @@ -408,7 +409,7 @@ public TypeReference getCacheObjectClazz() } @Override - public Function 
prepareForCache() + public Function prepareForCache(boolean isResultLevelCache) { return new Function() { @@ -426,6 +427,11 @@ public Object apply(Row input) for (AggregatorFactory agg : aggs) { retVal.add(event.get(agg.getName())); } + if (isResultLevelCache) { + for (PostAggregator postAgg : query.getPostAggregatorSpecs()) { + retVal.add(event.get(postAgg.getName())); + } + } return retVal; } @@ -435,7 +441,7 @@ public Object apply(Row input) } @Override - public Function pullFromCache() + public Function pullFromCache(boolean isResultLevelCache) { return new Function() { @@ -460,7 +466,12 @@ public Row apply(Object input) final AggregatorFactory factory = aggsIter.next(); event.put(factory.getName(), factory.deserialize(results.next())); } - + if (isResultLevelCache) { + Iterator postItr = query.getPostAggregatorSpecs().iterator(); + while (postItr.hasNext() && results.hasNext()) { + event.put(postItr.next().getName(), results.next()); + } + } if (dimsIter.hasNext() || aggsIter.hasNext() || results.hasNext()) { throw new ISE( "Found left over objects while reading from cache!! dimsIter[%s] aggsIter[%s] results[%s]", diff --git a/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java index 4a921480f30f..5025d9dbc975 100644 --- a/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChest.java @@ -198,7 +198,7 @@ public TypeReference getCacheObjectClazz() } @Override - public Function prepareForCache() + public Function prepareForCache(boolean isResultLevelCache) { return new Function() { @@ -211,7 +211,7 @@ public SegmentAnalysis apply(@Nullable SegmentAnalysis input) } @Override - public Function pullFromCache() + public Function pullFromCache(boolean isResultLevelCache) { return new Function() { diff --git a/processing/src/main/java/io/druid/query/search/SearchQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/search/SearchQueryQueryToolChest.java index 74148d66aea4..9efad496b351 100644 --- a/processing/src/main/java/io/druid/query/search/SearchQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/search/SearchQueryQueryToolChest.java @@ -206,7 +206,7 @@ public TypeReference getCacheObjectClazz() } @Override - public Function, Object> prepareForCache() + public Function, Object> prepareForCache(boolean isResultLevelCache) { return new Function, Object>() { @@ -221,7 +221,7 @@ public Object apply(Result input) } @Override - public Function> pullFromCache() + public Function> pullFromCache(boolean isResultLevelCache) { return new Function>() { diff --git a/processing/src/main/java/io/druid/query/select/SelectQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/select/SelectQueryQueryToolChest.java index 786391515604..c0c3d8278502 100644 --- a/processing/src/main/java/io/druid/query/select/SelectQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/select/SelectQueryQueryToolChest.java @@ -243,7 +243,7 @@ public TypeReference getCacheObjectClazz() } @Override - public Function, Object> prepareForCache() + public Function, Object> prepareForCache(boolean isResultLevelCache) { return new Function, Object>() { @@ -272,7 +272,7 @@ public Object apply(final Result input) } @Override - public Function> pullFromCache() + public Function> pullFromCache(boolean isResultLevelCache) { return new 
Function>() { diff --git a/processing/src/main/java/io/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java index 99586b595dd3..a1046a00c05b 100644 --- a/processing/src/main/java/io/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java @@ -171,7 +171,7 @@ public TypeReference getCacheObjectClazz() } @Override - public Function, Object> prepareForCache() + public Function, Object> prepareForCache(boolean isResultLevelCache) { return new Function, Object>() { @@ -184,7 +184,7 @@ public Object apply(Result input) } @Override - public Function> pullFromCache() + public Function> pullFromCache(boolean isResultLevelCache) { return new Function>() { diff --git a/processing/src/main/java/io/druid/query/timeseries/TimeseriesQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/timeseries/TimeseriesQueryQueryToolChest.java index 473775b71bc0..6e9f0307f70d 100644 --- a/processing/src/main/java/io/druid/query/timeseries/TimeseriesQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/timeseries/TimeseriesQueryQueryToolChest.java @@ -174,7 +174,7 @@ public TypeReference getCacheObjectClazz() } @Override - public Function, Object> prepareForCache() + public Function, Object> prepareForCache(boolean isResultLevelCache) { return new Function, Object>() { @@ -188,14 +188,18 @@ public Object apply(final Result input) for (AggregatorFactory agg : aggs) { retVal.add(results.getMetric(agg.getName())); } - + if (isResultLevelCache) { + for (PostAggregator postAgg : query.getPostAggregatorSpecs()) { + retVal.add(results.getMetric(postAgg.getName())); + } + } return retVal; } }; } @Override - public Function> pullFromCache() + public Function> pullFromCache(boolean isResultLevelCache) { return new Function>() { @@ -216,6 +220,12 @@ public Result apply(@Nullable Object input) final AggregatorFactory factory = aggsIter.next(); retVal.put(factory.getName(), factory.deserialize(resultIter.next())); } + if (isResultLevelCache) { + Iterator postItr = query.getPostAggregatorSpecs().iterator(); + while (postItr.hasNext() && resultIter.hasNext()) { + retVal.put(postItr.next().getName(), resultIter.next()); + } + } return new Result( timestamp, diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java b/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java index 81761afc6a37..c34e43eb1a46 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryQueryToolChest.java @@ -293,7 +293,6 @@ public TypeReference> getResultTypeReference() } - @Override public CacheStrategy, Object, TopNQuery> getCacheStrategy(final TopNQuery query) { @@ -341,7 +340,7 @@ public TypeReference getCacheObjectClazz() } @Override - public Function, Object> prepareForCache() + public Function, Object> prepareForCache(boolean isResultLevelCache) { return new Function, Object>() { @@ -361,6 +360,11 @@ public Object apply(final Result input) for (String aggName : aggFactoryNames) { vals.add(result.getMetric(aggName)); } + if (isResultLevelCache) { + for (PostAggregator postAgg : query.getPostAggregatorSpecs()) { + vals.add(result.getMetric(postAgg.getName())); + } + } retVal.add(vals); } return retVal; @@ -369,7 +373,7 @@ public Object apply(final Result input) } @Override - public Function> 
pullFromCache() + public Function> pullFromCache(boolean isResultLevelCache) { return new Function>() { @@ -401,7 +405,12 @@ public Result apply(Object input) for (PostAggregator postAgg : postAggs) { vals.put(postAgg.getName(), postAgg.compute(vals)); } - + if (isResultLevelCache) { + Iterator postItr = query.getPostAggregatorSpecs().iterator(); + while (postItr.hasNext() && resultIter.hasNext()) { + vals.put(postItr.next().getName(), resultIter.next()); + } + } retVal.add(vals); } diff --git a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java index 359e39f6af2b..a497a6313801 100644 --- a/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java +++ b/processing/src/test/java/io/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java @@ -94,7 +94,7 @@ public void testCacheStrategy() throws Exception null ); - Object preparedValue = strategy.prepareForCache().apply(result); + Object preparedValue = strategy.prepareForSegmentLevelCache().apply(result); ObjectMapper objectMapper = new DefaultObjectMapper(); SegmentAnalysis fromCacheValue = objectMapper.readValue( @@ -102,7 +102,7 @@ public void testCacheStrategy() throws Exception strategy.getCacheObjectClazz() ); - SegmentAnalysis fromCacheResult = strategy.pullFromCache().apply(fromCacheValue); + SegmentAnalysis fromCacheResult = strategy.pullFromSegmentLevelCache().apply(fromCacheValue); Assert.assertEquals(result, fromCacheResult); } diff --git a/processing/src/test/java/io/druid/query/search/SearchQueryQueryToolChestTest.java b/processing/src/test/java/io/druid/query/search/SearchQueryQueryToolChestTest.java index 621a435f5e95..dd73518ae5fe 100644 --- a/processing/src/test/java/io/druid/query/search/SearchQueryQueryToolChestTest.java +++ b/processing/src/test/java/io/druid/query/search/SearchQueryQueryToolChestTest.java @@ -59,7 +59,7 @@ public void testCacheStrategy() throws Exception new SearchResultValue(ImmutableList.of(new SearchHit("dim1", "a"))) ); - Object preparedValue = strategy.prepareForCache().apply( + Object preparedValue = strategy.prepareForSegmentLevelCache().apply( result ); @@ -69,7 +69,7 @@ public void testCacheStrategy() throws Exception strategy.getCacheObjectClazz() ); - Result fromCacheResult = strategy.pullFromCache().apply(fromCacheValue); + Result fromCacheResult = strategy.pullFromSegmentLevelCache().apply(fromCacheValue); Assert.assertEquals(result, fromCacheResult); } diff --git a/processing/src/test/java/io/druid/query/timeboundary/TimeBoundaryQueryQueryToolChestTest.java b/processing/src/test/java/io/druid/query/timeboundary/TimeBoundaryQueryQueryToolChestTest.java index e9092200682f..d947613dbf62 100644 --- a/processing/src/test/java/io/druid/query/timeboundary/TimeBoundaryQueryQueryToolChestTest.java +++ b/processing/src/test/java/io/druid/query/timeboundary/TimeBoundaryQueryQueryToolChestTest.java @@ -215,7 +215,7 @@ public void testCacheStrategy() throws Exception ) ); - Object preparedValue = strategy.prepareForCache().apply( + Object preparedValue = strategy.prepareForSegmentLevelCache().apply( result ); @@ -225,7 +225,7 @@ public void testCacheStrategy() throws Exception strategy.getCacheObjectClazz() ); - Result fromCacheResult = strategy.pullFromCache().apply(fromCacheValue); + Result fromCacheResult = strategy.pullFromSegmentLevelCache().apply(fromCacheValue); Assert.assertEquals(result, fromCacheResult); } diff 
--git a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryQueryToolChestTest.java b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryQueryToolChestTest.java index 8e67921dd83e..0c4c0e7b6e86 100644 --- a/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryQueryToolChestTest.java +++ b/processing/src/test/java/io/druid/query/timeseries/TimeseriesQueryQueryToolChestTest.java @@ -32,6 +32,8 @@ import io.druid.query.TableDataSource; import io.druid.query.aggregation.CountAggregatorFactory; import io.druid.query.aggregation.LongSumAggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.aggregation.post.ConstantPostAggregator; import io.druid.query.spec.MultipleIntervalSegmentSpec; import io.druid.segment.TestHelper; import io.druid.segment.VirtualColumns; @@ -76,12 +78,12 @@ public void testCacheStrategy() throws Exception new CountAggregatorFactory("metric1"), new LongSumAggregatorFactory("metric0", "metric0") ), - null, + ImmutableList.of(new ConstantPostAggregator("post", 10)), null ) ); - final Result result = new Result<>( + final Result result1 = new Result<>( // test timestamps that result in integer size millis DateTimes.utc(123L), new TimeseriesResultValue( @@ -89,7 +91,7 @@ public void testCacheStrategy() throws Exception ) ); - Object preparedValue = strategy.prepareForCache().apply(result); + Object preparedValue = strategy.prepareForSegmentLevelCache().apply(result1); ObjectMapper objectMapper = TestHelper.makeJsonMapper(); Object fromCacheValue = objectMapper.readValue( @@ -97,9 +99,26 @@ public void testCacheStrategy() throws Exception strategy.getCacheObjectClazz() ); - Result fromCacheResult = strategy.pullFromCache().apply(fromCacheValue); + Result fromCacheResult = strategy.pullFromSegmentLevelCache().apply(fromCacheValue); + + Assert.assertEquals(result1, fromCacheResult); + + final Result result2 = new Result<>( + // test timestamps that result in integer size millis + DateTimes.utc(123L), + new TimeseriesResultValue( + ImmutableMap.of("metric1", 2, "metric0", 3, "post", 10) + ) + ); + + Object preparedResultLevelCacheValue = strategy.prepareForCache(true).apply(result2); + Object fromResultLevelCacheValue = objectMapper.readValue( + objectMapper.writeValueAsBytes(preparedResultLevelCacheValue), + strategy.getCacheObjectClazz() + ); - Assert.assertEquals(result, fromCacheResult); + Result fromResultLevelCacheRes = strategy.pullFromCache(true).apply(fromResultLevelCacheValue); + Assert.assertEquals(result2, fromResultLevelCacheRes); } @Test diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryQueryToolChestTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryQueryToolChestTest.java index 5cd277123a6c..1a4e78351f7e 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryQueryToolChestTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryQueryToolChestTest.java @@ -78,7 +78,7 @@ public void testCacheStrategy() throws Exception ) ); - final Result result = new Result<>( + final Result result1 = new Result<>( // test timestamps that result in integer size millis DateTimes.utc(123L), new TopNResultValue( @@ -91,8 +91,8 @@ public void testCacheStrategy() throws Exception ) ); - Object preparedValue = strategy.prepareForCache().apply( - result + Object preparedValue = strategy.prepareForSegmentLevelCache().apply( + result1 ); ObjectMapper objectMapper = TestHelper.makeJsonMapper(); @@ -101,9 +101,36 @@ public void testCacheStrategy() throws 
Exception strategy.getCacheObjectClazz() ); - Result fromCacheResult = strategy.pullFromCache().apply(fromCacheValue); + Result fromCacheResult = strategy.pullFromSegmentLevelCache().apply(fromCacheValue); + + Assert.assertEquals(result1, fromCacheResult); + + final Result result2 = new Result<>( + // test timestamps that result in integer size millis + DateTimes.utc(123L), + new TopNResultValue( + Arrays.asList( + ImmutableMap.of( + "test", "val1", + "metric1", 2, + "post", 10 + ) + ) + ) + ); + + Object preparedResultCacheValue = strategy.prepareForCache(true).apply( + result2 + ); + + Object fromResultCacheValue = objectMapper.readValue( + objectMapper.writeValueAsBytes(preparedResultCacheValue), + strategy.getCacheObjectClazz() + ); + + Result fromResultCacheResult = strategy.pullFromCache(true).apply(fromResultCacheValue); + Assert.assertEquals(result2, fromResultCacheResult); - Assert.assertEquals(result, fromCacheResult); } @Test diff --git a/server/src/main/java/io/druid/client/CachingClusteredClient.java b/server/src/main/java/io/druid/client/CachingClusteredClient.java index 1e2e441949d8..df48cc0f21ef 100644 --- a/server/src/main/java/io/druid/client/CachingClusteredClient.java +++ b/server/src/main/java/io/druid/client/CachingClusteredClient.java @@ -500,7 +500,7 @@ private void addSequencesFromCache( return; } - final Function pullFromCacheFunction = strategy.pullFromCache(); + final Function pullFromCacheFunction = strategy.pullFromSegmentLevelCache(); final TypeReference cacheObjectClazz = strategy.getCacheObjectClazz(); for (Pair cachedResultPair : cachedResults) { final byte[] cachedResult = cachedResultPair.rhs; @@ -600,7 +600,7 @@ private Sequence getAndCacheServerResults( .withQuerySegmentSpec(segmentsOfServerSpec), responseContext ); - final Function cacheFn = strategy.prepareForCache(); + final Function cacheFn = strategy.prepareForSegmentLevelCache(); return resultsBySegments .map(result -> { final BySegmentResultValueClass resultsOfSegment = result.getValue(); diff --git a/server/src/main/java/io/druid/client/CachingQueryRunner.java b/server/src/main/java/io/druid/client/CachingQueryRunner.java index 54ecc5c35a90..79bcbfb0d178 100644 --- a/server/src/main/java/io/druid/client/CachingQueryRunner.java +++ b/server/src/main/java/io/druid/client/CachingQueryRunner.java @@ -102,7 +102,7 @@ public Sequence run(QueryPlus queryPlus, Map responseConte } if (useCache) { - final Function cacheFn = strategy.pullFromCache(); + final Function cacheFn = strategy.pullFromSegmentLevelCache(); final byte[] cachedResult = cache.get(key); if (cachedResult != null) { final TypeReference cacheObjectClazz = strategy.getCacheObjectClazz(); @@ -142,7 +142,7 @@ public void cleanup(Iterator iterFromMake) final Collection> cacheFutures = Collections.synchronizedList(Lists.>newLinkedList()); if (populateCache) { - final Function cacheFn = strategy.prepareForCache(); + final Function cacheFn = strategy.prepareForSegmentLevelCache(); return Sequences.withEffect( Sequences.map( diff --git a/server/src/main/java/io/druid/client/ResultLevelCacheUtil.java b/server/src/main/java/io/druid/client/ResultLevelCacheUtil.java new file mode 100644 index 000000000000..f55d2e6bba68 --- /dev/null +++ b/server/src/main/java/io/druid/client/ResultLevelCacheUtil.java @@ -0,0 +1,94 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.client; + +import io.druid.client.cache.Cache; +import io.druid.client.cache.CacheConfig; +import io.druid.java.util.common.logger.Logger; +import io.druid.java.util.common.StringUtils; +import io.druid.query.CacheStrategy; +import io.druid.query.Query; +import io.druid.query.QueryContexts; + +public class ResultLevelCacheUtil +{ + private static final Logger log = new Logger(ResultLevelCacheUtil.class); + + public static Cache.NamedKey computeResultLevelCacheKey( + String resultLevelCacheIdentifier + ) + { + return new Cache.NamedKey( + resultLevelCacheIdentifier, StringUtils.toUtf8(resultLevelCacheIdentifier) + ); + } + + public static void populate( + Cache cache, + Cache.NamedKey key, + byte[] resultBytes + ) + { + log.debug("Populating results into cache"); + cache.put(key, resultBytes); + } + + public static boolean useResultLevelCacheOnBrokers( + Query query, + CacheStrategy> strategy, + CacheConfig cacheConfig + ) + { + return useResultLevelCache(query, strategy, cacheConfig) && strategy.isCacheable(query, false); + } + + public static boolean populateResultLevelCacheOnBrokers( + Query query, + CacheStrategy> strategy, + CacheConfig cacheConfig + ) + { + return populateResultLevelCache(query, strategy, cacheConfig) && strategy.isCacheable(query, false); + } + + private static boolean useResultLevelCache( + Query query, + CacheStrategy> strategy, + CacheConfig cacheConfig + ) + { + return QueryContexts.isUseResultLevelCache(query) + && strategy != null + && cacheConfig.isUseResultLevelCache() + && cacheConfig.isQueryCacheable(query); + } + + private static boolean populateResultLevelCache( + Query query, + CacheStrategy> strategy, + CacheConfig cacheConfig + ) + { + return QueryContexts.isPopulateResultLevelCache(query) + && strategy != null + && cacheConfig.isPopulateResultLevelCache() + && cacheConfig.isQueryCacheable(query); + } +} diff --git a/server/src/main/java/io/druid/client/cache/CacheConfig.java b/server/src/main/java/io/druid/client/cache/CacheConfig.java index f1cc030c09f9..d73f9f387da0 100644 --- a/server/src/main/java/io/druid/client/cache/CacheConfig.java +++ b/server/src/main/java/io/druid/client/cache/CacheConfig.java @@ -29,13 +29,20 @@ public class CacheConfig { public static final String POPULATE_CACHE = "populateCache"; - + // The defaults defined here for cache related parameters are different from the QueryContext defaults due to legacy reasons. + // They should be made the same at some point in the future. 
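For illustration only (not part of this patch): the new context keys are set per query through `Query.withOverriddenContext`, which this patch itself uses elsewhere. The helper class and method below are hypothetical; only the key names come from `QueryContexts` above, and the broker must also enable `druid.broker.cache.useResultLevelCache` and `druid.broker.cache.populateResultLevelCache` (the `CacheConfig` fields that follow) for these flags to have any effect.

```java
// Hypothetical sketch: opt a single query into the result level cache via context overrides.
// Only the context key names are taken from this patch; the class and method are illustrative.
import com.google.common.collect.ImmutableMap;
import io.druid.query.Query;

public class ResultLevelCacheContextExample
{
  public static <T> Query<T> withResultLevelCaching(Query<T> query)
  {
    // These keys are read by QueryContexts.isUseResultLevelCache / isPopulateResultLevelCache.
    return query.withOverriddenContext(
        ImmutableMap.of(
            "useResultLevelCache", true,
            "populateResultLevelCache", true
        )
    );
  }
}
```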
@JsonProperty private boolean useCache = false; @JsonProperty private boolean populateCache = false; + @JsonProperty + private boolean useResultLevelCache = false; + + @JsonProperty + private boolean populateResultLevelCache = false; + @JsonProperty @Min(0) private int numBackgroundThreads = 0; @@ -44,6 +51,9 @@ public class CacheConfig @Min(0) private int cacheBulkMergeLimit = Integer.MAX_VALUE; + @JsonProperty + private int resultLevelCacheLimit = Integer.MAX_VALUE; + @JsonProperty private List unCacheable = Arrays.asList(Query.GROUP_BY, Query.SELECT); @@ -57,6 +67,16 @@ public boolean isUseCache() return useCache; } + public boolean isPopulateResultLevelCache() + { + return populateResultLevelCache; + } + + public boolean isUseResultLevelCache() + { + return useResultLevelCache; + } + public int getNumBackgroundThreads() { return numBackgroundThreads; @@ -67,6 +87,11 @@ public int getCacheBulkMergeLimit() return cacheBulkMergeLimit; } + public int getResultLevelCacheLimit() + { + return resultLevelCacheLimit; + } + public boolean isQueryCacheable(Query query) { return isQueryCacheable(query.getType()); diff --git a/server/src/main/java/io/druid/query/ResultLevelCachingQueryRunner.java b/server/src/main/java/io/druid/query/ResultLevelCachingQueryRunner.java new file mode 100644 index 000000000000..39a5a6de781d --- /dev/null +++ b/server/src/main/java/io/druid/query/ResultLevelCachingQueryRunner.java @@ -0,0 +1,302 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.query; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import io.druid.client.ResultLevelCacheUtil; +import io.druid.client.cache.Cache; +import io.druid.client.cache.CacheConfig; +import io.druid.java.util.common.RE; +import io.druid.java.util.common.StringUtils; +import io.druid.java.util.common.guava.Sequence; +import io.druid.java.util.common.guava.SequenceWrapper; +import io.druid.java.util.common.guava.Sequences; +import io.druid.java.util.common.logger.Logger; +import io.druid.server.QueryResource; + + +import javax.annotation.Nullable; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Map; + +public class ResultLevelCachingQueryRunner implements QueryRunner +{ + private static final Logger log = new Logger(ResultLevelCachingQueryRunner.class); + private final QueryRunner baseRunner; + private ObjectMapper objectMapper; + private final Cache cache; + private final CacheConfig cacheConfig; + private final boolean useResultCache; + private final boolean populateResultCache; + private Query query; + private final CacheStrategy> strategy; + + + public ResultLevelCachingQueryRunner( + QueryRunner baseRunner, + QueryToolChest queryToolChest, + Query query, + ObjectMapper objectMapper, + Cache cache, + CacheConfig cacheConfig + ) + { + this.baseRunner = baseRunner; + this.objectMapper = objectMapper; + this.cache = cache; + this.cacheConfig = cacheConfig; + this.query = query; + this.strategy = queryToolChest.getCacheStrategy(query); + this.populateResultCache = ResultLevelCacheUtil.populateResultLevelCacheOnBrokers(query, strategy, cacheConfig); + this.useResultCache = ResultLevelCacheUtil.useResultLevelCacheOnBrokers(query, strategy, cacheConfig); + } + + @Override + public Sequence run(QueryPlus queryPlus, Map responseContext) + { + if (useResultCache || populateResultCache) { + + final String cacheKeyStr = StringUtils.fromUtf8(strategy.computeCacheKey(query)); + final byte[] cachedResultSet = fetchResultsFromResultLevelCache(cacheKeyStr); + String existingResultSetId = extractEtagFromResults(cachedResultSet); + + existingResultSetId = existingResultSetId == null ? 
"" : existingResultSetId; + query = query.withOverriddenContext( + ImmutableMap.of(QueryResource.HEADER_IF_NONE_MATCH, existingResultSetId)); + + Sequence resultFromClient = baseRunner.run( + QueryPlus.wrap(query), + responseContext + ); + String newResultSetId = (String) responseContext.get(QueryResource.HEADER_ETAG); + + if (useResultCache && newResultSetId != null && newResultSetId.equals(existingResultSetId)) { + log.debug("Return cached result set as there is no change in identifiers for query %s ", query.getId()); + return deserializeResults(cachedResultSet, strategy, existingResultSetId); + } else { + @Nullable + ResultLevelCachePopulator resultLevelCachePopulator = createResultLevelCachePopulator( + cacheKeyStr, + newResultSetId + ); + if (resultLevelCachePopulator == null) { + return resultFromClient; + } + final Function cacheFn = strategy.prepareForCache(true); + + return Sequences.wrap(Sequences.map( + resultFromClient, + new Function() + { + @Override + public T apply(T input) + { + if (resultLevelCachePopulator.isShouldPopulate()) { + resultLevelCachePopulator.cacheResultEntry(resultLevelCachePopulator, input, cacheFn); + } + return input; + } + } + ), new SequenceWrapper() + { + @Override + public void after(boolean isDone, Throwable thrown) + { + Preconditions.checkNotNull( + resultLevelCachePopulator, + "ResultLevelCachePopulator cannot be null during cache population" + ); + if (thrown != null) { + log.error( + thrown, + "Error while preparing for result level caching for query %s with error %s ", + query.getId(), + thrown.getMessage() + ); + } else if (resultLevelCachePopulator.isShouldPopulate()) { + // The resultset identifier and its length is cached along with the resultset + resultLevelCachePopulator.populateResults(); + log.debug("Cache population complete for query %s", query.getId()); + } + resultLevelCachePopulator.cacheObjectStream = null; + } + }); + } + } else { + return baseRunner.run( + queryPlus, + responseContext + ); + } + } + + private byte[] fetchResultsFromResultLevelCache( + final String queryCacheKey + ) + { + if (useResultCache && queryCacheKey != null) { + return cache.get(ResultLevelCacheUtil.computeResultLevelCacheKey(queryCacheKey)); + } + return null; + } + + private String extractEtagFromResults( + final byte[] cachedResult + ) + { + if (cachedResult == null) { + return null; + } + log.debug("Fetching result level cache identifier for query: %s", query.getId()); + int etagLength = ByteBuffer.wrap(cachedResult, 0, Integer.BYTES).getInt(); + return StringUtils.fromUtf8(Arrays.copyOfRange(cachedResult, Integer.BYTES, etagLength + Integer.BYTES)); + } + + private Sequence deserializeResults( + final byte[] cachedResult, CacheStrategy strategy, String resultSetId + ) + { + if (cachedResult == null) { + log.error("Cached result set is null"); + } + final Function pullFromCacheFunction = strategy.pullFromCache(true); + final TypeReference cacheObjectClazz = strategy.getCacheObjectClazz(); + //Skip the resultsetID and its length bytes + Sequence cachedSequence = Sequences.simple(() -> { + try { + int resultOffset = Integer.BYTES + resultSetId.length(); + return objectMapper.readValues( + objectMapper.getFactory().createParser( + cachedResult, + resultOffset, + cachedResult.length - resultOffset + ), + cacheObjectClazz + ); + } + catch (IOException e) { + throw new RE(e, "Failed to retrieve results from cache for query ID [%s]", query.getId()); + } + }); + + return Sequences.map(cachedSequence, pullFromCacheFunction); + } + + private 
ResultLevelCachePopulator createResultLevelCachePopulator( + String cacheKeyStr, + String resultSetId + ) + { + if (resultSetId != null && populateResultCache) { + ResultLevelCachePopulator resultLevelCachePopulator = new ResultLevelCachePopulator( + cache, + objectMapper, + ResultLevelCacheUtil.computeResultLevelCacheKey(cacheKeyStr), + cacheConfig, + true + ); + try { + // Save the resultSetId and its length + resultLevelCachePopulator.cacheObjectStream.write(ByteBuffer.allocate(Integer.BYTES) + .putInt(resultSetId.length()) + .array()); + resultLevelCachePopulator.cacheObjectStream.write(StringUtils.toUtf8(resultSetId)); + } + catch (IOException ioe) { + log.error(ioe, "Failed to write cached values for query %s", query.getId()); + return null; + } + return resultLevelCachePopulator; + } else { + return null; + } + } + + public class ResultLevelCachePopulator + { + private final Cache cache; + private final ObjectMapper mapper; + private final Cache.NamedKey key; + private final CacheConfig cacheConfig; + private ByteArrayOutputStream cacheObjectStream = new ByteArrayOutputStream(); + + public boolean isShouldPopulate() + { + return shouldPopulate; + } + + private boolean shouldPopulate; + + private ResultLevelCachePopulator( + Cache cache, + ObjectMapper mapper, + Cache.NamedKey key, + CacheConfig cacheConfig, + boolean shouldPopulate + ) + { + this.cache = cache; + this.mapper = mapper; + this.key = key; + this.cacheConfig = cacheConfig; + this.shouldPopulate = shouldPopulate; + } + + private void cacheResultEntry( + ResultLevelCachePopulator resultLevelCachePopulator, + T resultEntry, + Function cacheFn + ) + { + + int cacheLimit = cacheConfig.getResultLevelCacheLimit(); + try (JsonGenerator gen = mapper.getFactory().createGenerator(resultLevelCachePopulator.cacheObjectStream)) { + gen.writeObject(cacheFn.apply(resultEntry)); + if (cacheLimit > 0 && resultLevelCachePopulator.cacheObjectStream.size() > cacheLimit) { + shouldPopulate = false; + resultLevelCachePopulator.cacheObjectStream = null; + return; + } + } + catch (IOException ex) { + log.error(ex, "Failed to retrieve entry to be cached. 
Result Level caching will not be performed!"); + shouldPopulate = false; + resultLevelCachePopulator.cacheObjectStream = null; + } + } + + public void populateResults() + { + ResultLevelCacheUtil.populate( + cache, + key, + cacheObjectStream.toByteArray() + ); + } + } +} diff --git a/server/src/main/java/io/druid/server/ClientQuerySegmentWalker.java b/server/src/main/java/io/druid/server/ClientQuerySegmentWalker.java index 768e284ca1c5..e9ee6959ea32 100644 --- a/server/src/main/java/io/druid/server/ClientQuerySegmentWalker.java +++ b/server/src/main/java/io/druid/server/ClientQuerySegmentWalker.java @@ -24,6 +24,8 @@ import com.google.inject.Inject; import io.druid.java.util.emitter.service.ServiceEmitter; import io.druid.client.CachingClusteredClient; +import io.druid.client.cache.Cache; +import io.druid.client.cache.CacheConfig; import io.druid.query.FluentQueryRunnerBuilder; import io.druid.query.PostProcessingOperator; import io.druid.query.Query; @@ -31,6 +33,7 @@ import io.druid.query.QuerySegmentWalker; import io.druid.query.QueryToolChest; import io.druid.query.QueryToolChestWarehouse; +import io.druid.query.ResultLevelCachingQueryRunner; import io.druid.query.RetryQueryRunner; import io.druid.query.RetryQueryRunnerConfig; import io.druid.query.SegmentDescriptor; @@ -47,6 +50,9 @@ public class ClientQuerySegmentWalker implements QuerySegmentWalker private final RetryQueryRunnerConfig retryConfig; private final ObjectMapper objectMapper; private final ServerConfig serverConfig; + private final Cache cache; + private final CacheConfig cacheConfig; + @Inject public ClientQuerySegmentWalker( @@ -55,7 +61,9 @@ public ClientQuerySegmentWalker( QueryToolChestWarehouse warehouse, RetryQueryRunnerConfig retryConfig, ObjectMapper objectMapper, - ServerConfig serverConfig + ServerConfig serverConfig, + Cache cache, + CacheConfig cacheConfig ) { this.emitter = emitter; @@ -64,6 +72,8 @@ public ClientQuerySegmentWalker( this.retryConfig = retryConfig; this.objectMapper = objectMapper; this.serverConfig = serverConfig; + this.cache = cache; + this.cacheConfig = cacheConfig; } @Override @@ -81,6 +91,22 @@ public QueryRunner getQueryRunnerForSegments(Query query, Iterable QueryRunner makeRunner(Query query, QueryRunner baseClientRunner) { QueryToolChest> toolChest = warehouse.getToolChest(query); + + // This does not adhere to the fluent workflow. 
See https://github.com/druid-io/druid/issues/5517 + return new ResultLevelCachingQueryRunner<>(makeRunner(query, baseClientRunner, toolChest), + toolChest, + query, + objectMapper, + cache, + cacheConfig); + } + + private QueryRunner makeRunner( + Query query, + QueryRunner baseClientRunner, + QueryToolChest> toolChest + ) + { PostProcessingOperator postProcessing = objectMapper.convertValue( query.getContextValue("postProcessing"), new TypeReference>() @@ -105,6 +131,4 @@ private QueryRunner makeRunner(Query query, QueryRunner baseClientR .emitCPUTimeMetric(emitter) .postProcess(postProcessing); } - - } diff --git a/server/src/test/java/io/druid/client/CachingQueryRunnerTest.java b/server/src/test/java/io/druid/client/CachingQueryRunnerTest.java index 4ee3013eb7d1..65ee95acdfae 100644 --- a/server/src/test/java/io/druid/client/CachingQueryRunnerTest.java +++ b/server/src/test/java/io/druid/client/CachingQueryRunnerTest.java @@ -314,7 +314,7 @@ public boolean isUseCache() byte[] cacheValue = cache.get(cacheKey); Assert.assertNotNull(cacheValue); - Function fn = cacheStrategy.pullFromCache(); + Function fn = cacheStrategy.pullFromSegmentLevelCache(); List cacheResults = Lists.newArrayList( Iterators.transform( objectMapper.readValues( @@ -349,7 +349,7 @@ private void testUseCache( cache, objectMapper, cacheKey, - Iterables.transform(expectedResults, cacheStrategy.prepareForCache()) + Iterables.transform(expectedResults, cacheStrategy.prepareForSegmentLevelCache()) ); CachingQueryRunner runner = new CachingQueryRunner( From ea30c05355f7d572a66b82dab5e53d4de4476eb0 Mon Sep 17 00:00:00 2001 From: Nathan Hartwell Date: Mon, 26 Mar 2018 16:28:46 -0500 Subject: [PATCH 05/67] Adding ParserSpec for Influx Line Protocol (#5440) * Adding ParserSpec for Influx Line Protocol * Addressing PR feedback - Remove extraneous TODO - Better handling of parse errors (e.g. 
invalid timestamp) - Handle sub-millisecond timestamps * Adding documentation for Influx parser * Fixing docs --- .../development/extensions-contrib/influx.md | 46 ++++ extensions-contrib/influx-extensions/pom.xml | 112 +++++++++ .../data/input/influx/InfluxLineProtocol.g4 | 77 ++++++ .../input/influx/InfluxExtensionsModule.java | 51 ++++ .../data/input/influx/InfluxParseSpec.java | 63 +++++ .../druid/data/input/influx/InfluxParser.java | 173 ++++++++++++++ .../io.druid.initialization.DruidModule | 1 + .../data/input/influx/InfluxParserTest.java | 225 ++++++++++++++++++ pom.xml | 1 + 9 files changed, 749 insertions(+) create mode 100644 docs/content/development/extensions-contrib/influx.md create mode 100644 extensions-contrib/influx-extensions/pom.xml create mode 100644 extensions-contrib/influx-extensions/src/main/antlr4/io/druid/data/input/influx/InfluxLineProtocol.g4 create mode 100644 extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxExtensionsModule.java create mode 100644 extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxParseSpec.java create mode 100644 extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxParser.java create mode 100644 extensions-contrib/influx-extensions/src/main/resources/META-INF/services/io.druid.initialization.DruidModule create mode 100644 extensions-contrib/influx-extensions/src/test/java/io/druid/data/input/influx/InfluxParserTest.java diff --git a/docs/content/development/extensions-contrib/influx.md b/docs/content/development/extensions-contrib/influx.md new file mode 100644 index 000000000000..a8d29718a1f0 --- /dev/null +++ b/docs/content/development/extensions-contrib/influx.md @@ -0,0 +1,46 @@ +--- +layout: doc_page +--- + +# InfluxDB Line Protocol Parser + +To use this extension, make sure to [include](../../operations/including-extensions.html) `druid-influx-extensions`. + +This extension enables Druid to parse the [InfluxDB Line Protocol](https://docs.influxdata.com/influxdb/v1.5/write_protocols/line_protocol_tutorial/), a popular text-based timeseries metric serialization format. + +## Line Protocol + +A typical line looks like this: + +```cpu,application=dbhost=prdb123,region=us-east-1 usage_idle=99.24,usage_user=0.55 1520722030000000000``` + +which contains four parts: + - measurement: A string indicating the name of the measurement represented (e.g. cpu, network, web_requests) + - tags: zero or more key-value pairs (i.e. dimensions) + - measurements: one or more key-value pairs; values can be numeric, boolean, or string + - timestamp: nanoseconds since Unix epoch (the parser truncates it to milliseconds) + +The parser extracts these fields into a map, giving the measurement the key `measurement` and the timestamp the key `__ts`. The tag and measurement keys are copied verbatim, so users should take care to avoid name collisions. It is up to the ingestion spec to decide which fields should be treated as dimensions and which should be treated as metrics (typically tags correspond to dimensions and measurements correspond to metrics). + +The parser is configured like so: +```json +"parser": { + "type": "string", + "parseSpec": { + "format": "influx", + "timestampSpec": { + "column": "__ts", + "format": "millis" + }, + "dimensionsSpec": { + "dimensionExclusions": [ + "__ts" + ] + }, + "whitelistMeasurements": [ + "cpu" + ] + } +} +``` + +The `whitelistMeasurements` field is an optional list of strings.
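As a hedged usage sketch (not part of the original docs), the parser behind this spec can also be exercised directly. The class below is illustrative; `InfluxParser`, its whitelist constructor argument, and `parseToMap` come from the extension code later in this patch, and the input line is assumed sample data.

```java
// Illustrative only: parse one line of Influx Line Protocol, whitelisting the "cpu" measurement.
import com.google.common.collect.Sets;
import io.druid.data.input.influx.InfluxParser;

import java.util.Map;

public class InfluxLineExample
{
  public static void main(String[] args)
  {
    InfluxParser parser = new InfluxParser(Sets.newHashSet("cpu"));
    Map<String, Object> row = parser.parseToMap(
        "cpu,host=foo.bar.baz,region=us-east-1 usage_idle=99.3,m1_load=2i 1465839830100400200"
    );
    // Expected map: measurement=cpu, host=foo.bar.baz, region=us-east-1,
    // usage_idle=99.3, m1_load=2, __ts=1465839830100 (nanoseconds truncated to millis).
    System.out.println(row);
  }
}
```

In this sketch the whitelist passed to the constructor plays the same role as `whitelistMeasurements` in the spec above; a line whose measurement is not whitelisted causes the parser to reject it.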
If present, measurements that do not match one of the strings in the list will be ignored. diff --git a/extensions-contrib/influx-extensions/pom.xml b/extensions-contrib/influx-extensions/pom.xml new file mode 100644 index 000000000000..0673a7957adf --- /dev/null +++ b/extensions-contrib/influx-extensions/pom.xml @@ -0,0 +1,112 @@ + + + + + 4.0.0 + + io.druid.extensions + druid-influx-extensions + druid-influx-extensions + druid-influx-extensions + + + io.druid + druid + 0.13.0-SNAPSHOT + ../../pom.xml + + + + + + + + + + + io.druid + druid-common + ${project.parent.version} + provided + + + org.antlr + antlr4-runtime + + + + + junit + junit + test + + + org.mockito + mockito-core + 2.2.10 + test + + + org.hamcrest + hamcrest-all + 1.3 + test + + + pl.pragmatists + JUnitParams + 1.0.4 + test + + + io.druid + druid-processing + ${project.parent.version} + test + + + + + + + org.antlr + antlr4-maven-plugin + + + + antlr4 + + + + + + + + + + strict + + + + + + + diff --git a/extensions-contrib/influx-extensions/src/main/antlr4/io/druid/data/input/influx/InfluxLineProtocol.g4 b/extensions-contrib/influx-extensions/src/main/antlr4/io/druid/data/input/influx/InfluxLineProtocol.g4 new file mode 100644 index 000000000000..89b419f09a27 --- /dev/null +++ b/extensions-contrib/influx-extensions/src/main/antlr4/io/druid/data/input/influx/InfluxLineProtocol.g4 @@ -0,0 +1,77 @@ +/** Based on v1.4 from their docs + at https://docs.influxdata.com/influxdb/v1.4/write_protocols/line_protocol_tutorial/ + **/ + +grammar InfluxLineProtocol; + +lines + : line ('\n' line)* '\n'? EOF +; + +line + : identifier (',' tag_set)? ' ' field_set (' ' timestamp)? +; + +timestamp + : NUMBER +; + +field_set + : field_pair (',' field_pair)* +; + +tag_set + : tag_pair (',' tag_pair)* +; + +tag_pair + : identifier '=' identifier +; + +field_pair + : identifier '=' field_value +; + +identifier + : IDENTIFIER_STRING | NUMBER | BOOLEAN +; + +field_value + : QUOTED_STRING | NUMBER | BOOLEAN +; + +eol + : NEWLINE | EOF +; + +NEWLINE + : '\n' +; + +NUMBER + : '-'? INT ('.' [0-9] +) ? 'i'? +; + +BOOLEAN + : 'TRUE' | 'true' | 'True' | 't' | 'T' | 'FALSE' | 'False' | 'false' | 'F' | 'f' +; + +QUOTED_STRING + : '"' (StringFieldEscapeSequence | ~(["\\]) )* '"' +; + +IDENTIFIER_STRING + : (IdentifierEscapeSequence | ~([,= \n\\]) )+ +; + +fragment IdentifierEscapeSequence + : '\\' [,= \\] +; + +fragment StringFieldEscapeSequence + : '\\' ["\\] +; + +fragment INT + : '0' | [1-9] [0-9]* +; diff --git a/extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxExtensionsModule.java b/extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxExtensionsModule.java new file mode 100644 index 000000000000..964c05564e75 --- /dev/null +++ b/extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxExtensionsModule.java @@ -0,0 +1,51 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package io.druid.data.input.influx; + +import com.fasterxml.jackson.databind.Module; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.google.inject.Binder; +import io.druid.initialization.DruidModule; + +import java.util.Collections; +import java.util.List; + +public class InfluxExtensionsModule implements DruidModule +{ + public InfluxExtensionsModule() + { + } + + @Override + public List getJacksonModules() + { + return Collections.singletonList( + new SimpleModule("InfluxInputRowParserModule") + .registerSubtypes( + new NamedType(InfluxParseSpec.class, "influx") + ) + ); + } + + @Override + public void configure(Binder binder) + { + } +} diff --git a/extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxParseSpec.java b/extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxParseSpec.java new file mode 100644 index 000000000000..eb1a6f64a432 --- /dev/null +++ b/extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxParseSpec.java @@ -0,0 +1,63 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package io.druid.data.input.influx; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.Sets; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.ParseSpec; +import io.druid.data.input.impl.TimestampSpec; +import io.druid.java.util.common.parsers.Parser; + +import java.util.List; + +public class InfluxParseSpec extends ParseSpec +{ + private List measurementWhitelist; + + @JsonCreator + public InfluxParseSpec( + @JsonProperty("dimensionsSpec") DimensionsSpec dimensionsSpec, + @JsonProperty("whitelistMeasurements") List measurementWhitelist + ) + { + super( + new TimestampSpec(InfluxParser.TIMESTAMP_KEY, "millis", null), + dimensionsSpec != null ? 
dimensionsSpec : new DimensionsSpec(null, null, null) + ); + this.measurementWhitelist = measurementWhitelist; + } + + @Override + public Parser makeParser() + { + if (measurementWhitelist != null && measurementWhitelist.size() > 0) { + return new InfluxParser(Sets.newHashSet(measurementWhitelist)); + } else { + return new InfluxParser(null); + } + } + + @Override + public ParseSpec withDimensionsSpec(DimensionsSpec spec) + { + return new InfluxParseSpec(spec, measurementWhitelist); + } +} diff --git a/extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxParser.java b/extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxParser.java new file mode 100644 index 000000000000..2c3e0ec40d5c --- /dev/null +++ b/extensions-contrib/influx-extensions/src/main/java/io/druid/data/input/influx/InfluxParser.java @@ -0,0 +1,173 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package io.druid.data.input.influx; + +import com.google.common.collect.ImmutableList; +import io.druid.java.util.common.parsers.ParseException; +import io.druid.java.util.common.parsers.Parser; +import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.TokenStream; + +import javax.annotation.Nullable; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class InfluxParser implements Parser +{ + public static final String TIMESTAMP_KEY = "__ts"; + private static final String MEASUREMENT_KEY = "measurement"; + private final Set measurementWhitelist; + + public InfluxParser(Set measurementWhitelist) + { + this.measurementWhitelist = measurementWhitelist; + } + + @Override + public void startFileFromBeginning() + { + } + + @Nullable + @Override + public Map parseToMap(String input) + { + CharStream charStream = new ANTLRInputStream(input); + InfluxLineProtocolLexer lexer = new InfluxLineProtocolLexer(charStream); + TokenStream tokenStream = new CommonTokenStream(lexer); + InfluxLineProtocolParser parser = new InfluxLineProtocolParser(tokenStream); + + List lines = parser.lines().line(); + if (parser.getNumberOfSyntaxErrors() != 0) { + throw new ParseException("Unable to parse line."); + } + if (lines.size() != 1) { + throw new ParseException("Multiple lines present; unable to parse more than one per record."); + } + + Map out = new LinkedHashMap<>(); + + InfluxLineProtocolParser.LineContext line = lines.get(0); + String measurement = parseIdentifier(line.identifier()); + + if (!checkWhitelist(measurement)) { + throw new ParseException("Metric not whitelisted."); + } + + out.put(MEASUREMENT_KEY, measurement); + if (line.tag_set() != null) { + 
line.tag_set().tag_pair().forEach(t -> parseTag(t, out)); + } + + line.field_set().field_pair().forEach(t -> parseField(t, out)); + + if (line.timestamp() != null) { + String timestamp = line.timestamp().getText(); + parseTimestamp(timestamp, out); + } + return out; + } + + private void parseTag(InfluxLineProtocolParser.Tag_pairContext tag, Map out) + { + String key = parseIdentifier(tag.identifier(0)); + String value = parseIdentifier(tag.identifier(1)); + out.put(key, value); + } + + private void parseField(InfluxLineProtocolParser.Field_pairContext field, Map out) + { + String key = parseIdentifier(field.identifier()); + InfluxLineProtocolParser.Field_valueContext valueContext = field.field_value(); + Object value; + if (valueContext.NUMBER() != null) { + value = parseNumber(valueContext.NUMBER().getText()); + } else if (valueContext.BOOLEAN() != null) { + value = parseBool(valueContext.BOOLEAN().getText()); + } else { + value = parseQuotedString(valueContext.QUOTED_STRING().getText()); + } + out.put(key, value); + } + + private Object parseQuotedString(String text) + { + return text.substring(1, text.length() - 1).replaceAll("\\\\\"", "\""); + } + + private Object parseNumber(String raw) + { + if (raw.endsWith("i")) { + return new Long(raw.substring(0, raw.length() - 1)); + } + + return new Double(raw); + } + + private Object parseBool(String raw) + { + char first = raw.charAt(0); + if (first == 't' || first == 'T') { + return "true"; + } else { + return "false"; + } + } + + private String parseIdentifier(InfluxLineProtocolParser.IdentifierContext ctx) + { + if (ctx.BOOLEAN() != null || ctx.NUMBER() != null) { + return ctx.getText(); + } + + return ctx.IDENTIFIER_STRING().getText().replaceAll("\\\\([,= ])", "$1"); + } + + private boolean checkWhitelist(String m) + { + return (measurementWhitelist == null) || measurementWhitelist.contains(m); + } + + private void parseTimestamp(String timestamp, Map dest) + { + // Influx timestamps come in nanoseconds; treat anything less than 1 ms as 0 + if (timestamp.length() < 7) { + dest.put(TIMESTAMP_KEY, 0L); + } else { + timestamp = timestamp.substring(0, timestamp.length() - 6); + long timestampMillis = new Long(timestamp); + dest.put(TIMESTAMP_KEY, timestampMillis); + } + } + + @Override + public List getFieldNames() + { + return ImmutableList.of(); + } + + @Override + public void setFieldNames(Iterable fieldNames) + { + } +} diff --git a/extensions-contrib/influx-extensions/src/main/resources/META-INF/services/io.druid.initialization.DruidModule b/extensions-contrib/influx-extensions/src/main/resources/META-INF/services/io.druid.initialization.DruidModule new file mode 100644 index 000000000000..3ff740f01a24 --- /dev/null +++ b/extensions-contrib/influx-extensions/src/main/resources/META-INF/services/io.druid.initialization.DruidModule @@ -0,0 +1 @@ +io.druid.data.input.influx.InfluxExtensionsModule diff --git a/extensions-contrib/influx-extensions/src/test/java/io/druid/data/input/influx/InfluxParserTest.java b/extensions-contrib/influx-extensions/src/test/java/io/druid/data/input/influx/InfluxParserTest.java new file mode 100644 index 000000000000..0977bf5c3622 --- /dev/null +++ b/extensions-contrib/influx-extensions/src/test/java/io/druid/data/input/influx/InfluxParserTest.java @@ -0,0 +1,225 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
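The parser above flattens one InfluxDB line-protocol record into a map: the measurement under `measurement`, each tag and field under its own key, and the nanosecond timestamp truncated to milliseconds under `__ts` (anything shorter than seven digits, i.e. under one millisecond, becomes 0). A minimal usage sketch, not part of the patch, assuming only that the class above is on the classpath; the sample record is illustrative:

```java
// Usage sketch for the parser added in this patch (not part of the patch itself).
// The sample record, host tag, and field names are illustrative only.
import io.druid.data.input.influx.InfluxParser;

import java.util.Map;

public class InfluxParserExample
{
  public static void main(String[] args)
  {
    // A null whitelist accepts every measurement.
    InfluxParser parser = new InfluxParser(null);
    Map<String, Object> row = parser.parseToMap(
        "cpu,host=foo.example.com pct_idle=99.3,m1_load=2i 1465839830100400200"
    );
    // Expected contents:
    //   measurement -> "cpu"
    //   host        -> "foo.example.com"
    //   pct_idle    -> 99.3 (Double; unsuffixed numbers)
    //   m1_load     -> 2    (Long; "i"-suffixed integers)
    //   __ts        -> 1465839830100 (nanoseconds truncated to milliseconds)
    row.forEach((k, v) -> System.out.println(k + " = " + v));
  }
}
```

As `parseNumber` above shows, `i`-suffixed values come back as `Long` and everything else numeric as `Double`, which is what the test cases later in this patch assert.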
Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.data.input.influx; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import io.druid.java.util.common.Pair; +import io.druid.java.util.common.parsers.ParseException; +import io.druid.java.util.common.parsers.Parser; +import junitparams.JUnitParamsRunner; +import junitparams.Parameters; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; + +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.isA; + +@RunWith(JUnitParamsRunner.class) +public class InfluxParserTest +{ + private String name; + private String input; + private Map expected; + + private static Object[] testCase(String name, String input, Parsed expected) + { + return Lists.newArrayList(name, input, expected).toArray(); + } + + + public Object[] testData() + { + return Lists.newArrayList( + testCase( + "real sample", + "cpu,host=foo.bar.baz,region=us-east-1,application=echo pct_idle=99.3,pct_user=88.8,m1_load=2i 1465839830100400200", + Parsed.row("cpu", 1465839830100L) + .with("host", "foo.bar.baz") + .with("region", "us-east-1") + .with("application", "echo") + .with("pct_idle", 99.3) + .with("pct_user", 88.8) + .with("m1_load", 2L) + ), + testCase( + "negative timestamp", + "foo,region=us-east-1,host=127.0.0.1 m=1.0,n=3.0,o=500i -123456789", + Parsed.row("foo", -123L) + .with("region", "us-east-1") + .with("host", "127.0.0.1") + .with("m", 1.0) + .with("n", 3.0) + .with("o", 500L) + ), + testCase( + "truncated timestamp", + "foo,region=us-east-1,host=127.0.0.1 m=1.0,n=3.0,o=500i 123", + Parsed.row("foo", 0L) + .with("region", "us-east-1") + .with("host", "127.0.0.1") + .with("m", 1.0) + .with("n", 3.0) + .with("o", 500L) + ), + testCase( + "special characters", + "!@#$%^&*()_-\\=+,+++\\ +++=--\\ --- __**__=\"ü\" 123456789", + Parsed.row("!@#$%^&*()_-=+", 123L) + .with("+++ +++", "-- ---") + .with("__**__", "127.0.0.1") + .with("__**__", "ü") + ), + testCase( + "unicode characters", + "\uD83D\uDE00,\uD83D\uDE05=\uD83D\uDE06 \uD83D\uDE0B=100i,b=\"\uD83D\uDE42\" 123456789", + Parsed.row("\uD83D\uDE00", 123L) + .with("\uD83D\uDE05", "\uD83D\uDE06") + .with("\uD83D\uDE0B", 100L) + .with("b", "\uD83D\uDE42") + ), + testCase( + "quoted string measurement value", + "foo,region=us-east-1,host=127.0.0.1 m=1.0,n=3.0,o=\"something \\\"cool\\\" \" 123456789", + Parsed.row("foo", 123L) + .with("region", "us-east-1") + .with("host", "127.0.0.1") + .with("m", 1.0) + .with("n", 3.0) + .with("o", "something \"cool\" ") + ), + testCase( + "no tags", + "foo m=1.0,n=3.0 123456789", + Parsed.row("foo", 123L) + .with("m", 1.0) + .with("n", 3.0) + ), + testCase( + "Escaped characters in identifiers", + "f\\,oo\\ \\=,bar=baz m=1.0,n=3.0 123456789", + Parsed.row("f,oo =", 123L) + .with("bar", "baz") + .with("m", 1.0) + .with("n", 3.0) + ), + 
testCase( + "Escaped characters in identifiers", + "foo\\ \\=,bar=baz m=1.0,n=3.0 123456789", + Parsed.row("foo =", 123L) + .with("bar", "baz") + .with("m", 1.0) + .with("n", 3.0) + ) + ).toArray(); + } + + @Test + @Parameters(method = "testData") + public void testParse(String name, String input, Parsed expected) + { + Parser parser = new InfluxParser(null); + Map parsed = parser.parseToMap(input); + assertThat("correct measurement name", parsed.get("measurement"), equalTo(expected.measurement)); + assertThat("correct timestamp", parsed.get(InfluxParser.TIMESTAMP_KEY), equalTo(expected.timestamp)); + expected.kv.forEach((k, v) -> { + assertThat("correct field " + k, parsed.get(k), equalTo(v)); + }); + parsed.remove("measurement"); + parsed.remove(InfluxParser.TIMESTAMP_KEY); + assertThat("No extra keys in parsed data", parsed.keySet(), equalTo(expected.kv.keySet())); + } + + @Test + public void testParseWhitelistPass() + { + Parser parser = new InfluxParser(Sets.newHashSet("cpu")); + String input = "cpu,host=foo.bar.baz,region=us-east,application=echo pct_idle=99.3,pct_user=88.8,m1_load=2 1465839830100400200"; + Map parsed = parser.parseToMap(input); + assertThat(parsed.get("measurement"), equalTo("cpu")); + } + + @Test + public void testParseWhitelistFail() + { + Parser parser = new InfluxParser(Sets.newHashSet("mem")); + String input = "cpu,host=foo.bar.baz,region=us-east,application=echo pct_idle=99.3,pct_user=88.8,m1_load=2 1465839830100400200"; + try { + parser.parseToMap(input); + } + catch (ParseException t) { + assertThat(t, isA(ParseException.class)); + return; + } + + Assert.fail("Exception not thrown"); + } + + public Object[] failureTestData() + { + return Lists.newArrayList( + Pair.of("Empty line", ""), + Pair.of("Invalid measurement", "invalid measurement"), + Pair.of("Invalid timestamp", "foo i=123 123x") + ).toArray(); + } + + @Test + @Parameters(method = "failureTestData") + public void testParseFailures(Pair testCase) + { + Parser parser = new InfluxParser(null); + try { + Map res = parser.parseToMap(testCase.rhs); + } + catch (ParseException t) { + assertThat(t, isA(ParseException.class)); + return; + } + + Assert.fail(testCase.rhs + ": exception not thrown"); + } + + private static class Parsed + { + private String measurement; + private Long timestamp; + private Map kv = new HashMap<>(); + + public static Parsed row(String measurement, Long timestamp) + { + Parsed e = new Parsed(); + e.measurement = measurement; + e.timestamp = timestamp; + return e; + } + + public Parsed with(String k, Object v) + { + kv.put(k, v); + return this; + } + } +} diff --git a/pom.xml b/pom.xml index 870fe78d0467..1830c8b7c7f8 100644 --- a/pom.xml +++ b/pom.xml @@ -125,6 +125,7 @@ extensions-core/simple-client-sslcontext extensions-core/druid-basic-security + extensions-contrib/influx-extensions extensions-contrib/azure-extensions extensions-contrib/cassandra-storage extensions-contrib/druid-rocketmq From 50e0e7f97d6e39ded9cd7ec9d8c6da43320d0048 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 26 Mar 2018 17:01:02 -0700 Subject: [PATCH 06/67] Correct lookup documentation (#5537) fixes #5536 --- docs/content/querying/lookups.md | 48 ++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/docs/content/querying/lookups.md b/docs/content/querying/lookups.md index 771c9adfb743..b6b83977a637 100644 --- a/docs/content/querying/lookups.md +++ b/docs/content/querying/lookups.md @@ -364,9 +364,53 @@ It is possible to save the configuration across restarts 
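With the extension registered (as a Jackson `NamedType` in the module and as a new entry in the root `pom.xml`), an `InfluxParseSpec` can be declared by type name in JSON. A hedged serde sketch follows; it assumes `ParseSpec`'s polymorphic type property is `format` and registers the extension's Jackson modules by hand rather than through Druid's normal module loading:

```java
// Hedged serde sketch (not part of the patch). Assumes ParseSpec's
// @JsonTypeInfo property is "format"; in a real deployment Druid's module
// loading performs this registration automatically.
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.data.input.impl.ParseSpec;
import io.druid.data.input.influx.InfluxExtensionsModule;

public class InfluxParseSpecSerdeSketch
{
  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();
    for (Module m : new InfluxExtensionsModule().getJacksonModules()) {
      mapper.registerModule(m);
    }

    String json = "{\"format\": \"influx\", \"whitelistMeasurements\": [\"cpu\"]}";
    ParseSpec spec = mapper.readValue(json, ParseSpec.class);
    System.out.println(spec.makeParser().getClass().getSimpleName()); // InfluxParser
  }
}
```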
such that a node will n ## Introspect a Lookup -Lookup implementations can provide some introspection capabilities by implementing `LookupIntrospectHandler`. User will send request to `/druid/lookups/v1/introspect/{lookupId}` to enable introspection on a given lookup. +The broker provides an API for lookup introspection if the lookup type implements a `LookupIntrospectHandler`. -For instance you can list all the keys/values of a map based lookup by issuing a `GET` request to `/druid/lookups/v1/introspect/{lookupId}/keys"` or `/druid/lookups/v1/introspect/{lookupId}/values"` +A `GET` request to `/druid/v1/lookups/introspect/{lookupId}` will return the map of complete values. + +ex: `GET /druid/v1/lookups/introspect/nato-phonetic` +``` +{ + "A": "Alfa", + "B": "Bravo", + "C": "Charlie", + ... + "Y": "Yankee", + "Z": "Zulu", + "-": "Dash" +} + +``` + +The list of keys can be retrieved via `GET` to `/druid/v1/lookups/introspect/{lookupId}/keys"` + +ex: `GET /druid/v1/lookups/introspect/nato-phonetic/keys` +``` +[ + "A", + "B", + "C", + ... + "Y", + "Z", + "-" +] +``` + +A `GET` request to `/druid/v1/lookups/introspect/{lookupId}/values"` will return the list of values. + +ex: `GET /druid/v1/lookups/introspect/nato-phonetic/values` +``` +[ + "Alfa", + "Bravo", + "Charlie", + ... + "Yankee", + "Zulu", + "Dash" +] +``` ## Druid version 0.10.0 to 0.10.1 upgrade/downgrade Overall druid cluster lookups configuration is persisted in metadata store and also individual lookup nodes optionally persist a snapshot of loaded lookups on disk. From db508cf3ca5f2aa4fb7126e3b7f8d2bd4e58a599 Mon Sep 17 00:00:00 2001 From: Dyana Rose Date: Wed, 28 Mar 2018 21:53:38 +0100 Subject: [PATCH 07/67] [docs] fix invalid example json (#5547) https://github.com/druid-io/druid/issues/5546 --- docs/content/querying/datasourcemetadataquery.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/querying/datasourcemetadataquery.md b/docs/content/querying/datasourcemetadataquery.md index a812c15633ba..daee0dfdf301 100644 --- a/docs/content/querying/datasourcemetadataquery.md +++ b/docs/content/querying/datasourcemetadataquery.md @@ -29,7 +29,7 @@ The format of the result is: [ { "timestamp" : "2013-05-09T18:24:00.000Z", "result" : { - "maxIngestedEventTime" : "2013-05-09T18:24:09.007Z", + "maxIngestedEventTime" : "2013-05-09T18:24:09.007Z" } } ] ``` From 024e0a9cca4577202ec09de560946151b5da1b88 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 28 Mar 2018 14:15:38 -0700 Subject: [PATCH 08/67] Respect forceHashAggregation in queryContext (#5533) * Respect forceHashAggregation in queryContext * address comment --- .../epinephelinae/GroupByQueryEngineV2.java | 24 +++++++++---------- .../groupby/strategy/GroupByStrategyV2.java | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 6720863bb53a..3d48f1ad2b6a 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -91,7 +91,7 @@ public static Sequence process( final GroupByQuery query, final StorageAdapter storageAdapter, final NonBlockingPool intermediateResultsBufferPool, - final GroupByQueryConfig config + final GroupByQueryConfig querySpecificConfig ) { if (storageAdapter == null) { @@ -150,10 +150,10 @@ public 
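A plain-JDK sketch of calling the three documented introspection paths; the broker address and the `nato-phonetic` lookup id are assumptions carried over from the documentation's own example, not part of the patch:

```java
// Hedged sketch: GET the full map, the keys, and the values of one lookup.
// Broker host/port and the lookup id are assumptions.
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Scanner;

public class LookupIntrospectExample
{
  public static void main(String[] args) throws Exception
  {
    String base = "http://broker.example.com:8082/druid/v1/lookups/introspect/nato-phonetic";
    for (String suffix : new String[] {"", "/keys", "/values"}) {
      HttpURLConnection conn = (HttpURLConnection) new URL(base + suffix).openConnection();
      conn.setRequestMethod("GET");
      try (InputStream in = conn.getInputStream();
           Scanner scanner = new Scanner(in, "UTF-8").useDelimiter("\\A")) {
        System.out.println(suffix + " -> " + (scanner.hasNext() ? scanner.next() : ""));
      }
    }
  }
}
```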
GroupByEngineIterator make() final ByteBuffer buffer = bufferHolder.get(); // Check array-based aggregation is applicable - if (isArrayAggregateApplicable(config, query, dims, storageAdapter, buffer)) { + if (isArrayAggregateApplicable(querySpecificConfig, query, dims, storageAdapter, buffer)) { return new ArrayAggregateIterator( query, - config, + querySpecificConfig, cursor, buffer, fudgeTimestamp, @@ -165,7 +165,7 @@ public GroupByEngineIterator make() } else { return new HashAggregateIterator( query, - config, + querySpecificConfig, cursor, buffer, fudgeTimestamp, @@ -186,14 +186,14 @@ public void cleanup(GroupByEngineIterator iterFromMake) } private static boolean isArrayAggregateApplicable( - GroupByQueryConfig config, + GroupByQueryConfig querySpecificConfig, GroupByQuery query, GroupByColumnSelectorPlus[] dims, StorageAdapter storageAdapter, ByteBuffer buffer ) { - if (config.isForceHashAggregation()) { + if (querySpecificConfig.isForceHashAggregation()) { return false; } @@ -276,7 +276,7 @@ private abstract static class GroupByEngineIterator implements Iterator public GroupByEngineIterator( final GroupByQuery query, - final GroupByQueryConfig config, + final GroupByQueryConfig querySpecificConfig, final Cursor cursor, final ByteBuffer buffer, final DateTime fudgeTimestamp, @@ -285,7 +285,7 @@ public GroupByEngineIterator( ) { this.query = query; - this.querySpecificConfig = config.withOverrides(query); + this.querySpecificConfig = querySpecificConfig; this.cursor = cursor; this.buffer = buffer; this.keySerde = new GroupByEngineKeySerde(dims); @@ -413,7 +413,7 @@ private static class HashAggregateIterator extends GroupByEngineIterator process( StorageAdapter storageAdapter ) { - return GroupByQueryEngineV2.process(query, storageAdapter, bufferPool, configSupplier.get()); + return GroupByQueryEngineV2.process(query, storageAdapter, bufferPool, configSupplier.get().withOverrides(query)); } } From 912adcc2845d91c58b0f66621fb03d4cf969811c Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Wed, 28 Mar 2018 16:37:53 -0700 Subject: [PATCH 09/67] ArrayAggregation: Use long to avoid overflow (#5544) * ArrayAggregation: Use long to avoid overflow * Add Tests --- .../epinephelinae/BufferArrayGrouper.java | 4 ++-- .../epinephelinae/GroupByQueryEngineV2.java | 2 +- .../epinephelinae/BufferArrayGrouperTest.java | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/BufferArrayGrouper.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/BufferArrayGrouper.java index 6db3f884b201..4e4a30d8fac4 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/BufferArrayGrouper.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/BufferArrayGrouper.java @@ -61,7 +61,7 @@ public class BufferArrayGrouper implements IntGrouper private ByteBuffer usedFlagBuffer; private ByteBuffer valBuffer; - static int requiredBufferCapacity( + static long requiredBufferCapacity( int cardinality, AggregatorFactory[] aggregatorFactories ) @@ -72,7 +72,7 @@ static int requiredBufferCapacity( .sum(); return getUsedFlagBufferCapacity(cardinalityWithMissingValue) + // total used flags size - cardinalityWithMissingValue * recordSize; // total values size + (long) cardinalityWithMissingValue * recordSize; // total values size } /** diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java 
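The functional change in this commit is where the override is applied: `GroupByStrategyV2` now calls `withOverrides(query)` before handing the config to the engine, so a `forceHashAggregation` flag set in an individual query's context is honored when choosing between array and hash aggregation. A hypothetical, self-contained illustration of that override pattern; the `Config` class below is a stand-in, not Druid's `GroupByQueryConfig`, and only the context key name is taken from the commit title:

```java
// Hypothetical illustration of the per-query config override pattern.
import java.util.Collections;
import java.util.Map;

public class OverrideSketch
{
  static class Config
  {
    final boolean forceHashAggregation;

    Config(boolean forceHashAggregation)
    {
      this.forceHashAggregation = forceHashAggregation;
    }

    // Fold query-context overrides into a query-specific copy of the config.
    Config withOverrides(Map<String, Object> queryContext)
    {
      Object override = queryContext.get("forceHashAggregation");
      return override == null ? this : new Config(Boolean.parseBoolean(override.toString()));
    }
  }

  public static void main(String[] args)
  {
    Config base = new Config(false);
    // After this patch the override happens once, in the strategy, so the
    // engine always receives the already query-specific config.
    Map<String, Object> context = Collections.singletonMap("forceHashAggregation", true);
    Config querySpecific = base.withOverrides(context);
    System.out.println("force hash aggregation: " + querySpecific.forceHashAggregation);
  }
}
```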
b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java index 3d48f1ad2b6a..8e79e9e73d6b 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/GroupByQueryEngineV2.java @@ -219,7 +219,7 @@ private static boolean isArrayAggregateApplicable( final AggregatorFactory[] aggregatorFactories = query .getAggregatorSpecs() .toArray(new AggregatorFactory[query.getAggregatorSpecs().size()]); - final int requiredBufferCapacity = BufferArrayGrouper.requiredBufferCapacity( + final long requiredBufferCapacity = BufferArrayGrouper.requiredBufferCapacity( cardinality, aggregatorFactories ); diff --git a/processing/src/test/java/io/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java b/processing/src/test/java/io/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java index 6ca584f2b8d3..acdc2c406047 100644 --- a/processing/src/test/java/io/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java +++ b/processing/src/test/java/io/druid/query/groupby/epinephelinae/BufferArrayGrouperTest.java @@ -86,4 +86,21 @@ private BufferArrayGrouper newGrouper( grouper.init(); return grouper; } + + @Test + public void testRequiredBufferCapacity() + { + int[] cardinalityArray = new int[] {1, 10, Integer.MAX_VALUE - 1}; + AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] { + new LongSumAggregatorFactory("sum", "sum") + }; + + long[] requiredSizes = new long[] {17, 90, 16911433721L}; + + for (int i = 0; i < cardinalityArray.length; i++) { + Assert.assertEquals(requiredSizes[i], BufferArrayGrouper.requiredBufferCapacity( + cardinalityArray[i], + aggregatorFactories)); + } + } } From 81be1b396667c23be7f7506ad5e01170a4aa3b46 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 28 Mar 2018 18:58:39 -0700 Subject: [PATCH 10/67] this will fix it (#5549) --- integration-tests/docker/broker.conf | 10 +++++----- integration-tests/docker/historical.conf | 12 ++++++------ integration-tests/docker/middlemanager.conf | 4 ++-- integration-tests/docker/router.conf | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/integration-tests/docker/broker.conf b/integration-tests/docker/broker.conf index d791e82b70f5..73ad35bda074 100644 --- a/integration-tests/docker/broker.conf +++ b/integration-tests/docker/broker.conf @@ -1,10 +1,10 @@ [program:druid-broker] command=java -server - -Xmx1g - -Xms1g - -XX:NewSize=500m - -XX:MaxNewSize=500m + -Xmx512m + -Xms512m + -XX:NewSize=256m + -XX:MaxNewSize=256m -XX:+UseConcMarkSweepGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps @@ -12,7 +12,7 @@ command=java -Dfile.encoding=UTF-8 -Ddruid.host=%(ENV_HOST_IP)s -Ddruid.zk.service.host=druid-zookeeper-kafka - -Ddruid.processing.buffer.sizeBytes=75000000 + -Ddruid.processing.buffer.sizeBytes=25000000 -Ddruid.server.http.numThreads=100 -Ddruid.processing.numThreads=1 -Ddruid.broker.http.numConnections=30 diff --git a/integration-tests/docker/historical.conf b/integration-tests/docker/historical.conf index 58cbd84e2416..d0d2f1432126 100644 --- a/integration-tests/docker/historical.conf +++ b/integration-tests/docker/historical.conf @@ -1,10 +1,10 @@ [program:druid-historical] command=java -server - -Xmx1500m - -Xms1500m - -XX:NewSize=750m - -XX:MaxNewSize=750m + -Xmx512m + -Xms512m + -XX:NewSize=256m + -XX:MaxNewSize=256m -XX:+UseConcMarkSweepGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps @@ -14,8 +14,8 @@ command=java -Ddruid.zk.service.host=druid-zookeeper-kafka 
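The capacity fix above is a plain integer-overflow issue: with roughly two billion groups, `cardinality * recordSize` no longer fits in an `int`, so the product wraps to a negative value. A stand-alone illustration (not Druid code) of the failure and of the widening cast that fixes it:

```java
// Stand-alone illustration of the overflow fixed in requiredBufferCapacity():
// with int arithmetic the product wraps; widening one operand to long is enough.
public class OverflowSketch
{
  public static void main(String[] args)
  {
    int cardinalityWithMissingValue = Integer.MAX_VALUE - 1;  // ~2.1 billion groups
    int recordSize = 8;                                       // e.g. one long-sum aggregator

    int broken = cardinalityWithMissingValue * recordSize;          // wraps to -16
    long fixed = (long) cardinalityWithMissingValue * recordSize;   // 17179869168

    System.out.println("int arithmetic:  " + broken);
    System.out.println("long arithmetic: " + fixed);
  }
}
```

Returning a `long` lets the caller in `GroupByQueryEngineV2` reject array aggregation for cardinalities whose buffer requirement exceeds the available capacity instead of silently comparing against a wrapped negative number.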
-Ddruid.s3.accessKey=AKIAIMKECRUYKDQGR6YQ -Ddruid.s3.secretKey=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 - -Ddruid.processing.buffer.sizeBytes=75000000 - -Ddruid.processing.numThreads=3 + -Ddruid.processing.buffer.sizeBytes=25000000 + -Ddruid.processing.numThreads=2 -Ddruid.server.http.numThreads=100 -Ddruid.segmentCache.locations="[{\"path\":\"/shared/druid/indexCache\",\"maxSize\":5000000000}]" -Ddruid.server.maxSize=5000000000 diff --git a/integration-tests/docker/middlemanager.conf b/integration-tests/docker/middlemanager.conf index 2ca1560fb2c7..b719768e166f 100644 --- a/integration-tests/docker/middlemanager.conf +++ b/integration-tests/docker/middlemanager.conf @@ -10,11 +10,11 @@ command=java -Dfile.encoding=UTF-8 -Ddruid.host=%(ENV_HOST_IP)s -Ddruid.zk.service.host=druid-zookeeper-kafka - -Ddruid.worker.capacity=8 + -Ddruid.worker.capacity=3 -Ddruid.indexer.logs.directory=/shared/tasklogs -Ddruid.storage.storageDirectory=/shared/storage -Ddruid.indexer.runner.javaOpts=-server -Xmx256m -Xms256m -XX:NewSize=128m -XX:MaxNewSize=128m -XX:+UseConcMarkSweepGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps - -Ddruid.indexer.fork.property.druid.processing.buffer.sizeBytes=75000000 + -Ddruid.indexer.fork.property.druid.processing.buffer.sizeBytes=25000000 -Ddruid.indexer.fork.property.druid.processing.numThreads=1 -Ddruid.indexer.fork.server.http.numThreads=100 -Ddruid.s3.accessKey=AKIAIMKECRUYKDQGR6YQ diff --git a/integration-tests/docker/router.conf b/integration-tests/docker/router.conf index 06af3aab2656..6e22f65c6602 100644 --- a/integration-tests/docker/router.conf +++ b/integration-tests/docker/router.conf @@ -1,7 +1,7 @@ [program:druid-router] command=java -server - -Xmx1g + -Xmx128m -XX:+UseConcMarkSweepGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps From 8878a7ff943bf2ce6f15339d330bf4adf8d28472 Mon Sep 17 00:00:00 2001 From: Kirill Kozlov Date: Thu, 29 Mar 2018 06:00:08 +0200 Subject: [PATCH 11/67] Replace guava Charsets with native java StandardCharsets (#5545) --- .../data/input/impl/StringInputRowParser.java | 4 ++-- .../input/impl/InputRowParserSerdeTest.java | 6 +++--- ...fetchableTextFilesFirehoseFactoryTest.java | 3 +-- codestyle/druid-forbidden-apis.txt | 3 ++- .../storage/azure/AzureTaskLogsTest.java | 6 +++--- .../TimestampAggregationSelectTest.java | 4 ++-- .../quantiles/DoublesSketchOperations.java | 9 ++++----- .../common/tasklogs/HdfsTaskLogsTest.java | 12 ++++++------ .../indexing/kafka/KafkaIndexTaskClient.java | 4 ++-- .../indexing/kafka/KafkaIndexTaskTest.java | 4 ++-- .../kafka/supervisor/KafkaSupervisorTest.java | 4 ++-- .../io/druid/data/input/MapPopulator.java | 4 ++-- .../namespace/JSONFlatDataParserTest.java | 5 ++--- .../hadoop/DatasourceInputFormatTest.java | 4 ++-- .../indexing/common/task/MergeTaskBase.java | 12 ++++++------ .../indexing/overlord/RemoteTaskRunner.java | 4 ++-- .../indexing/overlord/hrtr/WorkerHolder.java | 4 ++-- .../common/task/MergeTaskBaseTest.java | 4 ++-- .../common/tasklogs/FileTaskLogsTest.java | 10 +++++----- .../clients/ClientInfoResourceTestClient.java | 8 ++++---- .../CoordinatorResourceTestClient.java | 4 ++-- .../EventReceiverFirehoseTestClient.java | 9 ++++----- .../clients/OverlordResourceTestClient.java | 4 ++-- .../clients/QueryResourceTestClient.java | 4 ++-- .../org/testng/DruidTestRunnerFactory.java | 10 +++++----- .../ITBasicAuthConfigurationTest.java | 12 ++++++------ .../druid/java/util/common/StringUtils.java | 6 +++--- .../util/common/io/smoosh/FileSmoosher.java | 4 ++-- .../common/io/smoosh/SmooshedFileMapper.java | 
4 ++-- .../common/parsers/JSONFlattenerMaker.java | 4 ++-- .../common/parsers/JSONToLowerParser.java | 4 ++-- .../druid/java/util/http/client/Request.java | 6 +++--- .../java/util/metrics/cgroups/CpuAcct.java | 4 ++-- .../metrics/cgroups/ProcCgroupDiscoverer.java | 6 +++--- .../java/util/emitter/core/EmitterTest.java | 11 +++++------ .../core/ParametrizedUriEmitterTest.java | 8 ++++---- .../util/http/client/AsyncHttpClientTest.java | 3 +-- .../util/http/client/FriendlyServersTest.java | 19 +++++++++---------- .../util/http/client/JankyServersTest.java | 16 ++++++++-------- .../java/io/druid/guice/PropertiesModule.java | 4 ++-- .../test/java/io/druid/segment/TestIndex.java | 6 +++--- .../druid/client/CachingClusteredClient.java | 4 ++-- .../io/druid/client/DirectDruidClient.java | 18 +++++++++--------- .../io/druid/client/cache/MemcachedCache.java | 4 ++-- .../io/druid/discovery/DruidLeaderClient.java | 14 +++++++------- .../druid/server/log/FileRequestLogger.java | 4 ++-- .../druid/server/router/ConsistentHasher.java | 4 ++-- .../druid/server/router/RendezvousHasher.java | 4 ++-- .../client/CachingClusteredClientTest.java | 6 +++--- .../segment/indexing/DataSchemaTest.java | 6 +++--- .../firehose/IngestSegmentFirehoseTest.java | 4 ++-- .../firehose/LocalFirehoseFactoryTest.java | 3 +-- .../server/log/LoggingRequestLoggerTest.java | 4 ++-- 53 files changed, 165 insertions(+), 172 deletions(-) diff --git a/api/src/main/java/io/druid/data/input/impl/StringInputRowParser.java b/api/src/main/java/io/druid/data/input/impl/StringInputRowParser.java index cefe4706e2f4..2a7fcdca80af 100644 --- a/api/src/main/java/io/druid/data/input/impl/StringInputRowParser.java +++ b/api/src/main/java/io/druid/data/input/impl/StringInputRowParser.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.collect.Iterators; import io.druid.data.input.ByteBufferInputRowParser; @@ -36,6 +35,7 @@ import java.nio.charset.Charset; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; @@ -43,7 +43,7 @@ */ public class StringInputRowParser implements ByteBufferInputRowParser { - private static final Charset DEFAULT_CHARSET = Charsets.UTF_8; + private static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; private final ParseSpec parseSpec; private final MapInputRowParser mapParser; diff --git a/api/src/test/java/io/druid/data/input/impl/InputRowParserSerdeTest.java b/api/src/test/java/io/druid/data/input/impl/InputRowParserSerdeTest.java index c7cb2df6340c..d9da1907a0ec 100644 --- a/api/src/test/java/io/druid/data/input/impl/InputRowParserSerdeTest.java +++ b/api/src/test/java/io/druid/data/input/impl/InputRowParserSerdeTest.java @@ -20,7 +20,6 @@ package io.druid.data.input.impl; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; @@ -37,6 +36,7 @@ import java.nio.ByteBuffer; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -73,8 +73,8 @@ public void testStringInputRowParserSerde() throws Exception public void testStringInputRowParserSerdeMultiCharset() throws Exception 
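The rest of this commit is the mechanical part of the migration. A small sketch showing that the swap is behavior-preserving; the Guava import below is only there for the comparison and is exactly what the new forbidden-apis rule bans going forward:

```java
// Equivalence sketch: Guava's Charsets.UTF_8 and the JDK's StandardCharsets.UTF_8
// describe the same charset, so encoded bytes are identical. The Guava import is
// used only for this comparison; it is now a forbidden API in this repo.
import com.google.common.base.Charsets;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class CharsetSwapSketch
{
  public static void main(String[] args)
  {
    byte[] viaGuava = "blah".getBytes(Charsets.UTF_8);
    byte[] viaJdk = "blah".getBytes(StandardCharsets.UTF_8);
    System.out.println(Charsets.UTF_8.equals(StandardCharsets.UTF_8)); // true
    System.out.println(Arrays.equals(viaGuava, viaJdk));               // true
  }
}
```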
{ Charset[] testCharsets = { - Charsets.US_ASCII, Charsets.ISO_8859_1, Charsets.UTF_8, - Charsets.UTF_16BE, Charsets.UTF_16LE, Charsets.UTF_16 + StandardCharsets.US_ASCII, StandardCharsets.ISO_8859_1, StandardCharsets.UTF_8, + StandardCharsets.UTF_16BE, StandardCharsets.UTF_16LE, StandardCharsets.UTF_16 }; for (Charset testCharset : testCharsets) { diff --git a/api/src/test/java/io/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactoryTest.java b/api/src/test/java/io/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactoryTest.java index 340da4d49349..2d43fc3179a5 100644 --- a/api/src/test/java/io/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactoryTest.java +++ b/api/src/test/java/io/druid/data/input/impl/prefetch/PrefetchableTextFilesFirehoseFactoryTest.java @@ -19,7 +19,6 @@ package io.druid.data.input.impl.prefetch; -import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.Lists; @@ -82,7 +81,7 @@ public class PrefetchableTextFilesFirehoseFactoryTest false, 0 ), - Charsets.UTF_8.name() + StandardCharsets.UTF_8.name() ); @Rule diff --git a/codestyle/druid-forbidden-apis.txt b/codestyle/druid-forbidden-apis.txt index 96db08826006..c50619d0cf4f 100644 --- a/codestyle/druid-forbidden-apis.txt +++ b/codestyle/druid-forbidden-apis.txt @@ -1,4 +1,5 @@ com.google.common.collect.MapMaker @ Create java.util.concurrent.ConcurrentHashMap directly com.google.common.collect.Maps#newConcurrentMap() @ Create java.util.concurrent.ConcurrentHashMap directly com.google.common.util.concurrent.Futures#transform(com.google.common.util.concurrent.ListenableFuture, com.google.common.util.concurrent.AsyncFunction) @ Use io.druid.java.util.common.concurrent.ListenableFutures#transformAsync -com.google.common.collect.Iterators#emptyIterator() @ Use java.util.Collections#emptyIterator() \ No newline at end of file +com.google.common.collect.Iterators#emptyIterator() @ Use java.util.Collections#emptyIterator() +com.google.common.base.Charsets @ Use java.nio.charset.StandardCharsets instead \ No newline at end of file diff --git a/extensions-contrib/azure-extensions/src/test/java/io/druid/storage/azure/AzureTaskLogsTest.java b/extensions-contrib/azure-extensions/src/test/java/io/druid/storage/azure/AzureTaskLogsTest.java index 5f8394a735a4..456d80b530e8 100644 --- a/extensions-contrib/azure-extensions/src/test/java/io/druid/storage/azure/AzureTaskLogsTest.java +++ b/extensions-contrib/azure-extensions/src/test/java/io/druid/storage/azure/AzureTaskLogsTest.java @@ -19,7 +19,6 @@ package io.druid.storage.azure; -import com.google.common.base.Charsets; import com.google.common.base.Optional; import com.google.common.io.ByteSource; import com.google.common.io.Files; @@ -34,6 +33,7 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import static org.easymock.EasyMock.expect; import static org.easymock.EasyMock.expectLastCall; @@ -88,7 +88,7 @@ public void testStreamTaskLogWithoutOffset() throws Exception expect(azureStorage.getBlobExists(container, blobPath)).andReturn(true); expect(azureStorage.getBlobLength(container, blobPath)).andReturn((long) testLog.length()); expect(azureStorage.getBlobInputStream(container, blobPath)).andReturn( - new ByteArrayInputStream(testLog.getBytes(Charsets.UTF_8))); + new ByteArrayInputStream(testLog.getBytes(StandardCharsets.UTF_8))); replayAll(); @@ -111,7 +111,7 @@ 
public void testStreamTaskLogWithPositiveOffset() throws Exception expect(azureStorage.getBlobExists(container, blobPath)).andReturn(true); expect(azureStorage.getBlobLength(container, blobPath)).andReturn((long) testLog.length()); expect(azureStorage.getBlobInputStream(container, blobPath)).andReturn( - new ByteArrayInputStream(testLog.getBytes(Charsets.UTF_8))); + new ByteArrayInputStream(testLog.getBytes(StandardCharsets.UTF_8))); replayAll(); diff --git a/extensions-contrib/time-min-max/src/test/java/io/druid/query/aggregation/TimestampAggregationSelectTest.java b/extensions-contrib/time-min-max/src/test/java/io/druid/query/aggregation/TimestampAggregationSelectTest.java index 58fbdc700622..e862d3fcec08 100644 --- a/extensions-contrib/time-min-max/src/test/java/io/druid/query/aggregation/TimestampAggregationSelectTest.java +++ b/extensions-contrib/time-min-max/src/test/java/io/druid/query/aggregation/TimestampAggregationSelectTest.java @@ -19,7 +19,6 @@ package io.druid.query.aggregation; -import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; @@ -41,6 +40,7 @@ import javax.annotation.Nullable; import java.io.File; +import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.util.List; import java.util.zip.ZipFile; @@ -147,7 +147,7 @@ public void testSimpleDataIngestionAndSelectTest() throws Exception 0, Granularities.MONTH, 100, - Resources.toString(Resources.getResource("select.json"), Charsets.UTF_8) + Resources.toString(Resources.getResource("select.json"), StandardCharsets.UTF_8) ); Result result = (Result) Iterables.getOnlyElement(seq.toList()); diff --git a/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchOperations.java b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchOperations.java index 5a0f70e6b76a..48e3f3c11872 100644 --- a/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchOperations.java +++ b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchOperations.java @@ -19,13 +19,12 @@ package io.druid.query.aggregation.datasketches.quantiles; -import org.apache.commons.codec.binary.Base64; - -import com.google.common.base.Charsets; import com.yahoo.memory.Memory; import com.yahoo.sketches.quantiles.DoublesSketch; - import io.druid.java.util.common.ISE; +import org.apache.commons.codec.binary.Base64; + +import java.nio.charset.StandardCharsets; public class DoublesSketchOperations { @@ -48,7 +47,7 @@ public static DoublesSketch deserialize(final Object serializedSketch) public static DoublesSketch deserializeFromBase64EncodedString(final String str) { - return deserializeFromByteArray(Base64.decodeBase64(str.getBytes(Charsets.UTF_8))); + return deserializeFromByteArray(Base64.decodeBase64(str.getBytes(StandardCharsets.UTF_8))); } public static DoublesSketch deserializeFromByteArray(final byte[] data) diff --git a/extensions-core/hdfs-storage/src/test/java/io/druid/indexing/common/tasklogs/HdfsTaskLogsTest.java b/extensions-core/hdfs-storage/src/test/java/io/druid/indexing/common/tasklogs/HdfsTaskLogsTest.java index 981fb51bf257..9e4843417a54 100644 --- a/extensions-core/hdfs-storage/src/test/java/io/druid/indexing/common/tasklogs/HdfsTaskLogsTest.java +++ 
b/extensions-core/hdfs-storage/src/test/java/io/druid/indexing/common/tasklogs/HdfsTaskLogsTest.java @@ -19,7 +19,6 @@ package io.druid.indexing.common.tasklogs; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableMap; import com.google.common.io.ByteStreams; import com.google.common.io.Files; @@ -37,6 +36,7 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Map; public class HdfsTaskLogsTest @@ -50,7 +50,7 @@ public void testStream() throws Exception final File tmpDir = tempFolder.newFolder(); final File logDir = new File(tmpDir, "logs"); final File logFile = new File(tmpDir, "log"); - Files.write("blah", logFile, Charsets.UTF_8); + Files.write("blah", logFile, StandardCharsets.UTF_8); final TaskLogs taskLogs = new HdfsTaskLogs(new HdfsTaskLogsConfig(logDir.toString()), new Configuration()); taskLogs.pushTaskLog("foo", logFile); @@ -69,11 +69,11 @@ public void testOverwrite() throws Exception final File logFile = new File(tmpDir, "log"); final TaskLogs taskLogs = new HdfsTaskLogs(new HdfsTaskLogsConfig(logDir.toString()), new Configuration()); - Files.write("blah", logFile, Charsets.UTF_8); + Files.write("blah", logFile, StandardCharsets.UTF_8); taskLogs.pushTaskLog("foo", logFile); Assert.assertEquals("blah", readLog(taskLogs, "foo", 0)); - Files.write("blah blah", logFile, Charsets.UTF_8); + Files.write("blah blah", logFile, StandardCharsets.UTF_8); taskLogs.pushTaskLog("foo", logFile); Assert.assertEquals("blah blah", readLog(taskLogs, "foo", 0)); } @@ -90,7 +90,7 @@ public void testKill() throws Exception final TaskLogs taskLogs = new HdfsTaskLogs(new HdfsTaskLogsConfig(logDir.toString()), new Configuration()); - Files.write("log1content", logFile, Charsets.UTF_8); + Files.write("log1content", logFile, StandardCharsets.UTF_8); taskLogs.pushTaskLog("log1", logFile); Assert.assertEquals("log1content", readLog(taskLogs, "log1", 0)); @@ -101,7 +101,7 @@ public void testKill() throws Exception long time = (System.currentTimeMillis() / 1000) * 1000; Assert.assertTrue(fs.getFileStatus(new Path(logDirPath, "log1")).getModificationTime() < time); - Files.write("log2content", logFile, Charsets.UTF_8); + Files.write("log2content", logFile, StandardCharsets.UTF_8); taskLogs.pushTaskLog("log2", logFile); Assert.assertEquals("log2content", readLog(taskLogs, "log2", 0)); Assert.assertTrue(fs.getFileStatus(new Path(logDirPath, "log2")).getModificationTime() >= time); diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTaskClient.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTaskClient.java index c869ed4665bd..315cc0fda933 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTaskClient.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTaskClient.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Charsets; import com.google.common.base.Optional; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; @@ -57,6 +56,7 @@ import java.io.IOException; import java.net.Socket; import java.net.URI; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.Callable; @@ -551,7 
+551,7 @@ private FullResponseHolder submitRequest( } log.debug("HTTP %s: %s", method.getName(), serviceUri.toString()); - response = httpClient.go(request, new FullResponseHandler(Charsets.UTF_8), httpTimeout).get(); + response = httpClient.go(request, new FullResponseHandler(StandardCharsets.UTF_8), httpTimeout).get(); } catch (Exception e) { Throwables.propagateIfInstanceOf(e.getCause(), IOException.class); diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java index 13452ce42276..8bdd3c7f087f 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.base.Throwables; @@ -151,6 +150,7 @@ import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -224,7 +224,7 @@ public KafkaIndexTaskTest(boolean isIncrementalHandoffSupported) new JSONPathSpec(true, ImmutableList.of()), ImmutableMap.of() ), - Charsets.UTF_8.name() + StandardCharsets.UTF_8.name() ), Map.class ), diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java index a152fff6c685..f1c8ce97c9fe 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java @@ -20,7 +20,6 @@ package io.druid.indexing.kafka.supervisor; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Optional; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -91,6 +90,7 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -2003,7 +2003,7 @@ private static DataSchema getDataSchema(String dataSource) new JSONPathSpec(true, ImmutableList.of()), ImmutableMap.of() ), - Charsets.UTF_8.name() + StandardCharsets.UTF_8.name() ), Map.class ), diff --git a/extensions-core/lookups-cached-global/src/main/java/io/druid/data/input/MapPopulator.java b/extensions-core/lookups-cached-global/src/main/java/io/druid/data/input/MapPopulator.java index c0b768d3cb7c..fd6f122ad0da 100644 --- a/extensions-core/lookups-cached-global/src/main/java/io/druid/data/input/MapPopulator.java +++ b/extensions-core/lookups-cached-global/src/main/java/io/druid/data/input/MapPopulator.java @@ -19,13 +19,13 @@ package io.druid.data.input; -import com.google.common.base.Charsets; import com.google.common.io.ByteSource; import com.google.common.io.LineProcessor; import io.druid.java.util.common.ISE; 
import io.druid.java.util.common.parsers.Parser; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Map; /** @@ -79,7 +79,7 @@ public int getEntries() */ public PopulateResult populate(final ByteSource source, final Map map) throws IOException { - return source.asCharSource(Charsets.UTF_8).readLines( + return source.asCharSource(StandardCharsets.UTF_8).readLines( new LineProcessor() { private int lines = 0; diff --git a/extensions-core/lookups-cached-global/src/test/java/io/druid/query/lookup/namespace/JSONFlatDataParserTest.java b/extensions-core/lookups-cached-global/src/test/java/io/druid/query/lookup/namespace/JSONFlatDataParserTest.java index e636d66bdc59..c07c1ea43d9a 100644 --- a/extensions-core/lookups-cached-global/src/test/java/io/druid/query/lookup/namespace/JSONFlatDataParserTest.java +++ b/extensions-core/lookups-cached-global/src/test/java/io/druid/query/lookup/namespace/JSONFlatDataParserTest.java @@ -19,9 +19,7 @@ package io.druid.query.lookup.namespace; - import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; @@ -39,6 +37,7 @@ import org.junit.rules.TemporaryFolder; import java.io.File; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -68,7 +67,7 @@ public class JSONFlatDataParserTest public void setUp() throws Exception { tmpFile = temporaryFolder.newFile("lookup.json"); - final CharSink sink = Files.asByteSink(tmpFile).asCharSink(Charsets.UTF_8); + final CharSink sink = Files.asByteSink(tmpFile).asCharSink(StandardCharsets.UTF_8); sink.writeLines( Iterables.transform( MAPPINGS, diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/hadoop/DatasourceInputFormatTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/hadoop/DatasourceInputFormatTest.java index 0617b8ca55da..1b4af099662f 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/hadoop/DatasourceInputFormatTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/hadoop/DatasourceInputFormatTest.java @@ -19,7 +19,6 @@ package io.druid.indexer.hadoop; -import com.google.common.base.Charsets; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -51,6 +50,7 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -281,7 +281,7 @@ public void testGetSplitsUsingDefaultSupplier() throws Exception { // Use the builtin supplier, reading from the local filesystem, rather than testFormatter. 
final File tmpFile = temporaryFolder.newFile("something:with:colons"); - Files.write("dummy", tmpFile, Charsets.UTF_8); + Files.write("dummy", tmpFile, StandardCharsets.UTF_8); final ImmutableList mySegments = ImmutableList.of( WindowedDataSegment.of( diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/MergeTaskBase.java b/indexing-service/src/main/java/io/druid/indexing/common/task/MergeTaskBase.java index 747cf58d4162..7f2cae1a08e5 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/MergeTaskBase.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/MergeTaskBase.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.base.Joiner; import com.google.common.base.Objects; @@ -34,9 +33,6 @@ import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.common.hash.Hashing; -import io.druid.java.util.emitter.EmittingLogger; -import io.druid.java.util.emitter.service.ServiceEmitter; -import io.druid.java.util.emitter.service.ServiceMetricEvent; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; @@ -45,8 +41,11 @@ import io.druid.java.util.common.DateTimes; import io.druid.java.util.common.ISE; import io.druid.java.util.common.StringUtils; -import io.druid.segment.writeout.SegmentWriteOutMediumFactory; +import io.druid.java.util.emitter.EmittingLogger; +import io.druid.java.util.emitter.service.ServiceEmitter; +import io.druid.java.util.emitter.service.ServiceMetricEvent; import io.druid.segment.IndexIO; +import io.druid.segment.writeout.SegmentWriteOutMediumFactory; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NoneShardSpec; import org.joda.time.DateTime; @@ -54,6 +53,7 @@ import javax.annotation.Nullable; import java.io.File; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import java.util.Set; @@ -307,7 +307,7 @@ public String apply(DataSegment x) return StringUtils.format( "%s_%s", dataSource, - Hashing.sha1().hashString(segmentIDs, Charsets.UTF_8).toString() + Hashing.sha1().hashString(segmentIDs, StandardCharsets.UTF_8).toString() ); } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java index 54427e435c24..ed7dbc3505d7 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Charsets; import com.google.common.base.Joiner; import com.google.common.base.Optional; import com.google.common.base.Preconditions; @@ -92,6 +91,7 @@ import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -128,7 +128,7 @@ public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer { private static final EmittingLogger log = new EmittingLogger(RemoteTaskRunner.class); - private static final StatusResponseHandler 
RESPONSE_HANDLER = new StatusResponseHandler(Charsets.UTF_8); + private static final StatusResponseHandler RESPONSE_HANDLER = new StatusResponseHandler(StandardCharsets.UTF_8); private static final Joiner JOINER = Joiner.on("/"); private final ObjectMapper jsonMapper; diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/hrtr/WorkerHolder.java b/indexing-service/src/main/java/io/druid/indexing/overlord/hrtr/WorkerHolder.java index 199abb6821c8..17fa67f7cece 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/hrtr/WorkerHolder.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/hrtr/WorkerHolder.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.jaxrs.smile.SmileMediaTypes; -import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.Sets; @@ -50,6 +49,7 @@ import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -73,7 +73,7 @@ public class WorkerHolder { }; - private static final StatusResponseHandler RESPONSE_HANDLER = new StatusResponseHandler(Charsets.UTF_8); + private static final StatusResponseHandler RESPONSE_HANDLER = new StatusResponseHandler(StandardCharsets.UTF_8); private final Worker worker; private Worker disabledWorker; diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/MergeTaskBaseTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/MergeTaskBaseTest.java index a75b3fa45d5c..aa43a153e9a8 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/MergeTaskBaseTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/MergeTaskBaseTest.java @@ -19,7 +19,6 @@ package io.druid.indexing.common.task; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableList; import com.google.common.hash.Hashing; import io.druid.indexing.common.TaskToolbox; @@ -29,6 +28,7 @@ import org.junit.Test; import java.io.File; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; @@ -80,7 +80,7 @@ public void testID() "2012-01-03T00:00:00.000Z_2012-01-05T00:00:00.000Z_V1_0" + "_2012-01-04T00:00:00.000Z_2012-01-06T00:00:00.000Z_V1_0" + "_2012-01-05T00:00:00.000Z_2012-01-07T00:00:00.000Z_V1_0", - Charsets.UTF_8 + StandardCharsets.UTF_8 ).toString() + "_"; Assert.assertEquals( diff --git a/indexing-service/src/test/java/io/druid/indexing/common/tasklogs/FileTaskLogsTest.java b/indexing-service/src/test/java/io/druid/indexing/common/tasklogs/FileTaskLogsTest.java index cb7b2282a68b..717709d97240 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/tasklogs/FileTaskLogsTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/tasklogs/FileTaskLogsTest.java @@ -19,7 +19,6 @@ package io.druid.indexing.common.tasklogs; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableMap; import com.google.common.io.ByteStreams; import com.google.common.io.Files; @@ -35,6 +34,7 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Map; public class FileTaskLogsTest @@ -53,7 +53,7 @@ public void testSimple() throws Exception try { final File logDir = new File(tmpDir, "druid/logs"); final File 
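The `MergeTaskBase` hunk earlier in this commit keeps its id scheme while switching charsets: the joined segment identifiers are SHA-1 hashed so the merge task id stays short and deterministic, which is what `MergeTaskBaseTest.testID` above asserts. A stand-alone sketch of that scheme; the data source name and segment identifiers are illustrative values only:

```java
// Stand-alone sketch of the hashed merge-task id checked in MergeTaskBaseTest;
// the data source name and segment identifiers are illustrative.
import com.google.common.base.Joiner;
import com.google.common.hash.Hashing;

import java.nio.charset.StandardCharsets;

public class MergeTaskIdSketch
{
  public static void main(String[] args)
  {
    String dataSource = "foo";
    String segmentIDs = Joiner.on("_").join(
        "2012-01-03T00:00:00.000Z_2012-01-05T00:00:00.000Z_V1_0",
        "2012-01-04T00:00:00.000Z_2012-01-06T00:00:00.000Z_V1_0",
        "2012-01-05T00:00:00.000Z_2012-01-07T00:00:00.000Z_V1_0"
    );
    // dataSource + "_" + sha1(joined segment ids) + "_" is the expected id prefix.
    String idPrefix = dataSource + "_"
                      + Hashing.sha1().hashString(segmentIDs, StandardCharsets.UTF_8)
                      + "_";
    System.out.println(idPrefix);
  }
}
```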
logFile = new File(tmpDir, "log"); - Files.write("blah", logFile, Charsets.UTF_8); + Files.write("blah", logFile, StandardCharsets.UTF_8); final TaskLogs taskLogs = new FileTaskLogs(new FileTaskLogsConfig(logDir)); taskLogs.pushTaskLog("foo", logFile); @@ -75,7 +75,7 @@ public void testPushTaskLogDirCreationFails() throws Exception final File tmpDir = temporaryFolder.newFolder(); final File logDir = new File(tmpDir, "druid/logs"); final File logFile = new File(tmpDir, "log"); - Files.write("blah", logFile, Charsets.UTF_8); + Files.write("blah", logFile, StandardCharsets.UTF_8); if (!tmpDir.setWritable(false)) { throw new RuntimeException("failed to make tmp dir read-only"); @@ -96,7 +96,7 @@ public void testKill() throws Exception final File logFile = new File(tmpDir, "log"); final TaskLogs taskLogs = new FileTaskLogs(new FileTaskLogsConfig(logDir)); - Files.write("log1content", logFile, Charsets.UTF_8); + Files.write("log1content", logFile, StandardCharsets.UTF_8); taskLogs.pushTaskLog("log1", logFile); Assert.assertEquals("log1content", readLog(taskLogs, "log1", 0)); @@ -107,7 +107,7 @@ public void testKill() throws Exception long time = (System.currentTimeMillis() / 1000) * 1000; Assert.assertTrue(new File(logDir, "log1.log").lastModified() < time); - Files.write("log2content", logFile, Charsets.UTF_8); + Files.write("log2content", logFile, StandardCharsets.UTF_8); taskLogs.pushTaskLog("log2", logFile); Assert.assertEquals("log2content", readLog(taskLogs, "log2", 0)); Assert.assertTrue(new File(logDir, "log2.log").lastModified() >= time); diff --git a/integration-tests/src/main/java/io/druid/testing/clients/ClientInfoResourceTestClient.java b/integration-tests/src/main/java/io/druid/testing/clients/ClientInfoResourceTestClient.java index 65bccb904f53..ebda5570165c 100644 --- a/integration-tests/src/main/java/io/druid/testing/clients/ClientInfoResourceTestClient.java +++ b/integration-tests/src/main/java/io/druid/testing/clients/ClientInfoResourceTestClient.java @@ -21,21 +21,21 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.inject.Inject; +import io.druid.java.util.common.ISE; +import io.druid.java.util.common.StringUtils; import io.druid.java.util.http.client.HttpClient; import io.druid.java.util.http.client.Request; import io.druid.java.util.http.client.response.StatusResponseHandler; import io.druid.java.util.http.client.response.StatusResponseHolder; -import io.druid.java.util.common.ISE; -import io.druid.java.util.common.StringUtils; import io.druid.testing.IntegrationTestingConfig; import io.druid.testing.guice.TestClient; import org.jboss.netty.handler.codec.http.HttpMethod; import org.jboss.netty.handler.codec.http.HttpResponseStatus; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.List; public class ClientInfoResourceTestClient @@ -55,7 +55,7 @@ public class ClientInfoResourceTestClient this.jsonMapper = jsonMapper; this.httpClient = httpClient; this.brokerUrl = config.getBrokerUrl(); - this.responseHandler = new StatusResponseHandler(Charsets.UTF_8); + this.responseHandler = new StatusResponseHandler(StandardCharsets.UTF_8); } private String getBrokerURL() diff --git a/integration-tests/src/main/java/io/druid/testing/clients/CoordinatorResourceTestClient.java b/integration-tests/src/main/java/io/druid/testing/clients/CoordinatorResourceTestClient.java index 003e76a1af22..fc9f009e135f 
100644 --- a/integration-tests/src/main/java/io/druid/testing/clients/CoordinatorResourceTestClient.java +++ b/integration-tests/src/main/java/io/druid/testing/clients/CoordinatorResourceTestClient.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.inject.Inject; import io.druid.java.util.common.ISE; @@ -38,6 +37,7 @@ import org.joda.time.Interval; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -59,7 +59,7 @@ public class CoordinatorResourceTestClient this.jsonMapper = jsonMapper; this.httpClient = httpClient; this.coordinator = config.getCoordinatorUrl(); - this.responseHandler = new StatusResponseHandler(Charsets.UTF_8); + this.responseHandler = new StatusResponseHandler(StandardCharsets.UTF_8); } private String getCoordinatorURL() diff --git a/integration-tests/src/main/java/io/druid/testing/clients/EventReceiverFirehoseTestClient.java b/integration-tests/src/main/java/io/druid/testing/clients/EventReceiverFirehoseTestClient.java index 1cdb6a91ae7b..697f24fdf515 100644 --- a/integration-tests/src/main/java/io/druid/testing/clients/EventReceiverFirehoseTestClient.java +++ b/integration-tests/src/main/java/io/druid/testing/clients/EventReceiverFirehoseTestClient.java @@ -22,15 +22,14 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.jaxrs.smile.SmileMediaTypes; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; +import io.druid.java.util.common.ISE; +import io.druid.java.util.common.StringUtils; +import io.druid.java.util.common.jackson.JacksonUtils; import io.druid.java.util.http.client.HttpClient; import io.druid.java.util.http.client.Request; import io.druid.java.util.http.client.response.StatusResponseHandler; import io.druid.java.util.http.client.response.StatusResponseHolder; -import io.druid.java.util.common.jackson.JacksonUtils; -import io.druid.java.util.common.ISE; -import io.druid.java.util.common.StringUtils; import io.druid.testing.guice.TestClient; import org.jboss.netty.handler.codec.http.HttpMethod; import org.jboss.netty.handler.codec.http.HttpResponseStatus; @@ -63,7 +62,7 @@ public EventReceiverFirehoseTestClient( { this.host = host; this.jsonMapper = jsonMapper; - this.responseHandler = new StatusResponseHandler(Charsets.UTF_8); + this.responseHandler = new StatusResponseHandler(StandardCharsets.UTF_8); this.httpClient = httpClient; this.chatID = chatID; this.smileMapper = smileMapper; diff --git a/integration-tests/src/main/java/io/druid/testing/clients/OverlordResourceTestClient.java b/integration-tests/src/main/java/io/druid/testing/clients/OverlordResourceTestClient.java index 7062854e1f61..74162155e23d 100644 --- a/integration-tests/src/main/java/io/druid/testing/clients/OverlordResourceTestClient.java +++ b/integration-tests/src/main/java/io/druid/testing/clients/OverlordResourceTestClient.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Predicates; import com.google.common.base.Throwables; import com.google.inject.Inject; @@ -43,6 +42,7 @@ import java.net.URL; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; 
import java.util.List; import java.util.Map; import java.util.concurrent.Callable; @@ -65,7 +65,7 @@ public class OverlordResourceTestClient this.jsonMapper = jsonMapper; this.httpClient = httpClient; this.indexer = config.getIndexerUrl(); - this.responseHandler = new StatusResponseHandler(Charsets.UTF_8); + this.responseHandler = new StatusResponseHandler(StandardCharsets.UTF_8); } private String getIndexerURL() diff --git a/integration-tests/src/main/java/io/druid/testing/clients/QueryResourceTestClient.java b/integration-tests/src/main/java/io/druid/testing/clients/QueryResourceTestClient.java index b6a2c6bcce30..31d5aef0052d 100644 --- a/integration-tests/src/main/java/io/druid/testing/clients/QueryResourceTestClient.java +++ b/integration-tests/src/main/java/io/druid/testing/clients/QueryResourceTestClient.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.inject.Inject; import io.druid.java.util.http.client.HttpClient; @@ -38,6 +37,7 @@ import org.jboss.netty.handler.codec.http.HttpResponseStatus; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; @@ -58,7 +58,7 @@ public class QueryResourceTestClient this.jsonMapper = jsonMapper; this.httpClient = httpClient; this.routerUrl = config.getRouterUrl(); - this.responseHandler = new StatusResponseHandler(Charsets.UTF_8); + this.responseHandler = new StatusResponseHandler(StandardCharsets.UTF_8); } private String getBrokerURL() diff --git a/integration-tests/src/main/java/org/testng/DruidTestRunnerFactory.java b/integration-tests/src/main/java/org/testng/DruidTestRunnerFactory.java index 939fc7ac7fbe..1fb991448526 100644 --- a/integration-tests/src/main/java/org/testng/DruidTestRunnerFactory.java +++ b/integration-tests/src/main/java/org/testng/DruidTestRunnerFactory.java @@ -19,17 +19,16 @@ package org.testng; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.inject.Injector; import com.google.inject.Key; +import io.druid.java.util.common.StringUtils; +import io.druid.java.util.common.lifecycle.Lifecycle; +import io.druid.java.util.common.logger.Logger; import io.druid.java.util.http.client.HttpClient; import io.druid.java.util.http.client.Request; import io.druid.java.util.http.client.response.StatusResponseHandler; import io.druid.java.util.http.client.response.StatusResponseHolder; -import io.druid.java.util.common.StringUtils; -import io.druid.java.util.common.lifecycle.Lifecycle; -import io.druid.java.util.common.logger.Logger; import io.druid.testing.IntegrationTestingConfig; import io.druid.testing.guice.DruidTestModuleFactory; import io.druid.testing.guice.TestClient; @@ -41,6 +40,7 @@ import org.testng.xml.XmlTest; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.List; public class DruidTestRunnerFactory implements ITestRunnerFactory @@ -117,7 +117,7 @@ private void runTests() public void waitUntilInstanceReady(final HttpClient client, final String host) { - final StatusResponseHandler handler = new StatusResponseHandler(Charsets.UTF_8); + final StatusResponseHandler handler = new StatusResponseHandler(StandardCharsets.UTF_8); RetryUtil.retryUntilTrue( () -> { try { diff --git a/integration-tests/src/test/java/io/druid/tests/security/ITBasicAuthConfigurationTest.java 
b/integration-tests/src/test/java/io/druid/tests/security/ITBasicAuthConfigurationTest.java index dd9344846f11..c4d3b41e908f 100644 --- a/integration-tests/src/test/java/io/druid/tests/security/ITBasicAuthConfigurationTest.java +++ b/integration-tests/src/test/java/io/druid/tests/security/ITBasicAuthConfigurationTest.java @@ -21,19 +21,18 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.inject.Inject; +import io.druid.guice.annotations.Client; +import io.druid.java.util.common.ISE; +import io.druid.java.util.common.StringUtils; +import io.druid.java.util.common.logger.Logger; import io.druid.java.util.http.client.CredentialedHttpClient; import io.druid.java.util.http.client.HttpClient; import io.druid.java.util.http.client.Request; import io.druid.java.util.http.client.auth.BasicCredentials; import io.druid.java.util.http.client.response.StatusResponseHandler; import io.druid.java.util.http.client.response.StatusResponseHolder; -import io.druid.guice.annotations.Client; -import io.druid.java.util.common.ISE; -import io.druid.java.util.common.StringUtils; -import io.druid.java.util.common.logger.Logger; import io.druid.security.basic.authentication.entity.BasicAuthenticatorCredentialUpdate; import io.druid.server.security.Action; import io.druid.server.security.Resource; @@ -51,6 +50,7 @@ import javax.ws.rs.core.MediaType; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; @@ -80,7 +80,7 @@ public class ITBasicAuthConfigurationTest @Client HttpClient httpClient; - StatusResponseHandler responseHandler = new StatusResponseHandler(Charsets.UTF_8); + StatusResponseHandler responseHandler = new StatusResponseHandler(StandardCharsets.UTF_8); @Test public void testAuthConfiguration() throws Exception diff --git a/java-util/src/main/java/io/druid/java/util/common/StringUtils.java b/java-util/src/main/java/io/druid/java/util/common/StringUtils.java index d073ef38b42f..16830e1b20f4 100644 --- a/java-util/src/main/java/io/druid/java/util/common/StringUtils.java +++ b/java-util/src/main/java/io/druid/java/util/common/StringUtils.java @@ -19,13 +19,13 @@ package io.druid.java.util.common; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import javax.annotation.Nullable; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.IllegalFormatException; import java.util.Locale; @@ -37,8 +37,8 @@ public class StringUtils { public static final byte[] EMPTY_BYTES = new byte[0]; @Deprecated // Charset parameters to String are currently slower than the charset's string name - public static final Charset UTF8_CHARSET = Charsets.UTF_8; - public static final String UTF8_STRING = Charsets.UTF_8.toString(); + public static final Charset UTF8_CHARSET = StandardCharsets.UTF_8; + public static final String UTF8_STRING = StandardCharsets.UTF_8.toString(); // should be used only for estimation // returns the same result with StringUtils.fromUtf8(value).length for valid string values diff --git a/java-util/src/main/java/io/druid/java/util/common/io/smoosh/FileSmoosher.java b/java-util/src/main/java/io/druid/java/util/common/io/smoosh/FileSmoosher.java index b928bbba5664..1e06098426f8 100644 --- 
a/java-util/src/main/java/io/druid/java/util/common/io/smoosh/FileSmoosher.java +++ b/java-util/src/main/java/io/druid/java/util/common/io/smoosh/FileSmoosher.java @@ -19,7 +19,6 @@ package io.druid.java.util.common.io.smoosh; -import com.google.common.base.Charsets; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; @@ -45,6 +44,7 @@ import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.GatheringByteChannel; +import java.nio.charset.StandardCharsets; import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Collections; @@ -370,7 +370,7 @@ public void close() throws IOException File metaFile = metaFile(baseDir); - try (Writer out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(metaFile), Charsets.UTF_8))) { + try (Writer out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(metaFile), StandardCharsets.UTF_8))) { out.write(StringUtils.format("v1,%d,%d", maxChunkSize, outFiles.size())); out.write("\n"); diff --git a/java-util/src/main/java/io/druid/java/util/common/io/smoosh/SmooshedFileMapper.java b/java-util/src/main/java/io/druid/java/util/common/io/smoosh/SmooshedFileMapper.java index bb95dce7dc97..38d79888a404 100644 --- a/java-util/src/main/java/io/druid/java/util/common/io/smoosh/SmooshedFileMapper.java +++ b/java-util/src/main/java/io/druid/java/util/common/io/smoosh/SmooshedFileMapper.java @@ -19,7 +19,6 @@ package io.druid.java.util.common.io.smoosh; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -36,6 +35,7 @@ import java.io.InputStreamReader; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; import java.util.Set; @@ -52,7 +52,7 @@ public static SmooshedFileMapper load(File baseDir) throws IOException BufferedReader in = null; try { - in = new BufferedReader(new InputStreamReader(new FileInputStream(metaFile), Charsets.UTF_8)); + in = new BufferedReader(new InputStreamReader(new FileInputStream(metaFile), StandardCharsets.UTF_8)); String line = in.readLine(); if (line == null) { diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/JSONFlattenerMaker.java b/java-util/src/main/java/io/druid/java/util/common/parsers/JSONFlattenerMaker.java index 5f1c6736a024..06f290ac7c11 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/JSONFlattenerMaker.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/JSONFlattenerMaker.java @@ -20,7 +20,6 @@ package io.druid.java.util.common.parsers; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.base.Charsets; import com.google.common.collect.FluentIterable; import com.jayway.jsonpath.Configuration; import com.jayway.jsonpath.JsonPath; @@ -33,6 +32,7 @@ import javax.annotation.Nullable; import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.EnumSet; import java.util.Iterator; @@ -50,7 +50,7 @@ public class JSONFlattenerMaker implements ObjectFlatteners.FlattenerMaker discoverRootFields(final JsonNode obj) diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/JSONToLowerParser.java b/java-util/src/main/java/io/druid/java/util/common/parsers/JSONToLowerParser.java index 5a04d9b75bf5..f647f39eb351 100644 --- 
a/java-util/src/main/java/io/druid/java/util/common/parsers/JSONToLowerParser.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/JSONToLowerParser.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; @@ -29,6 +28,7 @@ import io.druid.java.util.common.StringUtils; import java.nio.charset.CharsetEncoder; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashMap; @@ -65,7 +65,7 @@ public Object apply(JsonNode node) return node.asDouble(); } final String s = node.asText(); - final CharsetEncoder enc = Charsets.UTF_8.newEncoder(); + final CharsetEncoder enc = StandardCharsets.UTF_8.newEncoder(); if (s != null && !enc.canEncode(s)) { // Some whacky characters are in this string (e.g. \uD900). These are problematic because they are decodeable // by new String(...) but will not encode into the same character. This dance here will replace these diff --git a/java-util/src/main/java/io/druid/java/util/http/client/Request.java b/java-util/src/main/java/io/druid/java/util/http/client/Request.java index a67270de55ba..161315ca78c8 100644 --- a/java-util/src/main/java/io/druid/java/util/http/client/Request.java +++ b/java-util/src/main/java/io/druid/java/util/http/client/Request.java @@ -19,7 +19,6 @@ package io.druid.java.util.http.client; -import com.google.common.base.Charsets; import com.google.common.base.Supplier; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -35,6 +34,7 @@ import java.net.URL; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -175,7 +175,7 @@ private String base64Encode(final String value) final ChannelBufferFactory bufferFactory = HeapChannelBufferFactory.getInstance(); return Base64 - .encode(bufferFactory.getBuffer(ByteBuffer.wrap(value.getBytes(Charsets.UTF_8))), false) - .toString(Charsets.UTF_8); + .encode(bufferFactory.getBuffer(ByteBuffer.wrap(value.getBytes(StandardCharsets.UTF_8))), false) + .toString(StandardCharsets.UTF_8); } } diff --git a/java-util/src/main/java/io/druid/java/util/metrics/cgroups/CpuAcct.java b/java-util/src/main/java/io/druid/java/util/metrics/cgroups/CpuAcct.java index ad0ceb53525f..ab789e0eef07 100644 --- a/java-util/src/main/java/io/druid/java/util/metrics/cgroups/CpuAcct.java +++ b/java-util/src/main/java/io/druid/java/util/metrics/cgroups/CpuAcct.java @@ -19,7 +19,6 @@ package io.druid.java.util.metrics.cgroups; -import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import io.druid.java.util.common.RE; import io.druid.java.util.common.logger.Logger; @@ -27,6 +26,7 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.List; import java.util.stream.LongStream; @@ -83,7 +83,7 @@ public CpuAcctMetric snapshot() return new CpuAcctMetric(new long[0], new long[0]); } try { - return parse(Files.readAllLines(cpuacct.toPath(), Charsets.UTF_8)); + return parse(Files.readAllLines(cpuacct.toPath(), StandardCharsets.UTF_8)); } catch (IOException e) { throw new RuntimeException(e); diff --git a/java-util/src/main/java/io/druid/java/util/metrics/cgroups/ProcCgroupDiscoverer.java 
b/java-util/src/main/java/io/druid/java/util/metrics/cgroups/ProcCgroupDiscoverer.java index 619a8e6f686e..36d66ff866a1 100644 --- a/java-util/src/main/java/io/druid/java/util/metrics/cgroups/ProcCgroupDiscoverer.java +++ b/java-util/src/main/java/io/druid/java/util/metrics/cgroups/ProcCgroupDiscoverer.java @@ -19,7 +19,6 @@ package io.druid.java.util.metrics.cgroups; -import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import com.google.common.io.Files; @@ -28,6 +27,7 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; @@ -76,7 +76,7 @@ private PidCgroupEntry getCgroupEntry(final File procCgroup, final String cgroup { final List lines; try { - lines = Files.readLines(procCgroup, Charsets.UTF_8); + lines = Files.readLines(procCgroup, StandardCharsets.UTF_8); } catch (IOException e) { throw new RuntimeException(e); @@ -97,7 +97,7 @@ private ProcMountsEntry getMountEntry(final File procMounts, final String cgroup { final List lines; try { - lines = Files.readLines(procMounts, Charsets.UTF_8); + lines = Files.readLines(procMounts, StandardCharsets.UTF_8); } catch (IOException e) { throw new RuntimeException(e); diff --git a/java-util/src/test/java/io/druid/java/util/emitter/core/EmitterTest.java b/java-util/src/test/java/io/druid/java/util/emitter/core/EmitterTest.java index 2653ba817cf4..78e6c0698b2b 100644 --- a/java-util/src/test/java/io/druid/java/util/emitter/core/EmitterTest.java +++ b/java-util/src/test/java/io/druid/java/util/emitter/core/EmitterTest.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.io.BaseEncoding; import io.druid.java.util.common.CompressionUtils; import io.druid.java.util.common.StringUtils; @@ -229,7 +228,7 @@ protected ListenableFuture go(Request request) throws JsonProcessingEx jsonMapper.writeValueAsString(events.get(0)), jsonMapper.writeValueAsString(events.get(1)) ), - Charsets.UTF_8.decode(request.getByteBufferData().slice()).toString() + StandardCharsets.UTF_8.decode(request.getByteBufferData().slice()).toString() ); return GoHandlers.immediateFuture(okResponse()); @@ -271,7 +270,7 @@ protected ListenableFuture go(Request request) throws JsonProcessingEx jsonMapper.writeValueAsString(events.get(0)), jsonMapper.writeValueAsString(events.get(1)) ), - Charsets.UTF_8.decode(request.getByteBufferData().slice()).toString() + StandardCharsets.UTF_8.decode(request.getByteBufferData().slice()).toString() ); return GoHandlers.immediateFuture(okResponse()); @@ -450,7 +449,7 @@ protected ListenableFuture go(Request request) throws JsonProcessingEx jsonMapper.writeValueAsString(events.get(0)), jsonMapper.writeValueAsString(events.get(1)) ), - Charsets.UTF_8.decode(request.getByteBufferData().slice()).toString() + StandardCharsets.UTF_8.decode(request.getByteBufferData().slice()).toString() ); return GoHandlers.immediateFuture(okResponse()); @@ -502,7 +501,7 @@ protected ListenableFuture go(Request request) throws JsonProcessingEx jsonMapper.writeValueAsString(events.get(counter.getAndIncrement())), jsonMapper.writeValueAsString(events.get(counter.getAndIncrement())) ), - Charsets.UTF_8.decode(request.getByteBufferData().slice()).toString() + StandardCharsets.UTF_8.decode(request.getByteBufferData().slice()).toString() ); 
return GoHandlers.immediateFuture(okResponse()); @@ -561,7 +560,7 @@ protected ListenableFuture go(Request request) throws IOException jsonMapper.writeValueAsString(events.get(0)), jsonMapper.writeValueAsString(events.get(1)) ), - baos.toString(Charsets.UTF_8.name()) + baos.toString(StandardCharsets.UTF_8.name()) ); return GoHandlers.immediateFuture(okResponse()); diff --git a/java-util/src/test/java/io/druid/java/util/emitter/core/ParametrizedUriEmitterTest.java b/java-util/src/test/java/io/druid/java/util/emitter/core/ParametrizedUriEmitterTest.java index e9124e8f96af..97a14ef5e6a6 100644 --- a/java-util/src/test/java/io/druid/java/util/emitter/core/ParametrizedUriEmitterTest.java +++ b/java-util/src/test/java/io/druid/java/util/emitter/core/ParametrizedUriEmitterTest.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableMap; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.lifecycle.Lifecycle; @@ -34,6 +33,7 @@ import org.junit.Before; import org.junit.Test; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -104,7 +104,7 @@ public ListenableFuture go(Request request) throws JsonProcessingExcep jsonMapper.writeValueAsString(events.get(0)), jsonMapper.writeValueAsString(events.get(1)) ), - Charsets.UTF_8.decode(request.getByteBufferData().slice()).toString() + StandardCharsets.UTF_8.decode(request.getByteBufferData().slice()).toString() ); return GoHandlers.immediateFuture(okResponse()); @@ -138,7 +138,7 @@ protected ListenableFuture go(Request request) { results.put( request.getUrl().toString(), - Charsets.UTF_8.decode(request.getByteBufferData().slice()).toString() + StandardCharsets.UTF_8.decode(request.getByteBufferData().slice()).toString() ); return GoHandlers.immediateFuture(okResponse()); } @@ -178,7 +178,7 @@ protected ListenableFuture go(Request request) throws JsonProcessingEx jsonMapper.writeValueAsString(events.get(0)), jsonMapper.writeValueAsString(events.get(1)) ), - Charsets.UTF_8.decode(request.getByteBufferData().slice()).toString() + StandardCharsets.UTF_8.decode(request.getByteBufferData().slice()).toString() ); return GoHandlers.immediateFuture(okResponse()); diff --git a/java-util/src/test/java/io/druid/java/util/http/client/AsyncHttpClientTest.java b/java-util/src/test/java/io/druid/java/util/http/client/AsyncHttpClientTest.java index 5650fc61976c..f0a576a6980c 100644 --- a/java-util/src/test/java/io/druid/java/util/http/client/AsyncHttpClientTest.java +++ b/java-util/src/test/java/io/druid/java/util/http/client/AsyncHttpClientTest.java @@ -19,7 +19,6 @@ package io.druid.java.util.http.client; -import com.google.common.base.Charsets; import io.druid.java.util.common.StringUtils; import org.asynchttpclient.DefaultAsyncHttpClient; import org.junit.Assert; @@ -64,7 +63,7 @@ public void run() // skip lines } Thread.sleep(5000); // times out - out.write("HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nhello!".getBytes(Charsets.UTF_8)); + out.write("HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nhello!".getBytes(StandardCharsets.UTF_8)); } catch (Exception e) { // Suppress diff --git a/java-util/src/test/java/io/druid/java/util/http/client/FriendlyServersTest.java b/java-util/src/test/java/io/druid/java/util/http/client/FriendlyServersTest.java index 1496c904bd9c..fb9d0430c055 100644 --- 
a/java-util/src/test/java/io/druid/java/util/http/client/FriendlyServersTest.java +++ b/java-util/src/test/java/io/druid/java/util/http/client/FriendlyServersTest.java @@ -19,7 +19,6 @@ package io.druid.java.util.http.client; -import com.google.common.base.Charsets; import com.google.common.util.concurrent.ListenableFuture; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.lifecycle.Lifecycle; @@ -81,7 +80,7 @@ public void run() while (!in.readLine().equals("")) { // skip lines } - out.write("HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nhello!".getBytes(Charsets.UTF_8)); + out.write("HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nhello!".getBytes(StandardCharsets.UTF_8)); } catch (Exception e) { // Suppress @@ -98,7 +97,7 @@ public void run() final StatusResponseHolder response = client .go( new Request(HttpMethod.GET, new URL(StringUtils.format("http://localhost:%d/", serverSocket.getLocalPort()))), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ).get(); Assert.assertEquals(200, response.getStatus().getCode()); @@ -138,7 +137,7 @@ public void run() foundAcceptEncoding.set(true); } } - out.write("HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nhello!".getBytes(Charsets.UTF_8)); + out.write("HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nhello!".getBytes(StandardCharsets.UTF_8)); } catch (Exception e) { // Suppress @@ -157,7 +156,7 @@ public void run() final StatusResponseHolder response = client .go( new Request(HttpMethod.GET, new URL(StringUtils.format("http://localhost:%d/", serverSocket.getLocalPort()))), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ).get(); Assert.assertEquals(200, response.getStatus().getCode()); @@ -214,7 +213,7 @@ public void testFriendlySelfSignedHttpsServer() throws Exception HttpMethod.GET, new URL(StringUtils.format("https://localhost:%d/", sslConnector.getLocalPort())) ), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ).get().getStatus(); Assert.assertEquals(404, status.getCode()); } @@ -227,7 +226,7 @@ public void testFriendlySelfSignedHttpsServer() throws Exception HttpMethod.GET, new URL(StringUtils.format("https://127.0.0.1:%d/", sslConnector.getLocalPort())) ), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ); Throwable ea = null; @@ -249,7 +248,7 @@ public void testFriendlySelfSignedHttpsServer() throws Exception new Request( HttpMethod.GET, new URL(StringUtils.format("https://localhost:%d/", sslConnector.getLocalPort())) ), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ); Throwable eb = null; @@ -285,7 +284,7 @@ public void testHttpBin() throws Throwable final HttpResponseStatus status = client .go( new Request(HttpMethod.GET, new URL("https://httpbin.org/get")), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ).get().getStatus(); Assert.assertEquals(200, status.getCode()); @@ -296,7 +295,7 @@ public void testHttpBin() throws Throwable .go( new Request(HttpMethod.POST, new URL("https://httpbin.org/post")) .setContent(new byte[]{'a', 'b', 'c', 1, 2, 3}), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ).get().getStatus(); Assert.assertEquals(200, status.getCode()); diff --git a/java-util/src/test/java/io/druid/java/util/http/client/JankyServersTest.java 
b/java-util/src/test/java/io/druid/java/util/http/client/JankyServersTest.java index 1923ba9fb6bd..b75da694eba4 100644 --- a/java-util/src/test/java/io/druid/java/util/http/client/JankyServersTest.java +++ b/java-util/src/test/java/io/druid/java/util/http/client/JankyServersTest.java @@ -19,7 +19,6 @@ package io.druid.java.util.http.client; -import com.google.common.base.Charsets; import com.google.common.util.concurrent.ListenableFuture; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.lifecycle.Lifecycle; @@ -41,6 +40,7 @@ import java.net.ServerSocket; import java.net.Socket; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -153,7 +153,7 @@ public void testHttpSilentServerWithGlobalTimeout() throws Throwable final ListenableFuture future = client .go( new Request(HttpMethod.GET, new URL(StringUtils.format("http://localhost:%d/", silentServerSocket.getLocalPort()))), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ); Throwable e = null; @@ -181,7 +181,7 @@ public void testHttpSilentServerWithRequestTimeout() throws Throwable final ListenableFuture future = client .go( new Request(HttpMethod.GET, new URL(StringUtils.format("http://localhost:%d/", silentServerSocket.getLocalPort()))), - new StatusResponseHandler(Charsets.UTF_8), + new StatusResponseHandler(StandardCharsets.UTF_8), new Duration(100L) ); @@ -214,7 +214,7 @@ public void testHttpsSilentServer() throws Throwable final ListenableFuture response = client .go( new Request(HttpMethod.GET, new URL(StringUtils.format("https://localhost:%d/", silentServerSocket.getLocalPort()))), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ); Throwable e = null; @@ -242,7 +242,7 @@ public void testHttpConnectionClosingServer() throws Throwable final ListenableFuture response = client .go( new Request(HttpMethod.GET, new URL(StringUtils.format("http://localhost:%d/", closingServerSocket.getLocalPort()))), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ); Throwable e = null; try { @@ -271,7 +271,7 @@ public void testHttpsConnectionClosingServer() throws Throwable final ListenableFuture response = client .go( new Request(HttpMethod.GET, new URL(StringUtils.format("https://localhost:%d/", closingServerSocket.getLocalPort()))), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ); Throwable e = null; @@ -306,7 +306,7 @@ public void testHttpEchoServer() throws Throwable final ListenableFuture response = client .go( new Request(HttpMethod.GET, new URL(StringUtils.format("http://localhost:%d/", echoServerSocket.getLocalPort()))), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ); Throwable e = null; @@ -336,7 +336,7 @@ public void testHttpsEchoServer() throws Throwable final ListenableFuture response = client .go( new Request(HttpMethod.GET, new URL(StringUtils.format("https://localhost:%d/", echoServerSocket.getLocalPort()))), - new StatusResponseHandler(Charsets.UTF_8) + new StatusResponseHandler(StandardCharsets.UTF_8) ); Throwable e = null; diff --git a/processing/src/main/java/io/druid/guice/PropertiesModule.java b/processing/src/main/java/io/druid/guice/PropertiesModule.java index 1d838f245764..905c1c867d07 100644 --- 
a/processing/src/main/java/io/druid/guice/PropertiesModule.java +++ b/processing/src/main/java/io/druid/guice/PropertiesModule.java @@ -19,7 +19,6 @@ package io.druid.guice; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.inject.Binder; import com.google.inject.Module; @@ -33,6 +32,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Properties; @@ -71,7 +71,7 @@ public void configure(Binder binder) if (stream != null) { log.info("Loading properties from %s", propertiesFile); try { - fileProps.load(new InputStreamReader(stream, Charsets.UTF_8)); + fileProps.load(new InputStreamReader(stream, StandardCharsets.UTF_8)); } catch (IOException e) { throw Throwables.propagate(e); diff --git a/processing/src/test/java/io/druid/segment/TestIndex.java b/processing/src/test/java/io/druid/segment/TestIndex.java index a6b03a6dd0d6..970403fabf11 100644 --- a/processing/src/test/java/io/druid/segment/TestIndex.java +++ b/processing/src/test/java/io/druid/segment/TestIndex.java @@ -19,7 +19,6 @@ package io.druid.segment; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.common.io.CharSource; import com.google.common.io.LineProcessor; @@ -37,7 +36,6 @@ import io.druid.java.util.common.DateTimes; import io.druid.java.util.common.Intervals; import io.druid.java.util.common.logger.Logger; -import io.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.DoubleMaxAggregatorFactory; import io.druid.query.aggregation.DoubleMinAggregatorFactory; @@ -53,11 +51,13 @@ import io.druid.segment.incremental.IncrementalIndexSchema; import io.druid.segment.serde.ComplexMetrics; import io.druid.segment.virtual.ExpressionVirtualColumn; +import io.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; import org.joda.time.Interval; import java.io.File; import java.io.IOException; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -261,7 +261,7 @@ public static IncrementalIndex makeRealtimeIndex(final String resourceFilename, throw new IllegalArgumentException("cannot find resource " + resourceFilename); } log.info("Realtime loading index file[%s]", resource); - CharSource stream = Resources.asByteSource(resource).asCharSource(Charsets.UTF_8); + CharSource stream = Resources.asByteSource(resource).asCharSource(StandardCharsets.UTF_8); return makeRealtimeIndex(stream, rollup); } diff --git a/server/src/main/java/io/druid/client/CachingClusteredClient.java b/server/src/main/java/io/druid/client/CachingClusteredClient.java index df48cc0f21ef..2be0a819a351 100644 --- a/server/src/main/java/io/druid/client/CachingClusteredClient.java +++ b/server/src/main/java/io/druid/client/CachingClusteredClient.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.base.Optional; import com.google.common.collect.ImmutableMap; @@ -81,6 +80,7 @@ import javax.annotation.Nullable; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; @@ -382,7 
+382,7 @@ private String computeCurrentEtag(final Set segments, @Nullable hasOnlyHistoricalSegments = false; break; } - hasher.putString(p.getServer().getSegment().getIdentifier(), Charsets.UTF_8); + hasher.putString(p.getServer().getSegment().getIdentifier(), StandardCharsets.UTF_8); } if (hasOnlyHistoricalSegments) { diff --git a/server/src/main/java/io/druid/client/DirectDruidClient.java b/server/src/main/java/io/druid/client/DirectDruidClient.java index d34efee089de..8db791b1c9ba 100644 --- a/server/src/main/java/io/druid/client/DirectDruidClient.java +++ b/server/src/main/java/io/druid/client/DirectDruidClient.java @@ -27,19 +27,11 @@ import com.fasterxml.jackson.databind.type.TypeFactory; import com.fasterxml.jackson.dataformat.smile.SmileFactory; import com.fasterxml.jackson.jaxrs.smile.SmileMediaTypes; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import com.google.common.io.ByteSource; import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; -import io.druid.java.util.emitter.service.ServiceEmitter; -import io.druid.java.util.http.client.HttpClient; -import io.druid.java.util.http.client.Request; -import io.druid.java.util.http.client.response.ClientResponse; -import io.druid.java.util.http.client.response.HttpResponseHandler; -import io.druid.java.util.http.client.response.StatusResponseHandler; -import io.druid.java.util.http.client.response.StatusResponseHolder; import io.druid.java.util.common.IAE; import io.druid.java.util.common.Pair; import io.druid.java.util.common.RE; @@ -50,6 +42,13 @@ import io.druid.java.util.common.guava.Sequences; import io.druid.java.util.common.jackson.JacksonUtils; import io.druid.java.util.common.logger.Logger; +import io.druid.java.util.emitter.service.ServiceEmitter; +import io.druid.java.util.http.client.HttpClient; +import io.druid.java.util.http.client.Request; +import io.druid.java.util.http.client.response.ClientResponse; +import io.druid.java.util.http.client.response.HttpResponseHandler; +import io.druid.java.util.http.client.response.StatusResponseHandler; +import io.druid.java.util.http.client.response.StatusResponseHolder; import io.druid.query.BySegmentResultValueClass; import io.druid.query.Query; import io.druid.query.QueryContexts; @@ -77,6 +76,7 @@ import java.io.InputStream; import java.io.SequenceInputStream; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.Enumeration; import java.util.Iterator; import java.util.Map; @@ -480,7 +480,7 @@ public void onFailure(Throwable t) ? 
SmileMediaTypes.APPLICATION_JACKSON_SMILE : MediaType.APPLICATION_JSON ), - new StatusResponseHandler(Charsets.UTF_8), + new StatusResponseHandler(StandardCharsets.UTF_8), Duration.standardSeconds(1) ).get(1, TimeUnit.SECONDS); diff --git a/server/src/main/java/io/druid/client/cache/MemcachedCache.java b/server/src/main/java/io/druid/client/cache/MemcachedCache.java index 023791242238..7010134b2da5 100644 --- a/server/src/main/java/io/druid/client/cache/MemcachedCache.java +++ b/server/src/main/java/io/druid/client/cache/MemcachedCache.java @@ -19,7 +19,6 @@ package io.druid.client.cache; -import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; @@ -56,6 +55,7 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -87,7 +87,7 @@ public class MemcachedCache implements Cache @Override public long hash(String k) { - return fn.hashString(k, Charsets.UTF_8).asLong(); + return fn.hashString(k, StandardCharsets.UTF_8).asLong(); } @Override diff --git a/server/src/main/java/io/druid/discovery/DruidLeaderClient.java b/server/src/main/java/io/druid/discovery/DruidLeaderClient.java index a77b8312937c..c1c4dbcaa300 100644 --- a/server/src/main/java/io/druid/discovery/DruidLeaderClient.java +++ b/server/src/main/java/io/druid/discovery/DruidLeaderClient.java @@ -19,14 +19,8 @@ package io.druid.discovery; -import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; -import io.druid.java.util.http.client.HttpClient; -import io.druid.java.util.http.client.Request; -import io.druid.java.util.http.client.response.FullResponseHandler; -import io.druid.java.util.http.client.response.FullResponseHolder; -import io.druid.java.util.http.client.response.HttpResponseHandler; import io.druid.client.selector.Server; import io.druid.concurrent.LifecycleLock; import io.druid.curator.discovery.ServerDiscoverySelector; @@ -37,6 +31,11 @@ import io.druid.java.util.common.lifecycle.LifecycleStart; import io.druid.java.util.common.lifecycle.LifecycleStop; import io.druid.java.util.common.logger.Logger; +import io.druid.java.util.http.client.HttpClient; +import io.druid.java.util.http.client.Request; +import io.druid.java.util.http.client.response.FullResponseHandler; +import io.druid.java.util.http.client.response.FullResponseHolder; +import io.druid.java.util.http.client.response.HttpResponseHandler; import org.jboss.netty.channel.ChannelException; import org.jboss.netty.handler.codec.http.HttpMethod; import org.jboss.netty.handler.codec.http.HttpResponseStatus; @@ -45,6 +44,7 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; @@ -130,7 +130,7 @@ public Request makeRequest(HttpMethod httpMethod, String urlPath) throws IOExcep public FullResponseHolder go(Request request) throws IOException, InterruptedException { - return go(request, new FullResponseHandler(Charsets.UTF_8)); + return go(request, new FullResponseHandler(StandardCharsets.UTF_8)); } /** diff --git a/server/src/main/java/io/druid/server/log/FileRequestLogger.java b/server/src/main/java/io/druid/server/log/FileRequestLogger.java index 
db86c29beb93..cb0edc795cd8 100644 --- a/server/src/main/java/io/druid/server/log/FileRequestLogger.java +++ b/server/src/main/java/io/druid/server/log/FileRequestLogger.java @@ -20,7 +20,6 @@ package io.druid.server.log; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.base.Throwables; import io.druid.java.util.common.DateTimes; import io.druid.java.util.common.StringUtils; @@ -39,6 +38,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import java.util.concurrent.Callable; import java.util.concurrent.ScheduledExecutorService; @@ -112,7 +112,7 @@ private OutputStreamWriter getFileWriter() throws FileNotFoundException { return new OutputStreamWriter( new FileOutputStream(new File(baseDir, currentDay.toString("yyyy-MM-dd'.log'")), true), - Charsets.UTF_8 + StandardCharsets.UTF_8 ); } diff --git a/server/src/main/java/io/druid/server/router/ConsistentHasher.java b/server/src/main/java/io/druid/server/router/ConsistentHasher.java index f3fee3024bea..253e46bf0aed 100644 --- a/server/src/main/java/io/druid/server/router/ConsistentHasher.java +++ b/server/src/main/java/io/druid/server/router/ConsistentHasher.java @@ -19,13 +19,13 @@ package io.druid.server.router; -import com.google.common.base.Charsets; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import it.unimi.dsi.fastutil.longs.Long2ObjectMap; import it.unimi.dsi.fastutil.longs.Long2ObjectRBTreeMap; import it.unimi.dsi.fastutil.longs.Long2ObjectSortedMap; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -119,7 +119,7 @@ private void addNodeKeyHashes(String key) long[] hashes = new long[REPLICATION_FACTOR]; for (int i = 0; i < REPLICATION_FACTOR; i++) { String vnode = key + "-" + i; - hashes[i] = hashFn.hashString(vnode, Charsets.UTF_8).asLong(); + hashes[i] = hashFn.hashString(vnode, StandardCharsets.UTF_8).asLong(); } nodeKeyHashes.put(key, hashes); diff --git a/server/src/main/java/io/druid/server/router/RendezvousHasher.java b/server/src/main/java/io/druid/server/router/RendezvousHasher.java index 170573ef2d96..0aab0bec33bb 100644 --- a/server/src/main/java/io/druid/server/router/RendezvousHasher.java +++ b/server/src/main/java/io/druid/server/router/RendezvousHasher.java @@ -19,12 +19,12 @@ package io.druid.server.router; -import com.google.common.base.Charsets; import com.google.common.collect.Lists; import com.google.common.hash.HashCode; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Set; @@ -47,7 +47,7 @@ public String chooseNode(Set nodeIds, byte[] key) String maxNode = null; for (String nodeId : nodeIds) { - HashCode nodeHash = HASH_FN.hashString(nodeId, Charsets.UTF_8); + HashCode nodeHash = HASH_FN.hashString(nodeId, StandardCharsets.UTF_8); List hashes = Lists.newArrayList(nodeHash, keyHash); long combinedHash = Hashing.combineOrdered(hashes).asLong(); if (maxNode == null) { diff --git a/server/src/test/java/io/druid/client/CachingClusteredClientTest.java b/server/src/test/java/io/druid/client/CachingClusteredClientTest.java index 25901cfeffb8..39ec5f1e5e12 100644 --- a/server/src/test/java/io/druid/client/CachingClusteredClientTest.java +++ b/server/src/test/java/io/druid/client/CachingClusteredClientTest.java @@ -22,7 +22,6 @@ import 
com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import com.fasterxml.jackson.dataformat.smile.SmileFactory; -import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.base.Supplier; @@ -150,6 +149,7 @@ import org.junit.runners.Parameterized; import javax.annotation.Nullable; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -1470,8 +1470,8 @@ public void testGroupByCaching() .setContext(CONTEXT); final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector(); - collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes()); - collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes()); + collector.add(hashFn.hashString("abc123", StandardCharsets.UTF_8).asBytes()); + collector.add(hashFn.hashString("123abc", StandardCharsets.UTF_8).asBytes()); testQueryCaching( getDefaultQueryRunner(), diff --git a/server/src/test/java/io/druid/segment/indexing/DataSchemaTest.java b/server/src/test/java/io/druid/segment/indexing/DataSchemaTest.java index 86da576fb4e4..9dd8f07cf2fb 100644 --- a/server/src/test/java/io/druid/segment/indexing/DataSchemaTest.java +++ b/server/src/test/java/io/druid/segment/indexing/DataSchemaTest.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.databind.JsonMappingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import io.druid.data.input.InputRow; @@ -50,6 +49,7 @@ import org.junit.rules.ExpectedException; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Map; @@ -170,7 +170,7 @@ public void testTransformSpec() final StringInputRowParser parser = (StringInputRowParser) schema.getParser(); final InputRow row1bb = parser.parseBatch( - ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}".getBytes(Charsets.UTF_8)) + ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}".getBytes(StandardCharsets.UTF_8)) ).get(0); Assert.assertEquals(DateTimes.of("2000-01-01"), row1bb.getTimestamp()); Assert.assertEquals("foo", row1bb.getRaw("dimA")); @@ -182,7 +182,7 @@ public void testTransformSpec() Assert.assertEquals("foofoo", row1string.getRaw("expr")); final InputRow row2 = parser.parseBatch( - ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"x\"}".getBytes(Charsets.UTF_8)) + ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"x\"}".getBytes(StandardCharsets.UTF_8)) ).get(0); Assert.assertNull(row2); } diff --git a/server/src/test/java/io/druid/segment/realtime/firehose/IngestSegmentFirehoseTest.java b/server/src/test/java/io/druid/segment/realtime/firehose/IngestSegmentFirehoseTest.java index 4b363080659c..52a15025680e 100644 --- a/server/src/test/java/io/druid/segment/realtime/firehose/IngestSegmentFirehoseTest.java +++ b/server/src/test/java/io/druid/segment/realtime/firehose/IngestSegmentFirehoseTest.java @@ -19,7 +19,6 @@ package io.druid.segment.realtime.firehose; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import io.druid.collections.spatial.search.RadiusBound; @@ -59,6 +58,7 @@ import org.junit.runners.Parameterized; import java.io.File; +import java.nio.charset.StandardCharsets; import 
java.util.Collection; import java.util.List; @@ -212,7 +212,7 @@ private void createTestIndex(File segmentDir) throws Exception false, 0 ), - Charsets.UTF_8.toString() + StandardCharsets.UTF_8.toString() ); try ( diff --git a/server/src/test/java/io/druid/segment/realtime/firehose/LocalFirehoseFactoryTest.java b/server/src/test/java/io/druid/segment/realtime/firehose/LocalFirehoseFactoryTest.java index ccd00941b231..436272c1f068 100644 --- a/server/src/test/java/io/druid/segment/realtime/firehose/LocalFirehoseFactoryTest.java +++ b/server/src/test/java/io/druid/segment/realtime/firehose/LocalFirehoseFactoryTest.java @@ -19,7 +19,6 @@ package io.druid.segment.realtime.firehose; -import com.google.common.base.Charsets; import com.google.common.collect.Lists; import io.druid.data.input.Firehose; import io.druid.data.input.Row; @@ -89,7 +88,7 @@ public void testConnect() throws IOException false, 0 ), - Charsets.UTF_8.name() + StandardCharsets.UTF_8.name() ), null)) { final List rows = new ArrayList<>(); while (firehose.hasMore()) { diff --git a/server/src/test/java/io/druid/server/log/LoggingRequestLoggerTest.java b/server/src/test/java/io/druid/server/log/LoggingRequestLoggerTest.java index bdcb0ded63cb..53dec6a6d0c9 100644 --- a/server/src/test/java/io/druid/server/log/LoggingRequestLoggerTest.java +++ b/server/src/test/java/io/druid/server/log/LoggingRequestLoggerTest.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.annotation.JsonTypeName; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Charsets; import com.google.common.collect.ImmutableMap; import io.druid.jackson.DefaultObjectMapper; import io.druid.java.util.common.DateTimes; @@ -51,6 +50,7 @@ import org.junit.Test; import java.io.ByteArrayOutputStream; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -99,7 +99,7 @@ public static void setUpStatic() .newBuilder() .setName("test stream") .setTarget(baos) - .setLayout(JsonLayout.createLayout(false, true, false, true, true, Charsets.UTF_8)) + .setLayout(JsonLayout.createLayout(false, true, false, true, true, StandardCharsets.UTF_8)) .build(); final Logger logger = (Logger) LogManager.getLogger(LoggingRequestLogger.class); From 30fc4d3ba0c43409ea9b0ef228c017345c02ef0b Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 29 Mar 2018 10:30:12 -0700 Subject: [PATCH 12/67] Coordinator balancer move then drop fix (#5528) * #5521 part 1 * formatting * oops * less magic tests --- .../AbstractCuratorServerInventoryView.java | 63 +-- .../coordinator/CuratorLoadQueuePeon.java | 70 +-- .../server/coordinator/DruidCoordinator.java | 128 ++--- .../helper/DruidCoordinatorRuleRunner.java | 6 +- .../io/druid/curator/CuratorTestBase.java | 51 ++ .../CuratorDruidCoordinatorTest.java | 534 ++++++++++++++++++ 6 files changed, 674 insertions(+), 178 deletions(-) create mode 100644 server/src/test/java/io/druid/server/coordinator/CuratorDruidCoordinatorTest.java diff --git a/server/src/main/java/io/druid/client/AbstractCuratorServerInventoryView.java b/server/src/main/java/io/druid/client/AbstractCuratorServerInventoryView.java index d6c731cdc58f..3c7159c0c2a8 100644 --- a/server/src/main/java/io/druid/client/AbstractCuratorServerInventoryView.java +++ b/server/src/main/java/io/druid/client/AbstractCuratorServerInventoryView.java @@ -23,17 +23,16 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Function; import com.google.common.base.Throwables; -import 
io.druid.java.util.emitter.EmittingLogger; -import io.druid.java.util.common.concurrent.Execs; import io.druid.curator.inventory.CuratorInventoryManager; import io.druid.curator.inventory.CuratorInventoryManagerStrategy; import io.druid.curator.inventory.InventoryManagerConfig; import io.druid.java.util.common.StringUtils; +import io.druid.java.util.common.concurrent.Execs; import io.druid.java.util.common.lifecycle.LifecycleStart; import io.druid.java.util.common.lifecycle.LifecycleStop; +import io.druid.java.util.emitter.EmittingLogger; import io.druid.timeline.DataSegment; import org.apache.curator.framework.CuratorFramework; -import org.apache.curator.utils.ZKPaths; import java.io.IOException; import java.util.Collection; @@ -157,14 +156,7 @@ public void inventoryInitialized() { log.info("Inventory Initialized"); runSegmentCallbacks( - new Function() - { - @Override - public CallbackAction apply(SegmentCallback input) - { - return input.segmentViewInitialized(); - } - } + input -> input.segmentViewInitialized() ); } } @@ -233,15 +225,10 @@ protected void runSegmentCallbacks( { for (final Map.Entry entry : segmentCallbacks.entrySet()) { entry.getValue().execute( - new Runnable() - { - @Override - public void run() - { - if (CallbackAction.UNREGISTER == fn.apply(entry.getKey())) { - segmentCallbackRemoved(entry.getKey()); - segmentCallbacks.remove(entry.getKey()); - } + () -> { + if (CallbackAction.UNREGISTER == fn.apply(entry.getKey())) { + segmentCallbackRemoved(entry.getKey()); + segmentCallbacks.remove(entry.getKey()); } } ); @@ -252,14 +239,9 @@ private void runServerRemovedCallbacks(final DruidServer server) { for (final Map.Entry entry : serverRemovedCallbacks.entrySet()) { entry.getValue().execute( - new Runnable() - { - @Override - public void run() - { - if (CallbackAction.UNREGISTER == entry.getKey().serverRemoved(server)) { - serverRemovedCallbacks.remove(entry.getKey()); - } + () -> { + if (CallbackAction.UNREGISTER == entry.getKey().serverRemoved(server)) { + serverRemovedCallbacks.remove(entry.getKey()); } } ); @@ -286,14 +268,7 @@ protected void addSingleInventory( container.addDataSegment(inventory); runSegmentCallbacks( - new Function() - { - @Override - public CallbackAction apply(SegmentCallback input) - { - return input.segmentAdded(container.getMetadata(), inventory); - } - } + input -> input.segmentAdded(container.getMetadata(), inventory) ); } @@ -315,14 +290,7 @@ protected void removeSingleInventory(final DruidServer container, String invento container.removeDataSegment(inventoryKey); runSegmentCallbacks( - new Function() - { - @Override - public CallbackAction apply(SegmentCallback input) - { - return input.segmentRemoved(container.getMetadata(), segment); - } - } + input -> input.segmentRemoved(container.getMetadata(), segment) ); } @@ -330,11 +298,8 @@ public CallbackAction apply(SegmentCallback input) public boolean isSegmentLoadedByServer(String serverKey, DataSegment segment) { try { - String toServedSegPath = ZKPaths.makePath( - ZKPaths.makePath(getInventoryManagerConfig().getInventoryPath(), serverKey), - segment.getIdentifier() - ); - return curator.checkExists().forPath(toServedSegPath) != null; + DruidServer server = getInventoryValue(serverKey); + return server != null && server.getSegment(segment.getIdentifier()) != null; } catch (Exception ex) { throw Throwables.propagate(ex); diff --git a/server/src/main/java/io/druid/server/coordinator/CuratorLoadQueuePeon.java b/server/src/main/java/io/druid/server/coordinator/CuratorLoadQueuePeon.java index 
08c1078110b5..51bf161f5f0f 100644 --- a/server/src/main/java/io/druid/server/coordinator/CuratorLoadQueuePeon.java +++ b/server/src/main/java/io/druid/server/coordinator/CuratorLoadQueuePeon.java @@ -34,14 +34,12 @@ import org.apache.curator.framework.api.CuratorWatcher; import org.apache.curator.utils.ZKPaths; import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.data.Stat; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Set; -import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.ExecutorService; @@ -249,7 +247,7 @@ private void processSegmentChangeRequest() if (currentlyProcessing == null) { if (!stopped) { log.makeAlert("Crazy race condition! server[%s]", basePath) - .emit(); + .emit(); } actionCompleted(); return; @@ -261,38 +259,28 @@ private void processSegmentChangeRequest() curator.create().withMode(CreateMode.EPHEMERAL).forPath(path, payload); processingExecutor.schedule( - new Runnable() - { - @Override - public void run() - { - try { - if (curator.checkExists().forPath(path) != null) { - failAssign(new ISE("%s was never removed! Failing this operation!", path)); - } - } - catch (Exception e) { - failAssign(e); + () -> { + try { + if (curator.checkExists().forPath(path) != null) { + failAssign(new ISE("%s was never removed! Failing this operation!", path)); } } + catch (Exception e) { + failAssign(e); + } }, config.getLoadTimeoutDelay().getMillis(), TimeUnit.MILLISECONDS ); final Stat stat = curator.checkExists().usingWatcher( - new CuratorWatcher() - { - @Override - public void process(WatchedEvent watchedEvent) - { - switch (watchedEvent.getType()) { - case NodeDeleted: - entryRemoved(watchedEvent.getPath()); - break; - default: - // do nothing - } + (CuratorWatcher) watchedEvent -> { + switch (watchedEvent.getType()) { + case NodeDeleted: + entryRemoved(watchedEvent.getPath()); + break; + default: + // do nothing } } ).forPath(path); @@ -341,14 +329,7 @@ private void actionCompleted() final List callbacks = currentlyProcessing.getCallbacks(); currentlyProcessing = null; callBackExecutor.execute( - new Runnable() - { - @Override - public void run() - { - executeCallbacks(callbacks); - } - } + () -> executeCallbacks(callbacks) ); } } @@ -360,18 +341,13 @@ public void start() processingExecutor, config.getLoadQueuePeonRepeatDelay(), config.getLoadQueuePeonRepeatDelay(), - new Callable() - { - @Override - public ScheduledExecutors.Signal call() - { - processSegmentChangeRequest(); - - if (stopped) { - return ScheduledExecutors.Signal.STOP; - } else { - return ScheduledExecutors.Signal.REPEAT; - } + () -> { + processSegmentChangeRequest(); + + if (stopped) { + return ScheduledExecutors.Signal.STOP; + } else { + return ScheduledExecutors.Signal.REPEAT; } } ); diff --git a/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java index 5ee55f917c2b..0280045c3a9d 100644 --- a/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java @@ -20,7 +20,6 @@ package io.druid.server.coordinator; import com.google.common.base.Function; -import com.google.common.base.Predicate; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; 
@@ -30,8 +29,6 @@ import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.google.inject.Inject; -import io.druid.java.util.emitter.EmittingLogger; -import io.druid.java.util.emitter.service.ServiceEmitter; import io.druid.client.DruidDataSource; import io.druid.client.DruidServer; import io.druid.client.ImmutableDruidDataSource; @@ -56,6 +53,8 @@ import io.druid.java.util.common.guava.FunctionalIterable; import io.druid.java.util.common.lifecycle.LifecycleStart; import io.druid.java.util.common.lifecycle.LifecycleStop; +import io.druid.java.util.emitter.EmittingLogger; +import io.druid.java.util.emitter.service.ServiceEmitter; import io.druid.metadata.MetadataRuleManager; import io.druid.metadata.MetadataSegmentManager; import io.druid.server.DruidNode; @@ -98,14 +97,8 @@ public class DruidCoordinator { public static Comparator SEGMENT_COMPARATOR = Ordering.from(Comparators.intervalsByEndThenStart()) .onResultOf( - new Function() - { - @Override - public Interval apply(DataSegment segment) - { - return segment.getInterval(); - } - }) + (Function) segment -> segment + .getInterval()) .compound(Ordering.natural()) .reverse(); @@ -572,7 +565,8 @@ public ScheduledExecutors.Signal call() if (coordLeaderSelector.isLeader() && startingLeaderCounter == coordLeaderSelector.localTerm()) { theRunnable.run(); } - if (coordLeaderSelector.isLeader() && startingLeaderCounter == coordLeaderSelector.localTerm()) { // (We might no longer be leader) + if (coordLeaderSelector.isLeader() + && startingLeaderCounter == coordLeaderSelector.localTerm()) { // (We might no longer be leader) return ScheduledExecutors.Signal.REPEAT; } else { return ScheduledExecutors.Signal.STOP; @@ -697,82 +691,58 @@ public CoordinatorHistoricalManagerRunnable(final int startingLeaderCounter) super( ImmutableList.of( new DruidCoordinatorSegmentInfoLoader(DruidCoordinator.this), - new DruidCoordinatorHelper() - { - @Override - public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) - { - // Display info about all historical servers - Iterable servers = FunctionalIterable - .create(serverInventoryView.getInventory()) - .filter( - new Predicate() - { - @Override - public boolean apply( - DruidServer input - ) - { - return input.segmentReplicatable(); - } - } - ).transform( - new Function() - { - @Override - public ImmutableDruidServer apply(DruidServer input) - { - return input.toImmutableDruidServer(); - } - } - ); - - if (log.isDebugEnabled()) { - log.debug("Servers"); - for (ImmutableDruidServer druidServer : servers) { - log.debug(" %s", druidServer); - log.debug(" -- DataSources"); - for (ImmutableDruidDataSource druidDataSource : druidServer.getDataSources()) { - log.debug(" %s", druidDataSource); - } + params -> { + // Display info about all historical servers + Iterable servers = FunctionalIterable + .create(serverInventoryView.getInventory()) + .filter(DruidServer::segmentReplicatable) + .transform(DruidServer::toImmutableDruidServer); + + if (log.isDebugEnabled()) { + log.debug("Servers"); + for (ImmutableDruidServer druidServer : servers) { + log.debug(" %s", druidServer); + log.debug(" -- DataSources"); + for (ImmutableDruidDataSource druidDataSource : druidServer.getDataSources()) { + log.debug(" %s", druidDataSource); } } + } - // Find all historical servers, group them by subType and sort by ascending usage - final DruidCluster cluster = new DruidCluster(); - for (ImmutableDruidServer server : servers) { - if 
(!loadManagementPeons.containsKey(server.getName())) { - LoadQueuePeon loadQueuePeon = taskMaster.giveMePeon(server); - loadQueuePeon.start(); - log.info("Created LoadQueuePeon for server[%s].", server.getName()); + // Find all historical servers, group them by subType and sort by ascending usage + final DruidCluster cluster = new DruidCluster(); + for (ImmutableDruidServer server : servers) { + if (!loadManagementPeons.containsKey(server.getName())) { + LoadQueuePeon loadQueuePeon = taskMaster.giveMePeon(server); + loadQueuePeon.start(); + log.info("Created LoadQueuePeon for server[%s].", server.getName()); - loadManagementPeons.put(server.getName(), loadQueuePeon); - } - - cluster.add(new ServerHolder(server, loadManagementPeons.get(server.getName()))); + loadManagementPeons.put(server.getName(), loadQueuePeon); } - segmentReplicantLookup = SegmentReplicantLookup.make(cluster); + cluster.add(new ServerHolder(server, loadManagementPeons.get(server.getName()))); + } - // Stop peons for servers that aren't there anymore. - final Set disappeared = Sets.newHashSet(loadManagementPeons.keySet()); - for (ImmutableDruidServer server : servers) { - disappeared.remove(server.getName()); - } - for (String name : disappeared) { - log.info("Removing listener for server[%s] which is no longer there.", name); - LoadQueuePeon peon = loadManagementPeons.remove(name); - peon.stop(); - } + segmentReplicantLookup = SegmentReplicantLookup.make(cluster); - return params.buildFromExisting() - .withDruidCluster(cluster) - .withDatabaseRuleManager(metadataRuleManager) - .withLoadManagementPeons(loadManagementPeons) - .withSegmentReplicantLookup(segmentReplicantLookup) - .withBalancerReferenceTimestamp(DateTimes.nowUtc()) - .build(); + // Stop peons for servers that aren't there anymore. 
+ final Set disappeared = Sets.newHashSet(loadManagementPeons.keySet()); + for (ImmutableDruidServer server : servers) { + disappeared.remove(server.getName()); + } + for (String name : disappeared) { + log.info("Removing listener for server[%s] which is no longer there.", name); + LoadQueuePeon peon = loadManagementPeons.remove(name); + peon.stop(); } + + return params.buildFromExisting() + .withDruidCluster(cluster) + .withDatabaseRuleManager(metadataRuleManager) + .withLoadManagementPeons(loadManagementPeons) + .withSegmentReplicantLookup(segmentReplicantLookup) + .withBalancerReferenceTimestamp(DateTimes.nowUtc()) + .build(); }, new DruidCoordinatorRuleRunner(DruidCoordinator.this), new DruidCoordinatorCleanupUnneeded(DruidCoordinator.this), diff --git a/server/src/main/java/io/druid/server/coordinator/helper/DruidCoordinatorRuleRunner.java b/server/src/main/java/io/druid/server/coordinator/helper/DruidCoordinatorRuleRunner.java index 5fa0ff17b220..47191606c3b1 100644 --- a/server/src/main/java/io/druid/server/coordinator/helper/DruidCoordinatorRuleRunner.java +++ b/server/src/main/java/io/druid/server/coordinator/helper/DruidCoordinatorRuleRunner.java @@ -20,9 +20,9 @@ package io.druid.server.coordinator.helper; import com.google.common.collect.Lists; -import io.druid.java.util.common.guava.Comparators; -import io.druid.java.util.emitter.EmittingLogger; +import com.google.common.collect.Ordering; import io.druid.java.util.common.DateTimes; +import io.druid.java.util.emitter.EmittingLogger; import io.druid.metadata.MetadataRuleManager; import io.druid.server.coordinator.CoordinatorStats; import io.druid.server.coordinator.DruidCluster; @@ -92,7 +92,7 @@ public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) for (DataSegment segment : params.getAvailableSegments()) { VersionedIntervalTimeline timeline = timelines.get(segment.getDataSource()); if (timeline == null) { - timeline = new VersionedIntervalTimeline<>(Comparators.comparable()); + timeline = new VersionedIntervalTimeline<>(Ordering.natural()); timelines.put(segment.getDataSource(), timeline); } diff --git a/server/src/test/java/io/druid/curator/CuratorTestBase.java b/server/src/test/java/io/druid/curator/CuratorTestBase.java index 6ad32306172b..502d4c83a2e4 100644 --- a/server/src/test/java/io/druid/curator/CuratorTestBase.java +++ b/server/src/test/java/io/druid/curator/CuratorTestBase.java @@ -23,6 +23,8 @@ import com.google.common.base.Throwables; import com.google.common.collect.ImmutableSet; import io.druid.client.DruidServer; +import io.druid.common.utils.UUIDUtils; +import io.druid.java.util.common.DateTimes; import io.druid.server.initialization.ZkPathsConfig; import io.druid.timeline.DataSegment; import org.apache.curator.framework.CuratorFramework; @@ -44,6 +46,8 @@ public class CuratorTestBase protected Timing timing; protected CuratorFramework curator; + private int batchCtr = 0; + protected void setupServerAndCurator() throws Exception { server = new TestingServer(); @@ -127,6 +131,47 @@ protected void announceSegmentForServer( } } + protected String announceBatchSegmentsForServer( + DruidServer druidServer, + ImmutableSet segments, + ZkPathsConfig zkPathsConfig, + ObjectMapper jsonMapper + ) + { + final String segmentAnnouncementPath = ZKPaths.makePath(ZKPaths.makePath( + zkPathsConfig.getLiveSegmentsPath(), + druidServer.getHost()), + UUIDUtils.generateUuid( + druidServer.getHost(), + druidServer.getType().toString(), + druidServer.getTier(), + DateTimes.nowUtc().toString() + ) + 
String.valueOf(batchCtr++) + ); + + + try { + curator.create() + .compressed() + .withMode(CreateMode.EPHEMERAL) + .forPath(segmentAnnouncementPath, jsonMapper.writeValueAsBytes(segments)); + } + catch (KeeperException.NodeExistsException e) { + try { + curator.setData() + .forPath(segmentAnnouncementPath, jsonMapper.writeValueAsBytes(segments)); + } + catch (Exception e1) { + Throwables.propagate(e1); + } + } + catch (Exception e) { + Throwables.propagate(e); + } + + return segmentAnnouncementPath; + } + protected void unannounceSegmentForServer(DruidServer druidServer, DataSegment segment, ZkPathsConfig zkPathsConfig) throws Exception { @@ -138,6 +183,12 @@ protected void unannounceSegmentForServer(DruidServer druidServer, DataSegment s ); } + protected void unannounceSegmentFromBatchForServer(DruidServer druidServer, DataSegment segment, String batchPath, ZkPathsConfig zkPathsConfig) + throws Exception + { + curator.delete().guaranteed().forPath(batchPath); + } + protected void tearDownServerAndCurator() { try { diff --git a/server/src/test/java/io/druid/server/coordinator/CuratorDruidCoordinatorTest.java b/server/src/test/java/io/druid/server/coordinator/CuratorDruidCoordinatorTest.java new file mode 100644 index 000000000000..19e9c3a1bcf4 --- /dev/null +++ b/server/src/test/java/io/druid/server/coordinator/CuratorDruidCoordinatorTest.java @@ -0,0 +1,534 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.server.coordinator; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Predicates; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; +import io.druid.client.BatchServerInventoryView; +import io.druid.client.CoordinatorServerView; +import io.druid.client.DruidServer; +import io.druid.client.ImmutableDruidDataSource; +import io.druid.common.config.JacksonConfigManager; +import io.druid.curator.CuratorTestBase; +import io.druid.curator.discovery.NoopServiceAnnouncer; +import io.druid.discovery.DruidLeaderSelector; +import io.druid.jackson.DefaultObjectMapper; +import io.druid.java.util.common.Intervals; +import io.druid.java.util.common.Pair; +import io.druid.java.util.common.concurrent.Execs; +import io.druid.java.util.common.concurrent.ScheduledExecutorFactory; +import io.druid.metadata.MetadataRuleManager; +import io.druid.metadata.MetadataSegmentManager; +import io.druid.segment.TestHelper; +import io.druid.server.DruidNode; +import io.druid.server.coordination.DruidServerMetadata; +import io.druid.server.coordination.ServerType; +import io.druid.server.initialization.ZkPathsConfig; +import io.druid.server.lookup.cache.LookupCoordinatorManager; +import io.druid.server.metrics.NoopServiceEmitter; +import io.druid.timeline.DataSegment; +import io.druid.timeline.partition.NoneShardSpec; +import org.apache.curator.framework.recipes.cache.PathChildrenCache; +import org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; +import org.apache.curator.utils.ZKPaths; +import org.easymock.EasyMock; +import org.joda.time.Duration; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +/** + * This tests zookeeper specific coordinator/load queue/historical interactions, such as moving segments by the balancer + */ +public class CuratorDruidCoordinatorTest extends CuratorTestBase +{ + private DruidCoordinator coordinator; + private MetadataSegmentManager databaseSegmentManager; + private ScheduledExecutorFactory scheduledExecutorFactory; + private ConcurrentMap loadManagementPeons; + private LoadQueuePeon sourceLoadQueuePeon; + private LoadQueuePeon destinationLoadQueuePeon; + private MetadataRuleManager metadataRuleManager; + private CountDownLatch leaderAnnouncerLatch; + private CountDownLatch leaderUnannouncerLatch; + private PathChildrenCache sourceLoadQueueChildrenCache; + private PathChildrenCache destinationLoadQueueChildrenCache; + private DruidCoordinatorConfig druidCoordinatorConfig; + private ObjectMapper objectMapper; + private JacksonConfigManager configManager; + private DruidNode druidNode; + private static final String SEGPATH = "/druid/segments"; + private static final String SOURCE_LOAD_PATH = "/druid/loadQueue/localhost:1"; + private static final String DESTINATION_LOAD_PATH = "/druid/loadQueue/localhost:2"; + private static final long COORDINATOR_START_DELAY = 1; + private static final long COORDINATOR_PERIOD = 100; + + private BatchServerInventoryView baseView; + private CoordinatorServerView serverView; + 
private CountDownLatch segmentViewInitLatch; + private CountDownLatch segmentAddedLatch; + private CountDownLatch segmentRemovedLatch; + private final ObjectMapper jsonMapper; + private final ZkPathsConfig zkPathsConfig; + + public CuratorDruidCoordinatorTest() + { + jsonMapper = TestHelper.makeJsonMapper(); + zkPathsConfig = new ZkPathsConfig(); + } + + @Before + public void setUp() throws Exception + { + databaseSegmentManager = EasyMock.createNiceMock(MetadataSegmentManager.class); + metadataRuleManager = EasyMock.createNiceMock(MetadataRuleManager.class); + configManager = EasyMock.createNiceMock(JacksonConfigManager.class); + EasyMock.expect( + configManager.watch( + EasyMock.eq(CoordinatorDynamicConfig.CONFIG_KEY), + EasyMock.anyObject(Class.class), + EasyMock.anyObject() + ) + ).andReturn(new AtomicReference(CoordinatorDynamicConfig.builder().build())).anyTimes(); + EasyMock.expect( + configManager.watch( + EasyMock.eq(CoordinatorCompactionConfig.CONFIG_KEY), + EasyMock.anyObject(Class.class), + EasyMock.anyObject() + ) + ).andReturn(new AtomicReference(CoordinatorCompactionConfig.empty())).anyTimes(); + EasyMock.replay(configManager); + + setupServerAndCurator(); + curator.start(); + curator.blockUntilConnected(); + curator.create().creatingParentsIfNeeded().forPath(SEGPATH); + curator.create().creatingParentsIfNeeded().forPath(SOURCE_LOAD_PATH); + curator.create().creatingParentsIfNeeded().forPath(DESTINATION_LOAD_PATH); + + objectMapper = new DefaultObjectMapper(); + druidCoordinatorConfig = new TestDruidCoordinatorConfig( + new Duration(COORDINATOR_START_DELAY), + new Duration(COORDINATOR_PERIOD), + null, + null, + new Duration(COORDINATOR_PERIOD), + null, + 10, + null, + false, + false, + new Duration("PT0s") + ); + sourceLoadQueueChildrenCache = new PathChildrenCache( + curator, + SOURCE_LOAD_PATH, + true, + true, + Execs.singleThreaded("coordinator_test_path_children_cache_src-%d") + ); + destinationLoadQueueChildrenCache = new PathChildrenCache( + curator, + DESTINATION_LOAD_PATH, + true, + true, + Execs.singleThreaded("coordinator_test_path_children_cache_dest-%d") + ); + sourceLoadQueuePeon = new CuratorLoadQueuePeon( + curator, + SOURCE_LOAD_PATH, + objectMapper, + Execs.scheduledSingleThreaded("coordinator_test_load_queue_peon_src_scheduled-%d"), + Execs.singleThreaded("coordinator_test_load_queue_peon_src-%d"), + druidCoordinatorConfig + ); + destinationLoadQueuePeon = new CuratorLoadQueuePeon( + curator, + DESTINATION_LOAD_PATH, + objectMapper, + Execs.scheduledSingleThreaded("coordinator_test_load_queue_peon_dest_scheduled-%d"), + Execs.singleThreaded("coordinator_test_load_queue_peon_dest-%d"), + druidCoordinatorConfig + ); + druidNode = new DruidNode("hey", "what", 1234, null, true, false); + loadManagementPeons = new ConcurrentHashMap<>(); + scheduledExecutorFactory = (corePoolSize, nameFormat) -> Executors.newSingleThreadScheduledExecutor(); + leaderAnnouncerLatch = new CountDownLatch(1); + leaderUnannouncerLatch = new CountDownLatch(1); + coordinator = new DruidCoordinator( + druidCoordinatorConfig, + new ZkPathsConfig() + { + + @Override + public String getBase() + { + return "druid"; + } + }, + configManager, + databaseSegmentManager, + baseView, + metadataRuleManager, + curator, + new NoopServiceEmitter(), + scheduledExecutorFactory, + null, + null, + new NoopServiceAnnouncer() + { + @Override + public void announce(DruidNode node) + { + // count down when this coordinator becomes the leader + leaderAnnouncerLatch.countDown(); + } + + @Override + public void 
unannounce(DruidNode node) + { + leaderUnannouncerLatch.countDown(); + } + }, + druidNode, + loadManagementPeons, + null, + new CostBalancerStrategyFactory(), + EasyMock.createNiceMock(LookupCoordinatorManager.class), + new TestDruidLeaderSelector() + ); + } + + @After + public void tearDown() throws Exception + { + baseView.stop(); + sourceLoadQueuePeon.stop(); + sourceLoadQueueChildrenCache.close(); + destinationLoadQueueChildrenCache.close(); + tearDownServerAndCurator(); + } + + @Test(timeout = 5_000) + public void testMoveSegment() throws Exception + { + segmentViewInitLatch = new CountDownLatch(1); + segmentAddedLatch = new CountDownLatch(4); + + segmentRemovedLatch = new CountDownLatch(0); + + CountDownLatch destCountdown = new CountDownLatch(1); + CountDownLatch srcCountdown = new CountDownLatch(1); + setupView(); + + DruidServer source = new DruidServer( + "localhost:1", + "localhost:1", + null, + 10000000L, + ServerType.HISTORICAL, + "default_tier", + 0 + ); + + DruidServer dest = new DruidServer( + "localhost:2", + "localhost:2", + null, + 10000000L, + ServerType.HISTORICAL, + "default_tier", + 0 + ); + + setupZNodeForServer(source, zkPathsConfig, jsonMapper); + setupZNodeForServer(dest, zkPathsConfig, jsonMapper); + + final List sourceSegments = Lists.transform( + ImmutableList.of( + Pair.of("2011-04-01/2011-04-03", "v1"), + Pair.of("2011-04-03/2011-04-06", "v1"), + Pair.of("2011-04-06/2011-04-09", "v1") + ), + input -> dataSegmentWithIntervalAndVersion(input.lhs, input.rhs) + ); + + final List destinationSegments = Lists.transform( + ImmutableList.of( + Pair.of("2011-03-31/2011-04-01", "v1") + ), + input -> dataSegmentWithIntervalAndVersion(input.lhs, input.rhs) + ); + + DataSegment segmentToMove = sourceSegments.get(2); + + List sourceSegKeys = Lists.newArrayList(); + List destSegKeys = Lists.newArrayList(); + + for (DataSegment segment : sourceSegments) { + sourceSegKeys.add(announceBatchSegmentsForServer(source, ImmutableSet.of(segment), zkPathsConfig, jsonMapper)); + } + + for (DataSegment segment : destinationSegments) { + destSegKeys.add(announceBatchSegmentsForServer(dest, ImmutableSet.of(segment), zkPathsConfig, jsonMapper)); + } + + Assert.assertTrue(timing.forWaiting().awaitLatch(segmentViewInitLatch)); + Assert.assertTrue(timing.forWaiting().awaitLatch(segmentAddedLatch)); + + // these child watchers are used to simulate actions of historicals, announcing a segment on noticing a load queue + // for the destination and unannouncing from source server when noticing a drop request + + sourceLoadQueueChildrenCache.getListenable().addListener( + (curatorFramework, pathChildrenCacheEvent) -> { + if (pathChildrenCacheEvent.getType().equals(PathChildrenCacheEvent.Type.INITIALIZED)) { + srcCountdown.countDown(); + } else if (pathChildrenCacheEvent.getType().equals(PathChildrenCacheEvent.Type.CHILD_ADDED)) { + //Simulate source server dropping segment + unannounceSegmentFromBatchForServer(source, segmentToMove, sourceSegKeys.get(2), zkPathsConfig); + } + } + ); + + destinationLoadQueueChildrenCache.getListenable().addListener( + (curatorFramework, pathChildrenCacheEvent) -> { + if (pathChildrenCacheEvent.getType().equals(PathChildrenCacheEvent.Type.INITIALIZED)) { + destCountdown.countDown(); + } else if (pathChildrenCacheEvent.getType().equals(PathChildrenCacheEvent.Type.CHILD_ADDED)) { + //Simulate destination server loading segment + announceBatchSegmentsForServer(dest, ImmutableSet.of(segmentToMove), zkPathsConfig, jsonMapper); + } + } + ); + + 
sourceLoadQueueChildrenCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); + destinationLoadQueueChildrenCache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT); + + Assert.assertTrue(timing.forWaiting().awaitLatch(srcCountdown)); + Assert.assertTrue(timing.forWaiting().awaitLatch(destCountdown)); + + + loadManagementPeons.put("localhost:1", sourceLoadQueuePeon); + loadManagementPeons.put("localhost:2", destinationLoadQueuePeon); + + + segmentRemovedLatch = new CountDownLatch(1); + segmentAddedLatch = new CountDownLatch(1); + + ImmutableDruidDataSource druidDataSource = EasyMock.createNiceMock(ImmutableDruidDataSource.class); + EasyMock.expect(druidDataSource.getSegment(EasyMock.anyString())).andReturn(sourceSegments.get(2)); + EasyMock.replay(druidDataSource); + EasyMock.expect(databaseSegmentManager.getInventoryValue(EasyMock.anyString())).andReturn(druidDataSource); + EasyMock.replay(databaseSegmentManager); + + coordinator.moveSegment( + source.toImmutableDruidServer(), + dest.toImmutableDruidServer(), + sourceSegments.get(2), + null + ); + + // wait for destination server to load segment + Assert.assertTrue(timing.forWaiting().awaitLatch(segmentAddedLatch)); + + // remove load queue key from destination server to trigger adding drop to load queue + curator.delete().guaranteed().forPath(ZKPaths.makePath(DESTINATION_LOAD_PATH, segmentToMove.getIdentifier())); + + // wait for drop + Assert.assertTrue(timing.forWaiting().awaitLatch(segmentRemovedLatch)); + + // clean up drop from load queue + curator.delete().guaranteed().forPath(ZKPaths.makePath(SOURCE_LOAD_PATH, segmentToMove.getIdentifier())); + + List servers = serverView.getInventory().stream().collect(Collectors.toList()); + + Assert.assertEquals(2, servers.get(0).getSegments().size()); + Assert.assertEquals(2, servers.get(1).getSegments().size()); + } + + private static class TestDruidLeaderSelector implements DruidLeaderSelector + { + private volatile Listener listener; + private volatile String leader; + + @Override + public String getCurrentLeader() + { + return leader; + } + + @Override + public boolean isLeader() + { + return leader != null; + } + + @Override + public int localTerm() + { + return 0; + } + + @Override + public void registerListener(Listener listener) + { + this.listener = listener; + leader = "what:1234"; + listener.becomeLeader(); + } + + @Override + public void unregisterListener() + { + leader = null; + listener.stopBeingLeader(); + } + } + + private void setupView() throws Exception + { + baseView = new BatchServerInventoryView( + zkPathsConfig, + curator, + jsonMapper, + Predicates.alwaysTrue() + ) + { + @Override + public void registerSegmentCallback(Executor exec, final SegmentCallback callback) + { + super.registerSegmentCallback( + exec, new SegmentCallback() + { + @Override + public CallbackAction segmentAdded(DruidServerMetadata server, DataSegment segment) + { + CallbackAction res = callback.segmentAdded(server, segment); + segmentAddedLatch.countDown(); + return res; + } + + @Override + public CallbackAction segmentRemoved(DruidServerMetadata server, DataSegment segment) + { + CallbackAction res = callback.segmentRemoved(server, segment); + segmentRemovedLatch.countDown(); + return res; + } + + @Override + public CallbackAction segmentViewInitialized() + { + CallbackAction res = callback.segmentViewInitialized(); + segmentViewInitLatch.countDown(); + return res; + } + } + ); + } + }; + + serverView = new CoordinatorServerView(baseView); + + baseView.start(); + + 
sourceLoadQueuePeon.start(); + destinationLoadQueuePeon.start(); + + coordinator = new DruidCoordinator( + druidCoordinatorConfig, + new ZkPathsConfig() + { + + @Override + public String getBase() + { + return "druid"; + } + }, + configManager, + databaseSegmentManager, + baseView, + metadataRuleManager, + curator, + new NoopServiceEmitter(), + scheduledExecutorFactory, + null, + null, + new NoopServiceAnnouncer() + { + @Override + public void announce(DruidNode node) + { + // count down when this coordinator becomes the leader + leaderAnnouncerLatch.countDown(); + } + + @Override + public void unannounce(DruidNode node) + { + leaderUnannouncerLatch.countDown(); + } + }, + druidNode, + loadManagementPeons, + null, + new CostBalancerStrategyFactory(), + EasyMock.createNiceMock(LookupCoordinatorManager.class), + new TestDruidLeaderSelector() + ); + } + + private DataSegment dataSegmentWithIntervalAndVersion(String intervalStr, String version) + { + return DataSegment.builder() + .dataSource("test_curator_druid_coordinator") + .interval(Intervals.of(intervalStr)) + .loadSpec( + ImmutableMap.of( + "type", + "local", + "path", + "somewhere" + ) + ) + .version(version) + .dimensions(ImmutableList.of()) + .metrics(ImmutableList.of()) + .shardSpec(NoneShardSpec.instance()) + .binaryVersion(9) + .size(0) + .build(); + } +} From 0c4598c1fee402f26d8575d361eef5f62c69b203 Mon Sep 17 00:00:00 2001 From: Arup Malakar Date: Thu, 29 Mar 2018 14:36:40 -0700 Subject: [PATCH 13/67] Fix typo in avatica java client code documenation (#5553) --- docs/content/querying/sql.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/querying/sql.md b/docs/content/querying/sql.md index a993a76f9539..09778119f2c5 100644 --- a/docs/content/querying/sql.md +++ b/docs/content/querying/sql.md @@ -382,7 +382,7 @@ Properties connectionProperties = new Properties(); try (Connection connection = DriverManager.getConnection(url, connectionProperties)) { try ( - final Statement statement = client.createStatement(); + final Statement statement = connection.createStatement(); final ResultSet resultSet = statement.executeQuery(query) ) { while (resultSet.next()) { From 05547e29b2099646241fe2fd728286ad0b81296c Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 30 Mar 2018 12:43:19 -0700 Subject: [PATCH 14/67] Fix SQLMetadataSegmentManager to allow succesive start and stop (#5554) * Fix SQLMetadataSegmentManager to allow succesive start and stop * address comment * add synchronization --- .../io/druid/concurrent/LifecycleLock.java | 20 +++++----- .../druid/concurrent/LifecycleLockTest.java | 3 +- .../metadata/SQLMetadataSegmentManager.java | 37 +++++++++---------- .../cache/LookupCoordinatorManager.java | 29 ++++++++++----- .../metadata/SQLMetadataRuleManagerTest.java | 10 +++++ ...ava => SQLMetadataSegmentManagerTest.java} | 12 +++++- .../cache/LookupCoordinatorManagerTest.java | 21 +++++------ 7 files changed, 80 insertions(+), 52 deletions(-) rename server/src/test/java/io/druid/metadata/{MetadataSegmentManagerTest.java => SQLMetadataSegmentManagerTest.java} (96%) diff --git a/common/src/main/java/io/druid/concurrent/LifecycleLock.java b/common/src/main/java/io/druid/concurrent/LifecycleLock.java index a94e3cd31a3e..7cc8e64a113f 100644 --- a/common/src/main/java/io/druid/concurrent/LifecycleLock.java +++ b/common/src/main/java/io/druid/concurrent/LifecycleLock.java @@ -175,9 +175,9 @@ void exitStop() } } - void reset() + void exitStopAndReset() { - if (!compareAndSetState(STOPPED, NOT_STARTED)) { + if 
(!compareAndSetState(STOPPING, NOT_STARTED)) { throw new IllegalMonitorStateException("Not called exitStop() before reset()"); } } @@ -187,7 +187,7 @@ void reset() /** * Start latch, only one canStart() call in any thread on this LifecycleLock object could return true, if {@link - * #reset()} is not called in between. + * #exitStopAndReset()} is not called in between. */ public boolean canStart() { @@ -257,8 +257,8 @@ public boolean canStop() } /** - * If this LifecycleLock is used in a restartable object, which uses {@link #reset()}, exitStop() must be called - * before exit from stop() on this object, usually in a finally block. + * Finalizes stopping the the LifecycleLock. This method must be called before exit from stop() on this object, + * usually in a finally block. If you're using a restartable object, use {@link #exitStopAndReset()} instead. * * @throws IllegalMonitorStateException if {@link #canStop()} is not yet called on this LifecycleLock */ @@ -268,12 +268,14 @@ public void exitStop() } /** - * Resets the LifecycleLock after {@link #exitStop()}, so that {@link #canStart()} could be called again. + * Finalizes stopping the LifecycleLock and resets it, so that {@link #canStart()} could be called again. If this + * LifecycleLock is used in a restartable object, this method must be called before exit from stop() on this object, + * usually in a finally block. * - * @throws IllegalMonitorStateException if {@link #exitStop()} is not yet called on this LifecycleLock + * @throws IllegalMonitorStateException if {@link #canStop()} is not yet called on this LifecycleLock */ - public void reset() + public void exitStopAndReset() { - sync.reset(); + sync.exitStopAndReset(); } } diff --git a/common/src/test/java/io/druid/concurrent/LifecycleLockTest.java b/common/src/test/java/io/druid/concurrent/LifecycleLockTest.java index e1d9f8adbbf7..afb55bf2c2c2 100644 --- a/common/src/test/java/io/druid/concurrent/LifecycleLockTest.java +++ b/common/src/test/java/io/druid/concurrent/LifecycleLockTest.java @@ -138,8 +138,7 @@ public void testRestart() lifecycleLock.started(); lifecycleLock.exitStart(); Assert.assertTrue(lifecycleLock.canStop()); - lifecycleLock.exitStop(); - lifecycleLock.reset(); + lifecycleLock.exitStopAndReset(); Assert.assertTrue(lifecycleLock.canStart()); } diff --git a/server/src/main/java/io/druid/metadata/SQLMetadataSegmentManager.java b/server/src/main/java/io/druid/metadata/SQLMetadataSegmentManager.java index 2a4815b4722f..8d412368b59d 100644 --- a/server/src/main/java/io/druid/metadata/SQLMetadataSegmentManager.java +++ b/server/src/main/java/io/druid/metadata/SQLMetadataSegmentManager.java @@ -35,7 +35,6 @@ import com.google.inject.Inject; import io.druid.client.DruidDataSource; import io.druid.client.ImmutableDruidDataSource; -import io.druid.concurrent.LifecycleLock; import io.druid.guice.ManageLifecycle; import io.druid.java.util.common.DateTimes; import io.druid.java.util.common.Intervals; @@ -86,7 +85,10 @@ public class SQLMetadataSegmentManager implements MetadataSegmentManager private static final Interner DATA_SEGMENT_INTERNER = Interners.newWeakInterner(); private static final EmittingLogger log = new EmittingLogger(SQLMetadataSegmentManager.class); - private final LifecycleLock lifecycleLock = new LifecycleLock(); + // Use to synchronize start() and stop(). These methods should be synchronized to prevent from being called at the + // same time if two different threads are calling them. 
This might be possible if a druid coordinator gets and drops + // leadership repeatedly in quick succession. + private final Object lock = new Object(); private final ObjectMapper jsonMapper; private final Supplier config; @@ -96,6 +98,7 @@ public class SQLMetadataSegmentManager implements MetadataSegmentManager private volatile ListeningScheduledExecutorService exec = null; private volatile ListenableFuture future = null; + private volatile boolean started; @Inject public SQLMetadataSegmentManager( @@ -116,11 +119,11 @@ public SQLMetadataSegmentManager( @LifecycleStart public void start() { - if (!lifecycleLock.canStart()) { - return; - } + synchronized (lock) { + if (started) { + return; + } - try { exec = MoreExecutors.listeningDecorator(Execs.scheduledSingleThreaded("DatabaseSegmentManager-Exec--%d")); final Duration delay = config.get().getPollDuration().toStandardDuration(); @@ -143,10 +146,7 @@ public void run() delay.getMillis(), TimeUnit.MILLISECONDS ); - lifecycleLock.started(); - } - finally { - lifecycleLock.exitStart(); + started = true; } } @@ -154,10 +154,11 @@ public void run() @LifecycleStop public void stop() { - if (!lifecycleLock.canStop()) { - return; - } - try { + synchronized (lock) { + if (!started) { + return; + } + final ConcurrentHashMap emptyMap = new ConcurrentHashMap<>(); ConcurrentHashMap current; do { @@ -168,9 +169,7 @@ public void stop() future = null; exec.shutdownNow(); exec = null; - } - finally { - lifecycleLock.exitStop(); + started = false; } } @@ -366,7 +365,7 @@ public boolean removeSegment(String ds, final String segmentID) @Override public boolean isStarted() { - return lifecycleLock.isStarted(); + return started; } @Override @@ -420,7 +419,7 @@ public List fold( public void poll() { try { - if (!lifecycleLock.isStarted()) { + if (!started) { return; } diff --git a/server/src/main/java/io/druid/server/lookup/cache/LookupCoordinatorManager.java b/server/src/main/java/io/druid/server/lookup/cache/LookupCoordinatorManager.java index ef9d3ff94593..3c4c8f754407 100644 --- a/server/src/main/java/io/druid/server/lookup/cache/LookupCoordinatorManager.java +++ b/server/src/main/java/io/druid/server/lookup/cache/LookupCoordinatorManager.java @@ -36,12 +36,6 @@ import com.google.common.util.concurrent.ListeningScheduledExecutorService; import com.google.common.util.concurrent.MoreExecutors; import com.google.inject.Inject; -import io.druid.java.util.emitter.EmittingLogger; -import io.druid.java.util.http.client.HttpClient; -import io.druid.java.util.http.client.Request; -import io.druid.java.util.http.client.response.ClientResponse; -import io.druid.java.util.http.client.response.HttpResponseHandler; -import io.druid.java.util.http.client.response.SequenceInputStreamResponseHandler; import io.druid.audit.AuditInfo; import io.druid.common.config.JacksonConfigManager; import io.druid.concurrent.LifecycleLock; @@ -54,6 +48,12 @@ import io.druid.java.util.common.StreamUtils; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.concurrent.Execs; +import io.druid.java.util.emitter.EmittingLogger; +import io.druid.java.util.http.client.HttpClient; +import io.druid.java.util.http.client.Request; +import io.druid.java.util.http.client.response.ClientResponse; +import io.druid.java.util.http.client.response.HttpResponseHandler; +import io.druid.java.util.http.client.response.SequenceInputStreamResponseHandler; import io.druid.query.lookup.LookupsState; import io.druid.server.http.HostAndPortWithScheme; import 
io.druid.server.listener.resource.ListenerResource; @@ -124,8 +124,7 @@ public class LookupCoordinatorManager // Updated by config watching service private AtomicReference>> lookupMapConfigRef; - @VisibleForTesting - final LifecycleLock lifecycleLock = new LifecycleLock(); + private final LifecycleLock lifecycleLock = new LifecycleLock(); private ListeningScheduledExecutorService executorService; private ListenableScheduledFuture backgroundManagerFuture; @@ -333,6 +332,17 @@ public LookupExtractorFactoryMapContainer getLookup(final String tier, final Str return tierLookups.get(lookupName); } + public boolean isStarted() + { + return lifecycleLock.isStarted(); + } + + @VisibleForTesting + boolean awaitStarted(long waitTimeMs) + { + return lifecycleLock.awaitStarted(waitTimeMs, TimeUnit.MILLISECONDS); + } + // start() and stop() are synchronized so that they never run in parallel in case of ZK acting funny or druid bug and // coordinator becomes leader and drops leadership in quick succession. public void start() @@ -439,8 +449,7 @@ public void stop() } finally { //so that subsequent start() would happen, even if stop() failed with exception - lifecycleLock.exitStop(); - lifecycleLock.reset(); + lifecycleLock.exitStopAndReset(); } } } diff --git a/server/src/test/java/io/druid/metadata/SQLMetadataRuleManagerTest.java b/server/src/test/java/io/druid/metadata/SQLMetadataRuleManagerTest.java index fb485492f9be..4bee622d0e5b 100644 --- a/server/src/test/java/io/druid/metadata/SQLMetadataRuleManagerTest.java +++ b/server/src/test/java/io/druid/metadata/SQLMetadataRuleManagerTest.java @@ -82,6 +82,16 @@ public void setUp() ); } + @Test + public void testMultipleStopAndStart() + { + // Simulate successive losing and getting the coordinator leadership + ruleManager.start(); + ruleManager.stop(); + ruleManager.start(); + ruleManager.stop(); + } + @Test public void testRuleInsert() { diff --git a/server/src/test/java/io/druid/metadata/MetadataSegmentManagerTest.java b/server/src/test/java/io/druid/metadata/SQLMetadataSegmentManagerTest.java similarity index 96% rename from server/src/test/java/io/druid/metadata/MetadataSegmentManagerTest.java rename to server/src/test/java/io/druid/metadata/SQLMetadataSegmentManagerTest.java index 2a67755d13de..df904a02588d 100644 --- a/server/src/test/java/io/druid/metadata/MetadataSegmentManagerTest.java +++ b/server/src/test/java/io/druid/metadata/SQLMetadataSegmentManagerTest.java @@ -41,7 +41,7 @@ import java.io.IOException; -public class MetadataSegmentManagerTest +public class SQLMetadataSegmentManagerTest { @Rule public final TestDerbyConnector.DerbyConnectorRule derbyConnectorRule = new TestDerbyConnector.DerbyConnectorRule(); @@ -230,4 +230,14 @@ public void testRemoveDataSegment() throws IOException Assert.assertNull(manager.getInventoryValue(newDataSource)); Assert.assertTrue(manager.removeSegment(newDataSource, newSegment.getIdentifier())); } + + @Test + public void testStopAndStart() + { + // Simulate successive losing and getting the coordinator leadership + manager.start(); + manager.stop(); + manager.start(); + manager.stop(); + } } diff --git a/server/src/test/java/io/druid/server/lookup/cache/LookupCoordinatorManagerTest.java b/server/src/test/java/io/druid/server/lookup/cache/LookupCoordinatorManagerTest.java index 3c63189a8603..3eb29e254943 100644 --- a/server/src/test/java/io/druid/server/lookup/cache/LookupCoordinatorManagerTest.java +++ b/server/src/test/java/io/druid/server/lookup/cache/LookupCoordinatorManagerTest.java @@ -62,7 +62,6 @@ 
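The renamed exitStopAndReset() is what lets these managers survive repeated leadership changes: stop() finalizes the lock and immediately returns it to its initial state, so a later start() can succeed. A minimal sketch of the calling convention, using only the LifecycleLock methods that appear in this patch (the component class itself is a placeholder):

import io.druid.concurrent.LifecycleLock;

// Illustrative only: a component that can be started and stopped repeatedly,
// for example as a coordinator gains and loses leadership.
class RestartableComponent
{
  private final LifecycleLock lifecycleLock = new LifecycleLock();

  public void start()
  {
    if (!lifecycleLock.canStart()) {
      return; // already started, or another thread won the race
    }
    try {
      // ... acquire resources, schedule background work ...
      lifecycleLock.started();
    }
    finally {
      lifecycleLock.exitStart();
    }
  }

  public void stop()
  {
    if (!lifecycleLock.canStop()) {
      return;
    }
    try {
      // ... cancel background work, release resources ...
    }
    finally {
      // exitStopAndReset() (rather than exitStop()) returns the lock to NOT_STARTED,
      // so a subsequent start() call can succeed even if stop() threw an exception.
      lifecycleLock.exitStopAndReset();
    }
  }

  public boolean isStarted()
  {
    return lifecycleLock.isStarted();
  }
}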
import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; @@ -1256,15 +1255,15 @@ public void testStartStop() throws Exception lookupCoordinatorManagerConfig ); - Assert.assertFalse(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertFalse(manager.isStarted()); manager.start(); - Assert.assertTrue(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertTrue(manager.awaitStarted(1)); Assert.assertTrue(manager.backgroundManagerIsRunning()); Assert.assertFalse(manager.waitForBackgroundTermination(10)); manager.stop(); - Assert.assertFalse(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertFalse(manager.awaitStarted(1)); Assert.assertTrue(manager.waitForBackgroundTermination(10)); Assert.assertFalse(manager.backgroundManagerIsRunning()); @@ -1293,35 +1292,35 @@ public void testMultipleStartStop() throws Exception lookupCoordinatorManagerConfig ); - Assert.assertFalse(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertFalse(manager.awaitStarted(1)); manager.start(); - Assert.assertTrue(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertTrue(manager.awaitStarted(1)); Assert.assertTrue(manager.backgroundManagerIsRunning()); Assert.assertFalse(manager.waitForBackgroundTermination(10)); manager.stop(); - Assert.assertFalse(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertFalse(manager.awaitStarted(1)); Assert.assertTrue(manager.waitForBackgroundTermination(10)); Assert.assertFalse(manager.backgroundManagerIsRunning()); manager.start(); - Assert.assertTrue(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertTrue(manager.awaitStarted(1)); Assert.assertTrue(manager.backgroundManagerIsRunning()); Assert.assertFalse(manager.waitForBackgroundTermination(10)); manager.stop(); - Assert.assertFalse(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertFalse(manager.awaitStarted(1)); Assert.assertTrue(manager.waitForBackgroundTermination(10)); Assert.assertFalse(manager.backgroundManagerIsRunning()); manager.start(); - Assert.assertTrue(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertTrue(manager.awaitStarted(1)); Assert.assertTrue(manager.backgroundManagerIsRunning()); Assert.assertFalse(manager.waitForBackgroundTermination(10)); manager.stop(); - Assert.assertFalse(manager.lifecycleLock.awaitStarted(1, TimeUnit.MILLISECONDS)); + Assert.assertFalse(manager.awaitStarted(1)); Assert.assertTrue(manager.waitForBackgroundTermination(10)); Assert.assertFalse(manager.backgroundManagerIsRunning()); From 6feac204e331e196940e2936c7f2a212135e89b8 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 2 Apr 2018 09:40:20 -0700 Subject: [PATCH 15/67] Coordinator primary segment assignment fix (#5532) * fix issue where assign primary assigns segments to all historical servers in cluster * fix test * add test to ensure primary assignment will not assign to another server while loading is in progress --- .../coordinator/SegmentReplicantLookup.java | 43 +++++- .../server/coordinator/rules/LoadRule.java | 8 +- .../DruidCoordinatorRuleRunnerTest.java | 3 + .../coordinator/rules/LoadRuleTest.java | 139 ++++++++++++++++++ 4 files changed, 187 insertions(+), 6 deletions(-) diff --git 
a/server/src/main/java/io/druid/server/coordinator/SegmentReplicantLookup.java b/server/src/main/java/io/druid/server/coordinator/SegmentReplicantLookup.java index a713c8d181c1..357eeb99be7c 100644 --- a/server/src/main/java/io/druid/server/coordinator/SegmentReplicantLookup.java +++ b/server/src/main/java/io/druid/server/coordinator/SegmentReplicantLookup.java @@ -36,6 +36,7 @@ public class SegmentReplicantLookup public static SegmentReplicantLookup make(DruidCluster cluster) { final Table segmentsInCluster = HashBasedTable.create(); + final Table loadingSegments = HashBasedTable.create(); for (SortedSet serversByType : cluster.getSortedHistoricalsByTier()) { for (ServerHolder serverHolder : serversByType) { @@ -48,17 +49,29 @@ public static SegmentReplicantLookup make(DruidCluster cluster) } segmentsInCluster.put(segment.getIdentifier(), server.getTier(), ++numReplicants); } + + // Also account for queued segments + for (DataSegment segment : serverHolder.getPeon().getSegmentsToLoad()) { + Integer numReplicants = loadingSegments.get(segment.getIdentifier(), server.getTier()); + if (numReplicants == null) { + numReplicants = 0; + } + loadingSegments.put(segment.getIdentifier(), server.getTier(), ++numReplicants); + } } } - return new SegmentReplicantLookup(segmentsInCluster); + return new SegmentReplicantLookup(segmentsInCluster, loadingSegments); } private final Table segmentsInCluster; - private SegmentReplicantLookup(Table segmentsInCluster) + private final Table loadingSegments; + + private SegmentReplicantLookup(Table segmentsInCluster, Table loadingSegments) { this.segmentsInCluster = segmentsInCluster; + this.loadingSegments = loadingSegments; } public Map getClusterTiers(String segmentId) @@ -82,4 +95,30 @@ public int getLoadedReplicants(String segmentId, String tier) Integer retVal = segmentsInCluster.get(segmentId, tier); return (retVal == null) ? 0 : retVal; } + + public int getLoadingReplicants(String segmentId, String tier) + { + Integer retVal = loadingSegments.get(segmentId, tier); + return (retVal == null) ? 
0 : retVal; + } + + public int getLoadingReplicants(String segmentId) + { + Map allTiers = loadingSegments.row(segmentId); + int retVal = 0; + for (Integer replicants : allTiers.values()) { + retVal += replicants; + } + return retVal; + } + + public int getTotalReplicants(String segmentId) + { + return getLoadedReplicants(segmentId) + getLoadingReplicants(segmentId); + } + + public int getTotalReplicants(String segmentId, String tier) + { + return getLoadedReplicants(segmentId, tier) + getLoadingReplicants(segmentId, tier); + } } diff --git a/server/src/main/java/io/druid/server/coordinator/rules/LoadRule.java b/server/src/main/java/io/druid/server/coordinator/rules/LoadRule.java index 5af4d822c4b3..dc615f800518 100644 --- a/server/src/main/java/io/druid/server/coordinator/rules/LoadRule.java +++ b/server/src/main/java/io/druid/server/coordinator/rules/LoadRule.java @@ -95,8 +95,9 @@ private void assign( final CoordinatorStats stats ) { - // if primary replica already exists - if (!currentReplicants.isEmpty()) { + // if primary replica already exists or is loading + final int loading = params.getSegmentReplicantLookup().getTotalReplicants(segment.getIdentifier()); + if (!currentReplicants.isEmpty() || loading > 0) { assignReplicas(params, segment, stats, null); } else { final ServerHolder primaryHolderToLoad = assignPrimary(params, segment); @@ -169,7 +170,6 @@ private ServerHolder assignPrimary( if (targetReplicantsInTier <= 0) { continue; } - final String tier = entry.getKey(); final List holders = getFilteredHolders( @@ -228,7 +228,7 @@ private void assignReplicas( final int numAssigned = assignReplicasForTier( tier, entry.getIntValue(), - currentReplicants.getOrDefault(tier, 0), + params.getSegmentReplicantLookup().getTotalReplicants(segment.getIdentifier(), tier), params, createLoadQueueSizeLimitingPredicate(params), segment diff --git a/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorRuleRunnerTest.java b/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorRuleRunnerTest.java index 0b7c71ba2ce6..ba32af632ccc 100644 --- a/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorRuleRunnerTest.java +++ b/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorRuleRunnerTest.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import io.druid.client.DruidServer; @@ -942,6 +943,8 @@ public void testDropServerActuallyServesSegment() LoadQueuePeon anotherMockPeon = EasyMock.createMock(LoadQueuePeon.class); EasyMock.expect(anotherMockPeon.getLoadQueueSize()).andReturn(10L).atLeastOnce(); + EasyMock.expect(anotherMockPeon.getSegmentsToLoad()).andReturn(Sets.newHashSet()).anyTimes(); + EasyMock.replay(anotherMockPeon); DruidCluster druidCluster = new DruidCluster( diff --git a/server/src/test/java/io/druid/server/coordinator/rules/LoadRuleTest.java b/server/src/test/java/io/druid/server/coordinator/rules/LoadRuleTest.java index 51200461a959..4848503a5738 100644 --- a/server/src/test/java/io/druid/server/coordinator/rules/LoadRuleTest.java +++ b/server/src/test/java/io/druid/server/coordinator/rules/LoadRuleTest.java @@ -20,7 +20,9 @@ package io.druid.server.coordinator.rules; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; import 
com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -58,7 +60,9 @@ import java.util.Arrays; import java.util.Collections; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.TreeSet; import java.util.concurrent.Executors; import java.util.stream.Collectors; @@ -190,6 +194,127 @@ public void testLoad() EasyMock.verify(throttler, mockPeon, mockBalancerStrategy); } + @Test + public void testLoadPrimaryAssignDoesNotOverAssign() + { + EasyMock.expect(throttler.canCreateReplicant(EasyMock.anyString())).andReturn(true).anyTimes(); + + final LoadQueuePeon mockPeon = createEmptyPeon(); + mockPeon.loadSegment(EasyMock.anyObject(), EasyMock.anyObject()); + EasyMock.expectLastCall().atLeastOnce(); + + LoadRule rule = createLoadRule(ImmutableMap.of( + "hot", 1 + )); + + final DataSegment segment = createDataSegment("foo"); + + EasyMock.expect(mockBalancerStrategy.findNewSegmentHomeReplicator(EasyMock.anyObject(), EasyMock.anyObject())) + .andDelegateTo(balancerStrategy) + .anyTimes(); + + EasyMock.replay(throttler, mockPeon, mockBalancerStrategy); + + DruidCluster druidCluster = new DruidCluster( + null, + ImmutableMap.of( + "hot", + Stream.of( + new ServerHolder( + new DruidServer( + "serverHot", + "hostHot", + null, + 1000, + ServerType.HISTORICAL, + "hot", + 1 + ).toImmutableDruidServer(), + mockPeon + ), new ServerHolder( + new DruidServer( + "serverHot2", + "hostHot2", + null, + 1000, + ServerType.HISTORICAL, + "hot", + 1 + ).toImmutableDruidServer(), + mockPeon + ) + ).collect(Collectors.toCollection(() -> new TreeSet<>(Collections.reverseOrder()))) + ) + ); + + CoordinatorStats stats = rule.run( + null, + DruidCoordinatorRuntimeParams.newBuilder() + .withDruidCluster(druidCluster) + .withSegmentReplicantLookup(SegmentReplicantLookup.make(druidCluster)) + .withReplicationManager(throttler) + .withBalancerStrategy(mockBalancerStrategy) + .withBalancerReferenceTimestamp(DateTimes.of("2013-01-01")) + .withAvailableSegments(Arrays.asList(segment)).build(), + segment + ); + + + Assert.assertEquals(1L, stats.getTieredStat(LoadRule.ASSIGNED_COUNT, "hot")); + + // ensure multiple runs don't assign primary segment again if at replication count + final LoadQueuePeon loadingPeon = createLoadingPeon(ImmutableList.of(segment)); + EasyMock.replay(loadingPeon); + + DruidCluster afterLoad = new DruidCluster( + null, + ImmutableMap.of( + "hot", + Stream.of( + new ServerHolder( + new DruidServer( + "serverHot", + "hostHot", + null, + 1000, + ServerType.HISTORICAL, + "hot", + 1 + ).toImmutableDruidServer(), + loadingPeon + ), new ServerHolder( + new DruidServer( + "serverHot2", + "hostHot2", + null, + 1000, + ServerType.HISTORICAL, + "hot", + 1 + ).toImmutableDruidServer(), + mockPeon + ) + ).collect(Collectors.toCollection(() -> new TreeSet<>(Collections.reverseOrder()))) + ) + ); + CoordinatorStats statsAfterLoadPrimary = rule.run( + null, + DruidCoordinatorRuntimeParams.newBuilder() + .withDruidCluster(afterLoad) + .withSegmentReplicantLookup(SegmentReplicantLookup.make(afterLoad)) + .withReplicationManager(throttler) + .withBalancerStrategy(mockBalancerStrategy) + .withBalancerReferenceTimestamp(DateTimes.of("2013-01-01")) + .withAvailableSegments(Arrays.asList(segment)).build(), + segment + ); + + + Assert.assertEquals(0, statsAfterLoadPrimary.getTieredStat(LoadRule.ASSIGNED_COUNT, "hot")); + + EasyMock.verify(throttler, 
mockPeon, mockBalancerStrategy); + } + @Test public void testLoadPriority() { @@ -619,4 +744,18 @@ private static LoadQueuePeon createEmptyPeon() return mockPeon; } + + private static LoadQueuePeon createLoadingPeon(List segments) + { + final Set segs = ImmutableSet.copyOf(segments); + final long loadingSize = segs.stream().mapToLong(DataSegment::getSize).sum(); + + final LoadQueuePeon mockPeon = EasyMock.createMock(LoadQueuePeon.class); + EasyMock.expect(mockPeon.getSegmentsToLoad()).andReturn(segs).anyTimes(); + EasyMock.expect(mockPeon.getSegmentsMarkedToDrop()).andReturn(Sets.newHashSet()).anyTimes(); + EasyMock.expect(mockPeon.getLoadQueueSize()).andReturn(loadingSize).anyTimes(); + EasyMock.expect(mockPeon.getNumberOfSegmentsInQueue()).andReturn(segs.size()).anyTimes(); + + return mockPeon; + } } From 723f7ac55095829c57f8d8a8a03aaca97adc2a49 Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Mon, 2 Apr 2018 12:10:56 -0700 Subject: [PATCH 16/67] Add support for task reports, upload reports to deep storage (#5524) * Add support for task reports, upload reports to deep storage * PR comments * Better name for method * Fix report file upload * Use TaskReportFileWriter * Checkstyle * More PR comments --- .../java/io/druid/tasklogs/NoopTaskLogs.java | 7 +++ .../java/io/druid/tasklogs/TaskLogPusher.java | 4 ++ .../io/druid/tasklogs/TaskLogStreamer.java | 5 ++ .../io/druid/storage/azure/AzureTaskLogs.java | 30 +++++++++- .../druid/storage/google/GoogleTaskLogs.java | 28 +++++++++ .../storage/hdfs/tasklog/HdfsTaskLogs.java | 38 +++++++++++- .../druid/indexing/kafka/KafkaIndexTask.java | 2 + .../indexing/kafka/KafkaIndexTaskTest.java | 4 +- .../java/io/druid/storage/s3/S3TaskLogs.java | 31 ++++++++-- .../io/druid/indexing/common/TaskReport.java | 54 +++++++++++++++++ .../indexing/common/TaskReportFileWriter.java | 58 +++++++++++++++++++ .../io/druid/indexing/common/TaskToolbox.java | 11 +++- .../indexing/common/TaskToolboxFactory.java | 8 ++- .../AppenderatorDriverRealtimeIndexTask.java | 1 + .../indexing/common/task/HadoopIndexTask.java | 3 + .../druid/indexing/common/task/IndexTask.java | 2 + .../common/tasklogs/FileTaskLogs.java | 40 +++++++++++-- .../tasklogs/SwitchingTaskLogStreamer.java | 13 +++++ .../indexing/overlord/ForkingTaskRunner.java | 5 ++ .../overlord/http/OverlordResource.java | 27 +++++++++ .../worker/executor/ExecutorLifecycle.java | 3 +- .../indexing/common/TaskToolboxTest.java | 4 +- ...penderatorDriverRealtimeIndexTaskTest.java | 3 +- .../common/task/CompactionTaskTest.java | 3 +- .../indexing/common/task/IndexTaskTest.java | 3 +- .../common/task/NoopTestTaskFileWriter.java | 36 ++++++++++++ .../common/task/RealtimeIndexTaskTest.java | 3 +- .../task/SameIntervalMergeTaskTest.java | 3 +- .../IngestSegmentFirehoseFactoryTest.java | 4 +- ...estSegmentFirehoseFactoryTimelineTest.java | 4 +- .../indexing/overlord/TaskLifecycleTest.java | 4 +- .../worker/WorkerTaskManagerTest.java | 4 +- .../worker/WorkerTaskMonitorTest.java | 4 +- .../src/main/java/io/druid/cli/CliPeon.java | 26 ++++++++- 34 files changed, 444 insertions(+), 31 deletions(-) create mode 100644 indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java create mode 100644 indexing-service/src/main/java/io/druid/indexing/common/TaskReportFileWriter.java create mode 100644 indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java diff --git a/api/src/main/java/io/druid/tasklogs/NoopTaskLogs.java b/api/src/main/java/io/druid/tasklogs/NoopTaskLogs.java index 
6fb0f309cfe2..d54c63cce182 100644 --- a/api/src/main/java/io/druid/tasklogs/NoopTaskLogs.java +++ b/api/src/main/java/io/druid/tasklogs/NoopTaskLogs.java @@ -24,6 +24,7 @@ import io.druid.java.util.common.logger.Logger; import java.io.File; +import java.io.IOException; public class NoopTaskLogs implements TaskLogs { @@ -41,6 +42,12 @@ public void pushTaskLog(String taskid, File logFile) log.info("Not pushing logs for task: %s", taskid); } + @Override + public void pushTaskReports(String taskid, File reportFile) throws IOException + { + log.info("Not pushing reports for task: %s", taskid); + } + @Override public void killAll() { diff --git a/api/src/main/java/io/druid/tasklogs/TaskLogPusher.java b/api/src/main/java/io/druid/tasklogs/TaskLogPusher.java index a904a16f5d1e..6329aac866d8 100644 --- a/api/src/main/java/io/druid/tasklogs/TaskLogPusher.java +++ b/api/src/main/java/io/druid/tasklogs/TaskLogPusher.java @@ -31,4 +31,8 @@ public interface TaskLogPusher { void pushTaskLog(String taskid, File logFile) throws IOException; + + default void pushTaskReports(String taskid, File reportFile) throws IOException + { + } } diff --git a/api/src/main/java/io/druid/tasklogs/TaskLogStreamer.java b/api/src/main/java/io/druid/tasklogs/TaskLogStreamer.java index b685c7b7659b..7569cdd145ba 100644 --- a/api/src/main/java/io/druid/tasklogs/TaskLogStreamer.java +++ b/api/src/main/java/io/druid/tasklogs/TaskLogStreamer.java @@ -40,4 +40,9 @@ public interface TaskLogStreamer * @return input supplier for this log, if available from this provider */ Optional streamTaskLog(String taskid, long offset) throws IOException; + + default Optional streamTaskReports(final String taskid) throws IOException + { + return Optional.absent(); + } } diff --git a/extensions-contrib/azure-extensions/src/main/java/io/druid/storage/azure/AzureTaskLogs.java b/extensions-contrib/azure-extensions/src/main/java/io/druid/storage/azure/AzureTaskLogs.java index 2cd17c1c41c1..8fe5f3b39266 100644 --- a/extensions-contrib/azure-extensions/src/main/java/io/druid/storage/azure/AzureTaskLogs.java +++ b/extensions-contrib/azure-extensions/src/main/java/io/druid/storage/azure/AzureTaskLogs.java @@ -54,7 +54,19 @@ public void pushTaskLog(final String taskid, final File logFile) { final String taskKey = getTaskLogKey(taskid); log.info("Pushing task log %s to: %s", logFile, taskKey); + pushTaskFile(taskid, logFile, taskKey); + } + + @Override + public void pushTaskReports(String taskid, File reportFile) throws IOException + { + final String taskKey = getTaskReportsKey(taskid); + log.info("Pushing task reports %s to: %s", reportFile, taskKey); + pushTaskFile(taskid, reportFile, taskKey); + } + private void pushTaskFile(final String taskId, final File logFile, String taskKey) + { try { AzureUtils.retryAzureOperation( () -> { @@ -71,9 +83,19 @@ public void pushTaskLog(final String taskid, final File logFile) @Override public Optional streamTaskLog(final String taskid, final long offset) throws IOException + { + return streamTaskFile(taskid, offset, getTaskLogKey(taskid)); + } + + @Override + public Optional streamTaskReports(String taskid) throws IOException + { + return streamTaskFile(taskid, 0, getTaskReportsKey(taskid)); + } + + private Optional streamTaskFile(final String taskid, final long offset, String taskKey) throws IOException { final String container = config.getContainer(); - final String taskKey = getTaskLogKey(taskid); try { if (!azureStorage.getBlobExists(container, taskKey)) { @@ -116,12 +138,16 @@ public InputStream openStream() 
throws IOException } } - private String getTaskLogKey(String taskid) { return StringUtils.format("%s/%s/log", config.getPrefix(), taskid); } + private String getTaskReportsKey(String taskid) + { + return StringUtils.format("%s/%s/report.json", config.getPrefix(), taskid); + } + @Override public void killAll() { diff --git a/extensions-contrib/google-extensions/src/main/java/io/druid/storage/google/GoogleTaskLogs.java b/extensions-contrib/google-extensions/src/main/java/io/druid/storage/google/GoogleTaskLogs.java index 6ed64576266f..d379f3445abb 100644 --- a/extensions-contrib/google-extensions/src/main/java/io/druid/storage/google/GoogleTaskLogs.java +++ b/extensions-contrib/google-extensions/src/main/java/io/druid/storage/google/GoogleTaskLogs.java @@ -51,7 +51,19 @@ public void pushTaskLog(final String taskid, final File logFile) throws IOExcept { final String taskKey = getTaskLogKey(taskid); LOG.info("Pushing task log %s to: %s", logFile, taskKey); + pushTaskFile(taskid, logFile, taskKey); + } + + @Override + public void pushTaskReports(String taskid, File reportFile) throws IOException + { + final String taskKey = getTaskReportKey(taskid); + LOG.info("Pushing task reports %s to: %s", reportFile, taskKey); + pushTaskFile(taskid, reportFile, taskKey); + } + private void pushTaskFile(final String taskid, final File logFile, final String taskKey) throws IOException + { FileInputStream fileSteam = new FileInputStream(logFile); InputStreamContent mediaContent = new InputStreamContent("text/plain", fileSteam); @@ -64,7 +76,18 @@ public void pushTaskLog(final String taskid, final File logFile) throws IOExcept public Optional streamTaskLog(final String taskid, final long offset) throws IOException { final String taskKey = getTaskLogKey(taskid); + return streamTaskFile(taskid, offset, taskKey); + } + @Override + public Optional streamTaskReports(String taskid) throws IOException + { + final String taskKey = getTaskReportKey(taskid); + return streamTaskFile(taskid, 0, taskKey); + } + + private Optional streamTaskFile(final String taskid, final long offset, String taskKey) throws IOException + { try { if (!storage.exists(config.getBucket(), taskKey)) { return Optional.absent(); @@ -111,6 +134,11 @@ private String getTaskLogKey(String taskid) return config.getPrefix() + "/" + taskid.replaceAll(":", "_"); } + private String getTaskReportKey(String taskid) + { + return config.getPrefix() + "/" + taskid.replaceAll(":", "_") + ".report.json"; + } + @Override public void killAll() { diff --git a/extensions-core/hdfs-storage/src/main/java/io/druid/storage/hdfs/tasklog/HdfsTaskLogs.java b/extensions-core/hdfs-storage/src/main/java/io/druid/storage/hdfs/tasklog/HdfsTaskLogs.java index 5851c2fa48a9..61da166187f0 100644 --- a/extensions-core/hdfs-storage/src/main/java/io/druid/storage/hdfs/tasklog/HdfsTaskLogs.java +++ b/extensions-core/hdfs-storage/src/main/java/io/druid/storage/hdfs/tasklog/HdfsTaskLogs.java @@ -61,6 +61,21 @@ public void pushTaskLog(String taskId, File logFile) throws IOException { final Path path = getTaskLogFileFromId(taskId); log.info("Writing task log to: %s", path); + pushTaskFile(path, logFile); + log.info("Wrote task log to: %s", path); + } + + @Override + public void pushTaskReports(String taskId, File reportFile) throws IOException + { + final Path path = getTaskReportsFileFromId(taskId); + log.info("Writing task reports to: %s", path); + pushTaskFile(path, reportFile); + log.info("Wrote task reports to: %s", path); + } + + private void pushTaskFile(Path path, File logFile) 
throws IOException + { final FileSystem fs = path.getFileSystem(hadoopConfig); try ( final InputStream in = new FileInputStream(logFile); @@ -68,14 +83,24 @@ public void pushTaskLog(String taskId, File logFile) throws IOException ) { ByteStreams.copy(in, out); } - - log.info("Wrote task log to: %s", path); } @Override public Optional streamTaskLog(final String taskId, final long offset) throws IOException { final Path path = getTaskLogFileFromId(taskId); + return streamTaskFile(path, offset); + } + + @Override + public Optional streamTaskReports(String taskId) throws IOException + { + final Path path = getTaskReportsFileFromId(taskId); + return streamTaskFile(path, 0); + } + + private Optional streamTaskFile(final Path path, final long offset) throws IOException + { final FileSystem fs = path.getFileSystem(hadoopConfig); if (fs.exists(path)) { return Optional.of( @@ -113,6 +138,15 @@ private Path getTaskLogFileFromId(String taskId) return new Path(mergePaths(config.getDirectory(), taskId.replaceAll(":", "_"))); } + /** + * Due to https://issues.apache.org/jira/browse/HDFS-13 ":" are not allowed in + * path names. So we format paths differently for HDFS. + */ + private Path getTaskReportsFileFromId(String taskId) + { + return new Path(mergePaths(config.getDirectory(), taskId.replaceAll(":", "_") + ".reports.json")); + } + // some hadoop version Path.mergePaths does not exist private static String mergePaths(String path1, String path2) { diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java index a325948a6506..82558f0c796a 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java @@ -904,6 +904,7 @@ public void onFailure(Throwable t) toolbox.getDataSegmentServerAnnouncer().unannounce(); } + toolbox.getTaskReportFileWriter().write(null); return success(); } @@ -1272,6 +1273,7 @@ public String apply(DataSegment input) toolbox.getDataSegmentServerAnnouncer().unannounce(); } + toolbox.getTaskReportFileWriter().write(null); return success(); } diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java index 8bdd3c7f087f..9fb0495284f2 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -57,6 +57,7 @@ import io.druid.indexing.common.actions.TaskActionToolbox; import io.druid.indexing.common.config.TaskConfig; import io.druid.indexing.common.config.TaskStorageConfig; +import io.druid.indexing.common.task.NoopTestTaskFileWriter; import io.druid.indexing.common.task.Task; import io.druid.indexing.kafka.supervisor.KafkaSupervisor; import io.druid.indexing.kafka.test.TestBroker; @@ -2032,7 +2033,8 @@ public List getLocations() EasyMock.createNiceMock(DruidNodeAnnouncer.class), EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), - new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0) + new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0), + new NoopTestTaskFileWriter() ); } diff --git 
a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TaskLogs.java b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TaskLogs.java index afef97a8892c..e2f6d4310e43 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TaskLogs.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/storage/s3/S3TaskLogs.java @@ -57,8 +57,19 @@ public S3TaskLogs(S3TaskLogsConfig config, AmazonS3 service) @Override public Optional streamTaskLog(final String taskid, final long offset) throws IOException { - final String taskKey = getTaskLogKey(taskid); + final String taskKey = getTaskLogKey(taskid, "log"); + return streamTaskFile(offset, taskKey); + } + + @Override + public Optional streamTaskReports(String taskid) throws IOException + { + final String taskKey = getTaskLogKey(taskid, "report.json"); + return streamTaskFile(0, taskKey); + } + private Optional streamTaskFile(final long offset, String taskKey) throws IOException + { try { final ObjectMetadata objectMetadata = service.getObjectMetadata(config.getS3Bucket(), taskKey); @@ -107,9 +118,21 @@ public InputStream openStream() throws IOException @Override public void pushTaskLog(final String taskid, final File logFile) throws IOException { - final String taskKey = getTaskLogKey(taskid); + final String taskKey = getTaskLogKey(taskid, "log"); log.info("Pushing task log %s to: %s", logFile, taskKey); + pushTaskFile(logFile, taskKey); + } + + @Override + public void pushTaskReports(String taskid, File reportFile) throws IOException + { + final String taskKey = getTaskLogKey(taskid, "report.json"); + log.info("Pushing task reports %s to: %s", reportFile, taskKey); + pushTaskFile(reportFile, taskKey); + } + private void pushTaskFile(final File logFile, String taskKey) throws IOException + { try { S3Utils.retryS3Operation( () -> { @@ -124,9 +147,9 @@ public void pushTaskLog(final String taskid, final File logFile) throws IOExcept } } - private String getTaskLogKey(String taskid) + private String getTaskLogKey(String taskid, String filename) { - return StringUtils.format("%s/%s/log", config.getS3Prefix(), taskid); + return StringUtils.format("%s/%s/%s", config.getS3Prefix(), taskid, filename); } @Override diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java new file mode 100644 index 000000000000..eff6520741ba --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java @@ -0,0 +1,54 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.indexing.common; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.google.common.collect.Maps; + +import java.util.Map; + +/** + * TaskReport objects contain additional information about an indexing task, such as row statistics, errors, and + * published segments. They are kept in deep storage along with task logs. + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") +@JsonSubTypes(value = { +}) +public interface TaskReport +{ + String getTaskId(); + + String getReportKey(); + + /** + * @return A JSON-serializable Object that contains a TaskReport's information + */ + Object getPayload(); + + static Map buildTaskReports(TaskReport... taskReports) + { + Map taskReportMap = Maps.newHashMap(); + for (TaskReport taskReport : taskReports) { + taskReportMap.put(taskReport.getReportKey(), taskReport); + } + return taskReportMap; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskReportFileWriter.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskReportFileWriter.java new file mode 100644 index 000000000000..eb5e9d9db40b --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskReportFileWriter.java @@ -0,0 +1,58 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.indexing.common; + +import com.fasterxml.jackson.databind.ObjectMapper; +import io.druid.java.util.common.logger.Logger; +import org.apache.commons.io.FileUtils; + +import java.io.File; + +public class TaskReportFileWriter +{ + private static final Logger log = new Logger(TaskReportFileWriter.class); + + private final File reportsFile; + private ObjectMapper objectMapper; + + public TaskReportFileWriter(File reportFile) + { + this.reportsFile = reportFile; + } + + public void write(TaskReport report) + { + try { + final File reportsFileParent = reportsFile.getParentFile(); + if (reportsFileParent != null) { + FileUtils.forceMkdir(reportsFileParent); + } + objectMapper.writeValue(reportsFile, report); + } + catch (Exception e) { + log.error(e, "Encountered exception in write()."); + } + } + + public void setObjectMapper(ObjectMapper objectMapper) + { + this.objectMapper = objectMapper; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskToolbox.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskToolbox.java index dd132769192f..9deab2bc497c 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskToolbox.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskToolbox.java @@ -90,6 +90,7 @@ public class TaskToolbox private final Cache cache; private final CacheConfig cacheConfig; private final IndexMergerV9 indexMergerV9; + private final TaskReportFileWriter taskReportFileWriter; private final DruidNodeAnnouncer druidNodeAnnouncer; private final DruidNode druidNode; @@ -120,7 +121,8 @@ public TaskToolbox( DruidNodeAnnouncer druidNodeAnnouncer, DruidNode druidNode, LookupNodeService lookupNodeService, - DataNodeService dataNodeService + DataNodeService dataNodeService, + TaskReportFileWriter taskReportFileWriter ) { this.config = config; @@ -147,6 +149,8 @@ public TaskToolbox( this.druidNode = druidNode; this.lookupNodeService = lookupNodeService; this.dataNodeService = dataNodeService; + this.taskReportFileWriter = taskReportFileWriter; + this.taskReportFileWriter.setObjectMapper(this.objectMapper); } public TaskConfig getConfig() @@ -303,4 +307,9 @@ public DruidNode getDruidNode() { return druidNode; } + + public TaskReportFileWriter getTaskReportFileWriter() + { + return taskReportFileWriter; + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskToolboxFactory.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskToolboxFactory.java index c17b23fe210a..1a35ec040f8e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskToolboxFactory.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskToolboxFactory.java @@ -78,6 +78,7 @@ public class TaskToolboxFactory private final DruidNode druidNode; private final LookupNodeService lookupNodeService; private final DataNodeService dataNodeService; + private final TaskReportFileWriter taskReportFileWriter; @Inject public TaskToolboxFactory( @@ -103,7 +104,8 @@ public TaskToolboxFactory( DruidNodeAnnouncer druidNodeAnnouncer, @RemoteChatHandler DruidNode druidNode, LookupNodeService lookupNodeService, - DataNodeService dataNodeService + DataNodeService dataNodeService, + TaskReportFileWriter taskReportFileWriter ) { this.config = config; @@ -129,6 +131,7 @@ public TaskToolboxFactory( this.druidNode = druidNode; this.lookupNodeService = lookupNodeService; this.dataNodeService = dataNodeService; + this.taskReportFileWriter = taskReportFileWriter; } public TaskToolbox build(Task task) @@ 
-158,7 +161,8 @@ public TaskToolbox build(Task task) druidNodeAnnouncer, druidNode, lookupNodeService, - dataNodeService + dataNodeService, + taskReportFileWriter ); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index 3181b252544a..cf408e14aaa0 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -326,6 +326,7 @@ dataSchema, new RealtimeIOConfig(null, null, null), null } log.info("Job done!"); + toolbox.getTaskReportFileWriter().write(null); return TaskStatus.success(getId()); } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index 4386b5b35953..f8e80e569a36 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -229,6 +229,7 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception specVersion, version ); + toolbox.getTaskReportFileWriter().write(null); return TaskStatus.failure(getId()); } } @@ -253,8 +254,10 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception ); toolbox.publishSegments(publishedSegments); + toolbox.getTaskReportFileWriter().write(null); return TaskStatus.success(getId()); } else { + toolbox.getTaskReportFileWriter().write(null); return TaskStatus.failure(getId()); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index 7a4dff26c85b..08f857ea4212 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -262,8 +262,10 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception } if (generateAndPublishSegments(toolbox, dataSchema, shardSpecs, versions, firehoseFactory, firehoseTempDir)) { + toolbox.getTaskReportFileWriter().write(null); return TaskStatus.success(getId()); } else { + toolbox.getTaskReportFileWriter().write(null); return TaskStatus.failure(getId()); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/FileTaskLogs.java b/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/FileTaskLogs.java index 1c09b56cd8f9..579234e6c07e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/FileTaskLogs.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/FileTaskLogs.java @@ -53,7 +53,7 @@ public FileTaskLogs( public void pushTaskLog(final String taskid, File file) throws IOException { if (config.getDirectory().exists() || config.getDirectory().mkdirs()) { - final File outputFile = fileForTask(taskid); + final File outputFile = fileForTask(taskid, file.getName()); Files.copy(file, outputFile); log.info("Wrote task log to: %s", outputFile); } else { @@ -61,10 +61,22 @@ public void pushTaskLog(final String taskid, File file) throws IOException } } + @Override + public void pushTaskReports(String taskid, File reportFile) throws IOException + { + if (config.getDirectory().exists() || config.getDirectory().mkdirs()) { + final File 
outputFile = fileForTask(taskid, reportFile.getName()); + Files.copy(reportFile, outputFile); + log.info("Wrote task report to: %s", outputFile); + } else { + throw new IOE("Unable to create task report dir[%s]", config.getDirectory()); + } + } + @Override public Optional streamTaskLog(final String taskid, final long offset) { - final File file = fileForTask(taskid); + final File file = fileForTask(taskid, "log"); if (file.exists()) { return Optional.of( new ByteSource() @@ -81,9 +93,29 @@ public InputStream openStream() throws IOException } } - private File fileForTask(final String taskid) + @Override + public Optional streamTaskReports(final String taskid) + { + final File file = fileForTask(taskid, "report.json"); + if (file.exists()) { + return Optional.of( + new ByteSource() + { + @Override + public InputStream openStream() throws IOException + { + return LogUtils.streamFile(file, 0); + } + } + ); + } else { + return Optional.absent(); + } + } + + private File fileForTask(final String taskid, String filename) { - return new File(config.getDirectory(), StringUtils.format("%s.log", taskid)); + return new File(config.getDirectory(), StringUtils.format("%s.%s", taskid, filename)); } @Override diff --git a/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/SwitchingTaskLogStreamer.java b/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/SwitchingTaskLogStreamer.java index cb8e0b34325d..3b7e33baa76c 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/SwitchingTaskLogStreamer.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/tasklogs/SwitchingTaskLogStreamer.java @@ -53,4 +53,17 @@ public Optional streamTaskLog(String taskid, long offset) throws IOE return Optional.absent(); } + + @Override + public Optional streamTaskReports(String taskid) throws IOException + { + for (TaskLogStreamer provider : providers) { + final Optional stream = provider.streamTaskReports(taskid); + if (stream.isPresent()) { + return stream; + } + } + + return Optional.absent(); + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/ForkingTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/ForkingTaskRunner.java index 519c172f35f6..041dfd879f9d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/ForkingTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/ForkingTaskRunner.java @@ -260,6 +260,7 @@ public TaskStatus call() final File taskFile = new File(taskDir, "task.json"); final File statusFile = new File(attemptDir, "status.json"); final File logFile = new File(taskDir, "log"); + final File reportsFile = new File(attemptDir, "report.json"); // time to adjust process holders synchronized (tasks) { @@ -408,6 +409,7 @@ public TaskStatus call() command.add("peon"); command.add(taskFile.toString()); command.add(statusFile.toString()); + command.add(reportsFile.toString()); String nodeType = task.getNodeType(); if (nodeType != null) { command.add("--nodeType"); @@ -459,6 +461,9 @@ public TaskStatus call() Thread.currentThread().setName(priorThreadName); // Upload task logs taskLogPusher.pushTaskLog(task.getId(), logFile); + if (reportsFile.exists()) { + taskLogPusher.pushTaskReports(task.getId(), reportsFile); + } } TaskStatus status; diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java index 8be1dcdbe95e..1fa59cebd1a8 100644 --- 
a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java @@ -732,6 +732,33 @@ public Response doGetLog( } } + @GET + @Path("/task/{taskid}/reports") + @Produces(MediaType.APPLICATION_JSON) + @ResourceFilters(TaskResourceFilter.class) + public Response doGetReports( + @PathParam("taskid") final String taskid + ) + { + try { + final Optional stream = taskLogStreamer.streamTaskReports(taskid); + if (stream.isPresent()) { + return Response.ok(stream.get().openStream()).build(); + } else { + return Response.status(Response.Status.NOT_FOUND) + .entity( + "No task reports were found for this task. " + + "The task may not exist, or it may not have completed yet." + ) + .build(); + } + } + catch (Exception e) { + log.warn(e, "Failed to stream task reports for task %s", taskid); + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).build(); + } + } + @GET @Path("/dataSources/{dataSource}") @Produces(MediaType.APPLICATION_JSON) diff --git a/indexing-service/src/main/java/io/druid/indexing/worker/executor/ExecutorLifecycle.java b/indexing-service/src/main/java/io/druid/indexing/worker/executor/ExecutorLifecycle.java index d06830eaf6d7..98e6382a43ae 100644 --- a/indexing-service/src/main/java/io/druid/indexing/worker/executor/ExecutorLifecycle.java +++ b/indexing-service/src/main/java/io/druid/indexing/worker/executor/ExecutorLifecycle.java @@ -37,6 +37,7 @@ import io.druid.java.util.common.ISE; import io.druid.java.util.common.lifecycle.LifecycleStart; import io.druid.java.util.common.lifecycle.LifecycleStop; +import org.apache.commons.io.FileUtils; import java.io.File; import java.io.IOException; @@ -190,7 +191,7 @@ public TaskStatus apply(TaskStatus taskStatus) final File statusFileParent = statusFile.getParentFile(); if (statusFileParent != null) { - statusFileParent.mkdirs(); + FileUtils.forceMkdir(statusFileParent); } jsonMapper.writeValue(statusFile, taskStatus); diff --git a/indexing-service/src/test/java/io/druid/indexing/common/TaskToolboxTest.java b/indexing-service/src/test/java/io/druid/indexing/common/TaskToolboxTest.java index 52bfe1ea891a..f73da9d2128f 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/TaskToolboxTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/TaskToolboxTest.java @@ -25,6 +25,7 @@ import io.druid.client.cache.CacheConfig; import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.config.TaskConfig; +import io.druid.indexing.common.task.NoopTestTaskFileWriter; import io.druid.indexing.common.task.Task; import io.druid.java.util.common.Intervals; import io.druid.java.util.emitter.service.ServiceEmitter; @@ -114,7 +115,8 @@ public void setUp() throws IOException null, null, null, - null + null, + new NoopTestTaskFileWriter() ); } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java index 2761aacbdb64..abd451a9ed30 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java @@ -1243,7 +1243,8 @@ public List getLocations() EasyMock.createNiceMock(DruidNodeAnnouncer.class), EasyMock.createNiceMock(DruidNode.class), new 
LookupNodeService("tier"), - new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0) + new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0), + new NoopTestTaskFileWriter() ); } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java index 440ed912f0e0..27ecd4bb66a2 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java @@ -507,7 +507,8 @@ private static class TestTaskToolbox extends TaskToolbox null, null, null, - null + null, + new NoopTestTaskFileWriter() ); this.segmentFileMap = segmentFileMap; } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java index f449c3ab05d1..8106c79122e8 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java @@ -1043,7 +1043,8 @@ public Map makeLoadSpec(URI uri) null, null, null, - null + null, + new NoopTestTaskFileWriter() ); indexTask.isReady(box.getTaskActionClient()); diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java b/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java new file mode 100644 index 000000000000..cebee6c624f6 --- /dev/null +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java @@ -0,0 +1,36 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.indexing.common.task; + +import io.druid.indexing.common.TaskReport; +import io.druid.indexing.common.TaskReportFileWriter; + +public class NoopTestTaskFileWriter extends TaskReportFileWriter +{ + public NoopTestTaskFileWriter() + { + super(null); + } + + @Override + public void write(TaskReport report) + { + } +} diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java index 518d1d63b5d0..916c925e0d02 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java @@ -1084,7 +1084,8 @@ public List getLocations() EasyMock.createNiceMock(DruidNodeAnnouncer.class), EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), - new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0) + new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0), + new NoopTestTaskFileWriter() ); return toolboxFactory.build(task); diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/SameIntervalMergeTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/SameIntervalMergeTaskTest.java index c1cd914a8a37..e3c232db23c8 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/SameIntervalMergeTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/SameIntervalMergeTaskTest.java @@ -256,7 +256,8 @@ public void cleanup(DataSegment segment) null, null, null, - null + null, + new NoopTestTaskFileWriter() ) ); diff --git a/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java b/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java index e311b514da8d..d85a1e44bd22 100644 --- a/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTest.java @@ -50,6 +50,7 @@ import io.druid.indexing.common.config.TaskConfig; import io.druid.indexing.common.config.TaskStorageConfig; import io.druid.indexing.common.task.NoopTask; +import io.druid.indexing.common.task.NoopTestTaskFileWriter; import io.druid.indexing.common.task.Task; import io.druid.indexing.overlord.HeapMemoryTaskStorage; import io.druid.indexing.overlord.TaskLockbox; @@ -312,7 +313,8 @@ public DataSegment restore(DataSegment segment) null, null, null, - null + null, + new NoopTestTaskFileWriter() ); Collection values = new LinkedList<>(); for (InputRowParser parser : Arrays.asList( diff --git a/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java b/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java index ff1a738260b9..14a52cbfcad7 100644 --- a/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/firehose/IngestSegmentFirehoseFactoryTimelineTest.java @@ -47,6 +47,7 @@ import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.config.TaskConfig; import io.druid.indexing.common.task.NoopTask; +import io.druid.indexing.common.task.NoopTestTaskFileWriter; import io.druid.indexing.common.task.Task; import 
io.druid.java.util.common.DateTimes; import io.druid.java.util.common.Intervals; @@ -343,7 +344,8 @@ public TaskActionClient create(Task task) null, null, null, - null + null, + new NoopTestTaskFileWriter() ); final IngestSegmentFirehoseFactory factory = new IngestSegmentFirehoseFactory( DATA_SOURCE, diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java index 5fb348fc363d..c17452f654aa 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java @@ -63,6 +63,7 @@ import io.druid.indexing.common.task.IndexTask.IndexIngestionSpec; import io.druid.indexing.common.task.IndexTask.IndexTuningConfig; import io.druid.indexing.common.task.KillTask; +import io.druid.indexing.common.task.NoopTestTaskFileWriter; import io.druid.indexing.common.task.RealtimeIndexTask; import io.druid.indexing.common.task.Task; import io.druid.indexing.common.task.TaskResource; @@ -605,7 +606,8 @@ public void unannounceSegments(Iterable segments) EasyMock.createNiceMock(DruidNodeAnnouncer.class), EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), - new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0) + new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0), + new NoopTestTaskFileWriter() ); } diff --git a/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskManagerTest.java b/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskManagerTest.java index 3b3ca7393fb5..9be677ae07ba 100644 --- a/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskManagerTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskManagerTest.java @@ -35,6 +35,7 @@ import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.config.TaskConfig; import io.druid.indexing.common.task.NoopTask; +import io.druid.indexing.common.task.NoopTestTaskFileWriter; import io.druid.indexing.common.task.Task; import io.druid.indexing.common.task.Tasks; import io.druid.indexing.overlord.ThreadPoolTaskRunner; @@ -125,7 +126,8 @@ public List getLocations() null, null, null, - null + null, + new NoopTestTaskFileWriter() ), taskConfig, new NoopServiceEmitter(), diff --git a/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskMonitorTest.java b/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskMonitorTest.java index 1b74f0377388..51d27f0b2a48 100644 --- a/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskMonitorTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/worker/WorkerTaskMonitorTest.java @@ -36,6 +36,7 @@ import io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.common.actions.TaskActionClientFactory; import io.druid.indexing.common.config.TaskConfig; +import io.druid.indexing.common.task.NoopTestTaskFileWriter; import io.druid.indexing.common.task.Task; import io.druid.indexing.overlord.TestRemoteTaskRunnerConfig; import io.druid.indexing.overlord.ThreadPoolTaskRunner; @@ -190,7 +191,8 @@ public List getLocations() null, null, null, - null + null, + new NoopTestTaskFileWriter() ), taskConfig, new NoopServiceEmitter(), diff --git a/services/src/main/java/io/druid/cli/CliPeon.java b/services/src/main/java/io/druid/cli/CliPeon.java index a205dfab01b3..d7da80bae62e 100644 --- 
a/services/src/main/java/io/druid/cli/CliPeon.java +++ b/services/src/main/java/io/druid/cli/CliPeon.java @@ -54,6 +54,7 @@ import io.druid.guice.annotations.Smile; import io.druid.indexing.common.RetryPolicyConfig; import io.druid.indexing.common.RetryPolicyFactory; +import io.druid.indexing.common.TaskReportFileWriter; import io.druid.indexing.common.TaskToolboxFactory; import io.druid.indexing.common.actions.LocalTaskActionClientFactory; import io.druid.indexing.common.actions.RemoteTaskActionClientFactory; @@ -113,9 +114,18 @@ ) public class CliPeon extends GuiceRunnable { - @Arguments(description = "task.json status.json", required = true) + @Arguments(description = "task.json status.json report.json", required = true) public List taskAndStatusFile; + // path to store the task's stdout log + private String taskLogPath; + + // path to store the task's TaskStatus + private String taskStatusPath; + + // path to store the task's TaskReport objects + private String taskReportPath; + @Option(name = "--nodeType", title = "nodeType", description = "Set the node type to expose on ZK") public String nodeType = "indexer-executor"; @@ -141,6 +151,10 @@ protected List getModules() @Override public void configure(Binder binder) { + taskLogPath = taskAndStatusFile.get(0); + taskStatusPath = taskAndStatusFile.get(1); + taskReportPath = taskAndStatusFile.get(2); + binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/peon"); binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0); binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1); @@ -183,8 +197,14 @@ public void configure(Binder binder) LifecycleModule.register(binder, ExecutorLifecycle.class); binder.bind(ExecutorLifecycleConfig.class).toInstance( new ExecutorLifecycleConfig() - .setTaskFile(new File(taskAndStatusFile.get(0))) - .setStatusFile(new File(taskAndStatusFile.get(1))) + .setTaskFile(new File(taskLogPath)) + .setStatusFile(new File(taskStatusPath)) + ); + + binder.bind(TaskReportFileWriter.class).toInstance( + new TaskReportFileWriter( + new File(taskReportPath) + ) ); binder.bind(TaskRunner.class).to(ThreadPoolTaskRunner.class); From a81ae9902138eeb74e5776b9ea0cc7f640795920 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 3 Apr 2018 11:21:52 -0700 Subject: [PATCH 17/67] add 'stopped' check and handling to HttpLoadQueuePeon load and drop segment methods (#5555) * add stopped check and handling to HttpLoadQueuePeon load and drop segment methods * fix unrelated timeout :( * revert unintended change * PR feedback: change logging * fix dumb --- .../server/coordinator/HttpLoadQueuePeon.java | 50 +++--- .../CuratorDruidCoordinatorTest.java | 2 +- .../coordinator/HttpLoadQueuePeonTest.java | 151 ++++++++++-------- 3 files changed, 121 insertions(+), 82 deletions(-) diff --git a/server/src/main/java/io/druid/server/coordinator/HttpLoadQueuePeon.java b/server/src/main/java/io/druid/server/coordinator/HttpLoadQueuePeon.java index dbeeb7386018..ece1d4884fa1 100644 --- a/server/src/main/java/io/druid/server/coordinator/HttpLoadQueuePeon.java +++ b/server/src/main/java/io/druid/server/coordinator/HttpLoadQueuePeon.java @@ -28,16 +28,16 @@ import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import io.druid.java.util.common.ISE; import io.druid.java.util.common.RE; import io.druid.java.util.common.StringUtils; +import io.druid.java.util.common.concurrent.ScheduledExecutors; 
import io.druid.java.util.emitter.EmittingLogger; import io.druid.java.util.http.client.HttpClient; import io.druid.java.util.http.client.Request; import io.druid.java.util.http.client.io.AppendableByteArrayInputStream; import io.druid.java.util.http.client.response.ClientResponse; import io.druid.java.util.http.client.response.InputStreamResponseHandler; -import io.druid.java.util.common.ISE; -import io.druid.java.util.common.concurrent.ScheduledExecutors; import io.druid.server.coordination.DataSegmentChangeCallback; import io.druid.server.coordination.DataSegmentChangeHandler; import io.druid.server.coordination.DataSegmentChangeRequest; @@ -61,7 +61,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.ExecutorService; @@ -261,6 +260,7 @@ public void onSuccess(InputStream result) public void onFailure(Throwable t) { try { + responseHandler.description = t.toString(); logRequestFailure(t); } finally { @@ -333,20 +333,15 @@ public void start() ScheduledExecutors.scheduleAtFixedRate( processingExecutor, new Duration(config.getHttpLoadQueuePeonRepeatDelay()), - new Callable() - { - @Override - public ScheduledExecutors.Signal call() - { - if (!stopped) { - doSegmentManagement(); - } + () -> { + if (!stopped) { + doSegmentManagement(); + } - if (stopped) { - return ScheduledExecutors.Signal.STOP; - } else { - return ScheduledExecutors.Signal.REPEAT; - } + if (stopped) { + return ScheduledExecutors.Signal.STOP; + } else { + return ScheduledExecutors.Signal.REPEAT; } } ); @@ -364,11 +359,11 @@ public void stop() stopped = true; for (SegmentHolder holder : segmentsToDrop.values()) { - holder.requestSucceeded(); + holder.requestFailed("Stopping load queue peon."); } for (SegmentHolder holder : segmentsToLoad.values()) { - holder.requestSucceeded(); + holder.requestFailed("Stopping load queue peon."); } segmentsToDrop.clear(); @@ -382,6 +377,16 @@ public void stop() public void loadSegment(DataSegment segment, LoadPeonCallback callback) { synchronized (lock) { + if (stopped) { + log.warn( + "Server[%s] cannot load segment[%s] because load queue peon is stopped.", + serverId, + segment.getIdentifier() + ); + callback.execute(); + return; + } + SegmentHolder holder = segmentsToLoad.get(segment); if (holder == null) { @@ -398,6 +403,15 @@ public void loadSegment(DataSegment segment, LoadPeonCallback callback) public void dropSegment(DataSegment segment, LoadPeonCallback callback) { synchronized (lock) { + if (stopped) { + log.warn( + "Server[%s] cannot drop segment[%s] because load queue peon is stopped.", + serverId, + segment.getIdentifier() + ); + callback.execute(); + return; + } SegmentHolder holder = segmentsToDrop.get(segment); if (holder == null) { diff --git a/server/src/test/java/io/druid/server/coordinator/CuratorDruidCoordinatorTest.java b/server/src/test/java/io/druid/server/coordinator/CuratorDruidCoordinatorTest.java index 19e9c3a1bcf4..256d819cd908 100644 --- a/server/src/test/java/io/druid/server/coordinator/CuratorDruidCoordinatorTest.java +++ b/server/src/test/java/io/druid/server/coordinator/CuratorDruidCoordinatorTest.java @@ -240,7 +240,7 @@ public void tearDown() throws Exception tearDownServerAndCurator(); } - @Test(timeout = 5_000) + @Test(timeout = 10_000) public void testMoveSegment() throws Exception { segmentViewInitLatch = new CountDownLatch(1); diff --git 
a/server/src/test/java/io/druid/server/coordinator/HttpLoadQueuePeonTest.java b/server/src/test/java/io/druid/server/coordinator/HttpLoadQueuePeonTest.java index 72fb9a36a5d5..c2388359a1ec 100644 --- a/server/src/test/java/io/druid/server/coordinator/HttpLoadQueuePeonTest.java +++ b/server/src/test/java/io/druid/server/coordinator/HttpLoadQueuePeonTest.java @@ -24,14 +24,14 @@ import com.google.common.collect.ImmutableMap; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; -import io.druid.java.util.http.client.HttpClient; -import io.druid.java.util.http.client.Request; -import io.druid.java.util.http.client.response.HttpResponseHandler; import io.druid.discovery.DiscoveryDruidNode; import io.druid.discovery.DruidNodeDiscovery; import io.druid.java.util.common.Intervals; import io.druid.java.util.common.RE; import io.druid.java.util.common.concurrent.Execs; +import io.druid.java.util.http.client.HttpClient; +import io.druid.java.util.http.client.Request; +import io.druid.java.util.http.client.response.HttpResponseHandler; import io.druid.server.ServerTestHelper; import io.druid.server.coordination.DataSegmentChangeRequest; import io.druid.server.coordination.SegmentLoadDropHandler; @@ -57,40 +57,92 @@ */ public class HttpLoadQueuePeonTest { + final DataSegment segment1 = new DataSegment( + "test1", Intervals.of("2014/2015"), "v1", + null, null, null, null, 0, 0 + ); + + final DataSegment segment2 = new DataSegment( + "test2", Intervals.of("2014/2015"), "v1", + null, null, null, null, 0, 0 + ); + + final DataSegment segment3 = new DataSegment( + "test3", Intervals.of("2014/2015"), "v1", + null, null, null, null, 0, 0 + ); + + final DataSegment segment4 = new DataSegment( + "test4", Intervals.of("2014/2015"), "v1", + null, null, null, null, 0, 0 + ); + + final TestDruidCoordinatorConfig config = new TestDruidCoordinatorConfig( + null, + null, + null, + null, + null, + null, + 10, + null, + false, + false, + Duration.ZERO + ) + { + @Override + public int getHttpLoadQueuePeonBatchSize() + { + return 2; + } + }; + @Test(timeout = 10000) public void testSimple() throws Exception { - final DataSegment segment1 = new DataSegment( - "test1", Intervals.of("2014/2015"), "v1", - null, null, null, null, 0, 0 + HttpLoadQueuePeon httpLoadQueuePeon = new HttpLoadQueuePeon( + "http://dummy:4000", + ServerTestHelper.MAPPER, + new TestHttpClient(), + config, + Executors.newScheduledThreadPool( + 2, + Execs.makeThreadFactory("HttpLoadQueuePeonTest-%s") + ), + Execs.singleThreaded("HttpLoadQueuePeonTest") ); - final DataSegment segment2 = new DataSegment( - "test2", Intervals.of("2014/2015"), "v1", - null, null, null, null, 0, 0 - ); + httpLoadQueuePeon.start(); - final DataSegment segment3 = new DataSegment( - "test3", Intervals.of("2014/2015"), "v1", - null, null, null, null, 0, 0 + Map latches = ImmutableMap.of( + segment1.getIdentifier(), new CountDownLatch(1), + segment2.getIdentifier(), new CountDownLatch(1), + segment3.getIdentifier(), new CountDownLatch(1), + segment4.getIdentifier(), new CountDownLatch(1) ); - final DataSegment segment4 = new DataSegment( - "test4", Intervals.of("2014/2015"), "v1", - null, null, null, null, 0, 0 - ); + httpLoadQueuePeon.dropSegment(segment1, () -> latches.get(segment1.getIdentifier()).countDown()); + httpLoadQueuePeon.loadSegment(segment2, () -> latches.get(segment2.getIdentifier()).countDown()); + httpLoadQueuePeon.dropSegment(segment3, () -> latches.get(segment3.getIdentifier()).countDown()); + 
httpLoadQueuePeon.loadSegment(segment4, () -> latches.get(segment4.getIdentifier()).countDown()); + + latches.get(segment1.getIdentifier()).await(); + latches.get(segment2.getIdentifier()).await(); + latches.get(segment3.getIdentifier()).await(); + latches.get(segment4.getIdentifier()).await(); + httpLoadQueuePeon.stop(); + } + + @Test(timeout = 10000) + public void testLoadDropAfterStop() throws Exception + { HttpLoadQueuePeon httpLoadQueuePeon = new HttpLoadQueuePeon( "http://dummy:4000", ServerTestHelper.MAPPER, new TestHttpClient(), - new TestDruidCoordinatorConfig(null, null, null, null, null, null, 10, null, false, false, Duration.ZERO) { - @Override - public int getHttpLoadQueuePeonBatchSize() - { - return 2; - } - }, + config, Executors.newScheduledThreadPool( 2, Execs.makeThreadFactory("HttpLoadQueuePeonTest-%s") @@ -107,48 +159,16 @@ public int getHttpLoadQueuePeonBatchSize() segment4.getIdentifier(), new CountDownLatch(1) ); - httpLoadQueuePeon.dropSegment(segment1, new LoadPeonCallback() - { - @Override - public void execute() - { - latches.get(segment1.getIdentifier()).countDown(); - } - }); - - httpLoadQueuePeon.loadSegment(segment2, new LoadPeonCallback() - { - @Override - public void execute() - { - latches.get(segment2.getIdentifier()).countDown(); - } - }); - - httpLoadQueuePeon.dropSegment(segment3, new LoadPeonCallback() - { - @Override - public void execute() - { - latches.get(segment3.getIdentifier()).countDown(); - } - }); - - httpLoadQueuePeon.loadSegment(segment4, new LoadPeonCallback() - { - @Override - public void execute() - { - latches.get(segment4.getIdentifier()).countDown(); - } - }); - + httpLoadQueuePeon.dropSegment(segment1, () -> latches.get(segment1.getIdentifier()).countDown()); + httpLoadQueuePeon.loadSegment(segment2, () -> latches.get(segment2.getIdentifier()).countDown()); latches.get(segment1.getIdentifier()).await(); latches.get(segment2.getIdentifier()).await(); + httpLoadQueuePeon.stop(); + httpLoadQueuePeon.dropSegment(segment3, () -> latches.get(segment3.getIdentifier()).countDown()); + httpLoadQueuePeon.loadSegment(segment4, () -> latches.get(segment4.getIdentifier()).countDown()); latches.get(segment3.getIdentifier()).await(); latches.get(segment4.getIdentifier()).await(); - httpLoadQueuePeon.stop(); } private static class TestDruidNodeDiscovery implements DruidNodeDiscovery @@ -191,12 +211,17 @@ public ListenableFuture go( httpResponseHandler.handleResponse(httpResponse); try { List changeRequests = ServerTestHelper.MAPPER.readValue( - request.getContent().array(), new TypeReference>() {} + request.getContent().array(), new TypeReference>() + { + } ); List statuses = new ArrayList<>(changeRequests.size()); for (DataSegmentChangeRequest cr : changeRequests) { - statuses.add(new SegmentLoadDropHandler.DataSegmentChangeRequestAndStatus(cr, SegmentLoadDropHandler.Status.SUCCESS)); + statuses.add(new SegmentLoadDropHandler.DataSegmentChangeRequestAndStatus( + cr, + SegmentLoadDropHandler.Status.SUCCESS + )); } return (ListenableFuture) Futures.immediateFuture( new ByteArrayInputStream( From f31dba6c5bfc24d99c5c5c807419864d1e530a05 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 3 Apr 2018 11:22:51 -0700 Subject: [PATCH 18/67] Coordinator drop segment selection through cost balancer (#5529) * drop selection through cost balancer * use collections.emptyIterator * add test to ensure does not drop from server with larger loading queue with cost balancer * javadocs and comments to clear things up * random drop for completeness --- 
.../server/coordinator/BalancerStrategy.java | 46 +++++++++++++++++++ .../coordinator/CostBalancerStrategy.java | 46 +++++++++++++++---- .../coordinator/RandomBalancerStrategy.java | 9 ++++ .../server/coordinator/rules/LoadRule.java | 24 +++++++--- .../coordinator/rules/LoadRuleTest.java | 7 ++- 5 files changed, 116 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/io/druid/server/coordinator/BalancerStrategy.java b/server/src/main/java/io/druid/server/coordinator/BalancerStrategy.java index e654cb44ecf4..ec498f1154a4 100644 --- a/server/src/main/java/io/druid/server/coordinator/BalancerStrategy.java +++ b/server/src/main/java/io/druid/server/coordinator/BalancerStrategy.java @@ -21,15 +21,61 @@ import io.druid.timeline.DataSegment; +import java.util.Iterator; import java.util.List; +import java.util.NavigableSet; +/** + * This interface describes the coordinator balancing strategy, which is responsible for making decisions on where + * to place {@link DataSegment}s on historical servers (described by {@link ServerHolder}). The balancing strategy + * is used by {@link io.druid.server.coordinator.rules.LoadRule} to assign and drop segments, and by + * {@link io.druid.server.coordinator.helper.DruidCoordinatorBalancer} to migrate segments between historicals. + */ public interface BalancerStrategy { + /** + * Find the best server to move a {@link DataSegment} to according to the balancing strategy. + * @param proposalSegment segment to move + * @param serverHolders servers to consider as move destinations + * @return The server to move to, or null if no move should be made or no server is suitable + */ ServerHolder findNewSegmentHomeBalancer(DataSegment proposalSegment, List serverHolders); + /** + * Find the best server on which to place a {@link DataSegment} replica according to the balancing strategy + * @param proposalSegment segment to replicate + * @param serverHolders servers to consider as replica holders + * @return The server to replicate to, or null if no suitable server is found + */ ServerHolder findNewSegmentHomeReplicator(DataSegment proposalSegment, List serverHolders); + /** + * Pick the best segment to move from one of the supplied set of servers according to the balancing strategy. + * @param serverHolders set of historicals to consider for moving segments + * @return {@link BalancerSegmentHolder} containing the segment to move and the server it currently resides on + */ BalancerSegmentHolder pickSegmentToMove(List serverHolders); + /** + * Returns an iterator for a set of servers to drop from, ordered by preference of which server to drop from first + * for a given drop strategy. One or more segments may be dropped, depending on how much the segment is + * over-replicated. + * @param toDropSegment segment to drop from one or more servers + * @param serverHolders set of historicals to consider dropping from + * @return Iterator for set of historicals, ordered by drop preference + */ + default Iterator pickServersToDrop(DataSegment toDropSegment, NavigableSet serverHolders) + { + // By default, use the reverse order to get the holders with least available size first.
+ return serverHolders.descendingIterator(); + } + + /** + * Add balancing strategy stats to be included during the 'balanceTier' operation of + * {@link io.druid.server.coordinator.helper.DruidCoordinatorBalancer} + * @param tier historical tier being balanced + * @param stats stats object to add balancing strategy stats to + * @param serverHolderList servers in tier being balanced + */ void emitStats(String tier, CoordinatorStats stats, List serverHolderList); } diff --git a/server/src/main/java/io/druid/server/coordinator/CostBalancerStrategy.java b/server/src/main/java/io/druid/server/coordinator/CostBalancerStrategy.java index f241d5fbe09c..c5ea85096f84 100644 --- a/server/src/main/java/io/druid/server/coordinator/CostBalancerStrategy.java +++ b/server/src/main/java/io/druid/server/coordinator/CostBalancerStrategy.java @@ -32,9 +32,13 @@ import org.joda.time.Interval; import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; import java.util.List; -import java.util.concurrent.Callable; +import java.util.NavigableSet; import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Collectors; public class CostBalancerStrategy implements BalancerStrategy { @@ -219,6 +223,37 @@ public BalancerSegmentHolder pickSegmentToMove(final List serverHo return sampler.getRandomBalancerSegmentHolder(serverHolders); } + @Override + public Iterator pickServersToDrop(DataSegment toDrop, NavigableSet serverHolders) + { + List>> futures = Lists.newArrayList(); + + for (final ServerHolder server : serverHolders) { + futures.add( + exec.submit( + () -> Pair.of(computeCost(toDrop, server, true), server) + ) + ); + } + + final ListenableFuture>> resultsFuture = Futures.allAsList(futures); + + try { + // results is an unordered list of pairs, each consisting of the 'cost' of a segment being on a server and that server + List> results = resultsFuture.get(); + return results.stream() + // Comparator.comparingDouble will order by lowest cost... + // reverse it because we want to drop from the highest cost servers first + .sorted(Comparator.comparingDouble((Pair o) -> o.lhs).reversed()) + .map(x -> x.rhs).collect(Collectors.toList()) + .iterator(); + } + catch (Exception e) { + log.makeAlert(e, "Cost Balancer Multithread strategy wasn't able to complete cost computation.").emit(); + } + return Collections.emptyIterator(); + } + /** * Calculates the initial cost of the Druid segment configuration.
* @@ -341,14 +376,7 @@ protected Pair chooseBestServer( for (final ServerHolder server : serverHolders) { futures.add( exec.submit( - new Callable>() - { - @Override - public Pair call() - { - return Pair.of(computeCost(proposalSegment, server, includeCurrentServer), server); - } - } + () -> Pair.of(computeCost(proposalSegment, server, includeCurrentServer), server) ) ); } diff --git a/server/src/main/java/io/druid/server/coordinator/RandomBalancerStrategy.java b/server/src/main/java/io/druid/server/coordinator/RandomBalancerStrategy.java index 8c2aed3397ab..092811831198 100644 --- a/server/src/main/java/io/druid/server/coordinator/RandomBalancerStrategy.java +++ b/server/src/main/java/io/druid/server/coordinator/RandomBalancerStrategy.java @@ -21,7 +21,10 @@ import io.druid.timeline.DataSegment; +import java.util.Comparator; +import java.util.Iterator; import java.util.List; +import java.util.NavigableSet; import java.util.Random; public class RandomBalancerStrategy implements BalancerStrategy @@ -54,6 +57,12 @@ public BalancerSegmentHolder pickSegmentToMove(List serverHolders) return sampler.getRandomBalancerSegmentHolder(serverHolders); } + @Override + public Iterator pickServersToDrop(DataSegment toDropSegment, NavigableSet serverHolders) + { + return serverHolders.stream().sorted(Comparator.comparingDouble(o -> new Random().nextDouble())).iterator(); + } + @Override public void emitStats(String tier, CoordinatorStats stats, List serverHolderList) { diff --git a/server/src/main/java/io/druid/server/coordinator/rules/LoadRule.java b/server/src/main/java/io/druid/server/coordinator/rules/LoadRule.java index dc615f800518..094deddca60b 100644 --- a/server/src/main/java/io/druid/server/coordinator/rules/LoadRule.java +++ b/server/src/main/java/io/druid/server/coordinator/rules/LoadRule.java @@ -19,8 +19,9 @@ package io.druid.server.coordinator.rules; -import io.druid.java.util.emitter.EmittingLogger; import io.druid.java.util.common.IAE; +import io.druid.java.util.emitter.EmittingLogger; +import io.druid.server.coordinator.BalancerStrategy; import io.druid.server.coordinator.CoordinatorStats; import io.druid.server.coordinator.DruidCluster; import io.druid.server.coordinator.DruidCoordinator; @@ -39,6 +40,7 @@ import java.util.Map; import java.util.NavigableSet; import java.util.Objects; +import java.util.TreeSet; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -209,7 +211,7 @@ private ServerHolder assignPrimary( } /** - * @param stats {@link CoordinatorStats} to accumulate assignment statistics. + * @param stats {@link CoordinatorStats} to accumulate assignment statistics. * @param tierToSkip if not null, this tier will be skipped from doing assignment, use when primary replica was * assigned. */ @@ -320,7 +322,7 @@ private void drop( } else { final int currentReplicantsInTier = entry.getIntValue(); final int numToDrop = currentReplicantsInTier - targetReplicants.getOrDefault(tier, 0); - numDropped = dropForTier(numToDrop, holders, segment); + numDropped = dropForTier(numToDrop, holders, segment, params.getBalancerStrategy()); } stats.addToTieredStat(DROPPED_COUNT, tier, numDropped); @@ -346,13 +348,17 @@ private boolean loadingInProgress(final DruidCluster druidCluster) private static int dropForTier( final int numToDrop, final NavigableSet holdersInTier, - final DataSegment segment + final DataSegment segment, + final BalancerStrategy balancerStrategy ) { int numDropped = 0; - // Use the reverse order to get the holders with least available size first. 
- final Iterator iterator = holdersInTier.descendingIterator(); + final NavigableSet isServingSubset = + holdersInTier.stream().filter(s -> s.isServingSegment(segment)).collect(Collectors.toCollection(TreeSet::new)); + + final Iterator iterator = balancerStrategy.pickServersToDrop(segment, isServingSubset); + while (numDropped < numToDrop) { if (!iterator.hasNext()) { log.warn("Wtf, holder was null? I have no servers serving [%s]?", segment.getIdentifier()); @@ -364,6 +370,12 @@ private static int dropForTier( if (holder.isServingSegment(segment)) { holder.getPeon().dropSegment(segment, null); ++numDropped; + } else { + log.warn( + "Server [%s] is no longer serving segment [%s], skipping drop.", + holder.getServer().getName(), + segment.getIdentifier() + ); } } diff --git a/server/src/test/java/io/druid/server/coordinator/rules/LoadRuleTest.java b/server/src/test/java/io/druid/server/coordinator/rules/LoadRuleTest.java index 4848503a5738..850fc8b89376 100644 --- a/server/src/test/java/io/druid/server/coordinator/rules/LoadRuleTest.java +++ b/server/src/test/java/io/druid/server/coordinator/rules/LoadRuleTest.java @@ -411,6 +411,9 @@ public void testDrop() final LoadQueuePeon mockPeon = createEmptyPeon(); mockPeon.dropSegment(EasyMock.anyObject(), EasyMock.anyObject()); EasyMock.expectLastCall().atLeastOnce(); + EasyMock.expect(mockBalancerStrategy.pickServersToDrop(EasyMock.anyObject(), EasyMock.anyObject())) + .andDelegateTo(balancerStrategy) + .times(2); EasyMock.replay(throttler, mockPeon, mockBalancerStrategy); LoadRule rule = createLoadRule(ImmutableMap.of( @@ -555,7 +558,9 @@ public void testDropWithNonExistentTier() final LoadQueuePeon mockPeon = createEmptyPeon(); mockPeon.dropSegment(EasyMock.anyObject(), EasyMock.anyObject()); EasyMock.expectLastCall().atLeastOnce(); - + EasyMock.expect(mockBalancerStrategy.pickServersToDrop(EasyMock.anyObject(), EasyMock.anyObject())) + .andDelegateTo(balancerStrategy) + .times(1); EasyMock.replay(throttler, mockPeon, mockBalancerStrategy); LoadRule rule = createLoadRule(ImmutableMap.of( From f0a94f50350cadc271730db916e22d254b6f3e90 Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Tue, 3 Apr 2018 13:23:46 -0700 Subject: [PATCH 19/67] Remove unused config (#5564) * Remove unused config * Fix failing tests --- .../coordinator/DruidCoordinatorConfig.java | 24 ------------------- .../DruidCoordinatorConfigTest.java | 8 ------- .../TestDruidCoordinatorConfig.java | 17 ------------- 3 files changed, 49 deletions(-) diff --git a/server/src/main/java/io/druid/server/coordinator/DruidCoordinatorConfig.java b/server/src/main/java/io/druid/server/coordinator/DruidCoordinatorConfig.java index 5976b5d59c7c..51390c80a22b 100644 --- a/server/src/main/java/io/druid/server/coordinator/DruidCoordinatorConfig.java +++ b/server/src/main/java/io/druid/server/coordinator/DruidCoordinatorConfig.java @@ -39,30 +39,6 @@ public abstract class DruidCoordinatorConfig @Default("PT1800s") public abstract Duration getCoordinatorIndexingPeriod(); - @Config("druid.coordinator.merge.on") - public boolean isMergeSegments() - { - return false; - } - - @Config("druid.coordinator.conversion.on") - public boolean isConvertSegments() - { - return false; - } - - @Config("druid.coordinator.kill.on") - public boolean isKillSegments() - { - return false; - } - - @Config("druid.coordinator.kill.pendingSegments.on") - public boolean isKillPendingSegments() - { - return false; - } - @Config("druid.coordinator.kill.period") @Default("P1D") public abstract Duration 
getCoordinatorKillPeriod(); diff --git a/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorConfigTest.java b/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorConfigTest.java index 30e60a2a0edd..a2d7008bd46a 100644 --- a/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorConfigTest.java +++ b/server/src/test/java/io/druid/server/coordinator/DruidCoordinatorConfigTest.java @@ -42,10 +42,6 @@ public void testDeserialization() Assert.assertEquals(new Duration("PT300s"), config.getCoordinatorStartDelay()); Assert.assertEquals(new Duration("PT60s"), config.getCoordinatorPeriod()); Assert.assertEquals(new Duration("PT1800s"), config.getCoordinatorIndexingPeriod()); - Assert.assertFalse(config.isMergeSegments()); - Assert.assertFalse(config.isConvertSegments()); - Assert.assertFalse(config.isKillSegments()); - Assert.assertFalse(config.isKillPendingSegments()); Assert.assertEquals(86400000, config.getCoordinatorKillPeriod().getMillis()); Assert.assertEquals(-1000, config.getCoordinatorKillDurationToRetain().getMillis()); Assert.assertEquals(0, config.getCoordinatorKillMaxSegments()); @@ -75,10 +71,6 @@ public void testDeserialization() Assert.assertEquals(new Duration("PT1s"), config.getCoordinatorStartDelay()); Assert.assertEquals(new Duration("PT1s"), config.getCoordinatorPeriod()); Assert.assertEquals(new Duration("PT1s"), config.getCoordinatorIndexingPeriod()); - Assert.assertTrue(config.isMergeSegments()); - Assert.assertTrue(config.isConvertSegments()); - Assert.assertTrue(config.isKillSegments()); - Assert.assertTrue(config.isKillPendingSegments()); Assert.assertEquals(new Duration("PT1s"), config.getCoordinatorKillPeriod()); Assert.assertEquals(new Duration("PT1s"), config.getCoordinatorKillDurationToRetain()); Assert.assertEquals(10000, config.getCoordinatorKillMaxSegments()); diff --git a/server/src/test/java/io/druid/server/coordinator/TestDruidCoordinatorConfig.java b/server/src/test/java/io/druid/server/coordinator/TestDruidCoordinatorConfig.java index 40dd77016f61..ee53e7aad00b 100644 --- a/server/src/test/java/io/druid/server/coordinator/TestDruidCoordinatorConfig.java +++ b/server/src/test/java/io/druid/server/coordinator/TestDruidCoordinatorConfig.java @@ -35,9 +35,6 @@ public class TestDruidCoordinatorConfig extends DruidCoordinatorConfig private final String consoleStatic; - private final boolean mergeSegments; - private final boolean convertSegments; - public TestDruidCoordinatorConfig( Duration coordinatorStartDelay, Duration coordinatorPeriod, @@ -60,8 +57,6 @@ public TestDruidCoordinatorConfig( this.coordinatorKillDurationToRetain = coordinatorKillDurationToRetain; this.coordinatorKillMaxSegments = coordinatorKillMaxSegments; this.consoleStatic = consoleStatic; - this.mergeSegments = mergeSegments; - this.convertSegments = convertSegments; this.getLoadQueuePeonRepeatDelay = getLoadQueuePeonRepeatDelay; } @@ -83,18 +78,6 @@ public Duration getCoordinatorIndexingPeriod() return coordinatorIndexingPeriod; } - @Override - public boolean isMergeSegments() - { - return mergeSegments; - } - - @Override - public boolean isConvertSegments() - { - return convertSegments; - } - @Override public Duration getCoordinatorKillPeriod() { From 7239f56131d6bff3e9a1484c0f71960288d4ba1b Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Tue, 3 Apr 2018 21:15:58 -0700 Subject: [PATCH 20/67] Fix NPE in RemoteTaskRunner when some tasks in ZooKeeper but not in Overlord (#5511) * Fix NPE in RemoteTaskRunner when some tasks in ZooKeeper but not in Overlord * 
revert unnecessary change --- .../indexing/overlord/RemoteTaskRunner.java | 2 +- .../indexing/overlord/hrtr/WorkerHolder.java | 6 +++-- .../indexing/worker/TaskAnnouncement.java | 26 ++++++++++++++---- .../indexing/worker/WorkerTaskMonitor.java | 3 ++- .../overlord/RemoteTaskRunnerTest.java | 27 +++++++++++++++++++ 5 files changed, 55 insertions(+), 9 deletions(-) diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java index ed7dbc3505d7..8b06b195157b 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/RemoteTaskRunner.java @@ -968,7 +968,7 @@ public void childEvent(CuratorFramework client, PathChildrenCacheEvent event) announcement.getTaskType(), zkWorker.getWorker(), TaskLocation.unknown(), - runningTasks.get(taskId).getDataSource() + announcement.getTaskDataSource() ); final RemoteTaskRunnerWorkItem existingItem = runningTasks.putIfAbsent( taskId, diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/hrtr/WorkerHolder.java b/indexing-service/src/main/java/io/druid/indexing/overlord/hrtr/WorkerHolder.java index 17fa67f7cece..7adc43419d9e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/hrtr/WorkerHolder.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/hrtr/WorkerHolder.java @@ -387,7 +387,8 @@ public void fullSync(List changes) announcement.getTaskType(), announcement.getTaskResource(), TaskStatus.failure(announcement.getTaskId()), - announcement.getTaskLocation() + announcement.getTaskLocation(), + announcement.getTaskDataSource() )); } } @@ -423,7 +424,8 @@ public void deltaSync(List changes) announcement.getTaskType(), announcement.getTaskResource(), TaskStatus.failure(announcement.getTaskId()), - announcement.getTaskLocation() + announcement.getTaskLocation(), + announcement.getTaskDataSource() )); } } else if (change instanceof WorkerHistoryItem.Metadata) { diff --git a/indexing-service/src/main/java/io/druid/indexing/worker/TaskAnnouncement.java b/indexing-service/src/main/java/io/druid/indexing/worker/TaskAnnouncement.java index 5079d8b04b48..7ab4cc6129e3 100644 --- a/indexing-service/src/main/java/io/druid/indexing/worker/TaskAnnouncement.java +++ b/indexing-service/src/main/java/io/druid/indexing/worker/TaskAnnouncement.java @@ -28,6 +28,8 @@ import io.druid.indexing.common.task.Task; import io.druid.indexing.common.task.TaskResource; +import javax.annotation.Nullable; + /** * Used by workers to announce the status of tasks they are currently running. This class is immutable. 
*/ @@ -38,9 +40,12 @@ public class TaskAnnouncement private final TaskResource taskResource; private final TaskLocation taskLocation; + @Nullable + private final String taskDataSource; // nullable for backward compatibility + public static TaskAnnouncement create(Task task, TaskStatus status, TaskLocation location) { - return create(task.getId(), task.getType(), task.getTaskResource(), status, location); + return create(task.getId(), task.getType(), task.getTaskResource(), status, location, task.getDataSource()); } public static TaskAnnouncement create( @@ -48,11 +53,12 @@ public static TaskAnnouncement create( String taskType, TaskResource resource, TaskStatus status, - TaskLocation location + TaskLocation location, + String taskDataSource ) { Preconditions.checkArgument(status.getId().equals(taskId), "task id == status id"); - return new TaskAnnouncement(null, taskType, null, status, resource, location); + return new TaskAnnouncement(null, taskType, null, status, resource, location, taskDataSource); } @JsonCreator @@ -62,7 +68,8 @@ private TaskAnnouncement( @JsonProperty("status") TaskState status, @JsonProperty("taskStatus") TaskStatus taskStatus, @JsonProperty("taskResource") TaskResource taskResource, - @JsonProperty("taskLocation") TaskLocation taskLocation + @JsonProperty("taskLocation") TaskLocation taskLocation, + @JsonProperty("taskDataSource") String taskDataSource ) { this.taskType = taskType; @@ -74,6 +81,7 @@ private TaskAnnouncement( } this.taskResource = taskResource == null ? new TaskResource(this.taskStatus.getId(), 1) : taskResource; this.taskLocation = taskLocation == null ? TaskLocation.unknown() : taskLocation; + this.taskDataSource = taskDataSource; } @JsonProperty("id") @@ -112,13 +120,21 @@ public TaskLocation getTaskLocation() return taskLocation; } + @JsonProperty("taskDataSource") + public String getTaskDataSource() + { + return taskDataSource; + } + @Override public String toString() { return "TaskAnnouncement{" + - "taskStatus=" + taskStatus + + "taskType=" + taskType + + ", taskStatus=" + taskStatus + ", taskResource=" + taskResource + ", taskLocation=" + taskLocation + + ", taskDataSource=" + taskDataSource + '}'; } } diff --git a/indexing-service/src/main/java/io/druid/indexing/worker/WorkerTaskMonitor.java b/indexing-service/src/main/java/io/druid/indexing/worker/WorkerTaskMonitor.java index 66eb0a40d931..d5ff7d3854cb 100644 --- a/indexing-service/src/main/java/io/druid/indexing/worker/WorkerTaskMonitor.java +++ b/indexing-service/src/main/java/io/druid/indexing/worker/WorkerTaskMonitor.java @@ -136,7 +136,8 @@ private void cleanupStaleAnnouncements() throws Exception announcement.getTaskType(), announcement.getTaskResource(), completionStatus, - TaskLocation.unknown() + TaskLocation.unknown(), + announcement.getTaskDataSource() ) ); } diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java index d9ce5c2bf7c9..a5e230e0c135 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/RemoteTaskRunnerTest.java @@ -447,6 +447,33 @@ public void testWorkerDisabled() throws Exception Assert.assertEquals("", Iterables.getOnlyElement(remoteTaskRunner.getWorkers()).getWorker().getVersion()); } + @Test + public void testRestartRemoteTaskRunner() throws Exception + { + doSetup(); + remoteTaskRunner.run(task); + + 
Assert.assertTrue(taskAnnounced(task.getId())); + mockWorkerRunningTask(task); + Assert.assertTrue(workerRunningTask(task.getId())); + + remoteTaskRunner.stop(); + makeRemoteTaskRunner(new TestRemoteTaskRunnerConfig(new Period("PT5S"))); + final RemoteTaskRunnerWorkItem newWorkItem = remoteTaskRunner + .getKnownTasks() + .stream() + .filter(workItem -> workItem.getTaskId().equals(task.getId())) + .findFirst() + .orElse(null); + final ListenableFuture result = newWorkItem.getResult(); + + mockWorkerCompleteSuccessfulTask(task); + Assert.assertTrue(workerCompletedTask(result)); + + Assert.assertEquals(task.getId(), result.get().getId()); + Assert.assertEquals(TaskState.SUCCESS, result.get().getStatusCode()); + } + private void doSetup() throws Exception { makeWorker(); From 818091ec603fa172488e309fc462c146651bf3bd Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Thu, 5 Apr 2018 14:16:06 -0700 Subject: [PATCH 21/67] Add overlord unsecured paths to coordinator when using combined service (#5579) * Add overlord unsecured paths to coordinator when using combined service * PR comment --- services/src/main/java/io/druid/cli/CliOverlord.java | 3 +-- .../java/io/druid/cli/CoordinatorJettyServerInitializer.java | 4 ++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/services/src/main/java/io/druid/cli/CliOverlord.java b/services/src/main/java/io/druid/cli/CliOverlord.java index a6b7fbfab9bd..51ca382590d4 100644 --- a/services/src/main/java/io/druid/cli/CliOverlord.java +++ b/services/src/main/java/io/druid/cli/CliOverlord.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; import com.google.inject.Binder; import com.google.inject.Injector; import com.google.inject.Key; @@ -118,7 +117,7 @@ public class CliOverlord extends ServerRunnable { private static Logger log = new Logger(CliOverlord.class); - private static List UNSECURED_PATHS = Lists.newArrayList( + protected static List UNSECURED_PATHS = ImmutableList.of( "/", "/console.html", "/old-console/*", diff --git a/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java b/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java index 09a5a7b519e2..7a0562f3dfd4 100644 --- a/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java +++ b/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java @@ -118,6 +118,10 @@ public void initialize(Server server, Injector injector) // perform no-op authorization for these resources AuthenticationUtils.addNoopAuthorizationFilters(root, UNSECURED_PATHS); + if (beOverlord) { + AuthenticationUtils.addNoopAuthorizationFilters(root, CliOverlord.UNSECURED_PATHS); + } + authenticators = authenticatorMapper.getAuthenticatorChain(); AuthenticationUtils.addAuthenticationFilterChain(root, authenticators); From 969342cd287dc022c8e762b1353c433012363cb8 Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Thu, 5 Apr 2018 21:38:57 -0700 Subject: [PATCH 22/67] More error reporting and stats for ingestion tasks (#5418) * Add more indexing task status and error reporting * PR comments, add support in AppenderatorDriverRealtimeIndexTask * Use TaskReport instead of metrics/context * Fix tests * Use TaskReport uploads * Refactor fire department metrics retrieval * Refactor input row serde in hadoop task * Refactor hadoop task loader names * Truncate error message in TaskStatus, add errorMsg to task report * PR comments --- 
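[Review note] For orientation before the diff of this commit: the report classes introduced below (TaskMetricsUtils, IngestionState, IngestionStatsAndErrorsTaskReport, IngestionStatsAndErrorsTaskReportData, TaskReport) compose roughly as in the hypothetical helper sketched here. It is not code from the patch; generic types are inferred from the signatures that follow, and "buildSegments" is the phase key these tasks use.

    // Hypothetical helper (not in the patch) showing how the new report classes fit together.
    static Map<String, TaskReport> buildCompletionReport(
        String taskId,
        long processed,
        long processedWithErrors,
        long unparseable,
        long thrownAway,
        List<String> savedParseExceptionMessages,
        @Nullable String errorMsg
    )
    {
      // Row counters become a per-phase stats map.
      Map<String, Object> buildSegmentsStats = TaskMetricsUtils.makeIngestionRowMetrics(
          processed, processedWithErrors, unparseable, thrownAway
      );
      // Stats, saved parse-exception messages, and an optional error string are bundled into the
      // completion report that a task hands to toolbox.getTaskReportFileWriter().write(...).
      return TaskReport.buildTaskReports(
          new IngestionStatsAndErrorsTaskReport(
              taskId,
              new IngestionStatsAndErrorsTaskReportData(
                  IngestionState.COMPLETED,
                  ImmutableMap.of("buildSegments", savedParseExceptionMessages),
                  ImmutableMap.of("buildSegments", buildSegmentsStats),
                  errorMsg
              )
          )
      );
    }
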
.../data/input/impl/MapInputRowParser.java | 2 +- .../java/io/druid/indexer/IngestionState.java | 28 + .../io/druid/indexer/TaskMetricsGetter.java | 29 + .../io/druid/indexer/TaskMetricsUtils.java | 47 ++ .../java/io/druid/indexer/TaskStatusPlus.java | 71 +-- .../java/io/druid/utils/CircularBuffer.java | 92 ++++ .../io/druid/indexer/TaskStatusPlusTest.java | 3 +- .../FilteredAggregatorBenchmark.java | 2 +- .../IncrementalIndexRowTypeBenchmark.java | 6 +- .../indexing/IndexIngestionBenchmark.java | 2 +- .../src/main/java/io/druid/indexer/Jobby.java | 28 + .../input/orc/OrcIndexGeneratorJobTest.java | 4 +- .../druid/indexing/kafka/KafkaIndexTask.java | 221 ++++++-- .../indexing/kafka/KafkaTuningConfig.java | 63 ++- .../kafka/supervisor/KafkaSupervisorSpec.java | 3 + .../KafkaSupervisorTuningConfig.java | 13 +- .../indexing/kafka/KafkaIndexTaskTest.java | 275 +++++++-- .../indexing/kafka/KafkaTuningConfigTest.java | 3 + .../kafka/supervisor/KafkaSupervisorTest.java | 3 + .../indexer/DetermineHashedPartitionsJob.java | 47 +- .../druid/indexer/DeterminePartitionsJob.java | 48 +- .../HadoopDruidDetermineConfigurationJob.java | 25 +- .../indexer/HadoopDruidIndexerConfig.java | 16 +- .../druid/indexer/HadoopDruidIndexerJob.java | 26 +- .../indexer/HadoopDruidIndexerMapper.java | 76 ++- .../io/druid/indexer/HadoopTuningConfig.java | 42 +- .../io/druid/indexer/IndexGeneratorJob.java | 78 ++- .../java/io/druid/indexer/InputRowSerde.java | 98 +++- .../main/java/io/druid/indexer/JobHelper.java | 39 +- .../src/main/java/io/druid/indexer/Utils.java | 26 + .../indexer/BatchDeltaIngestionTest.java | 4 +- .../DetermineHashedPartitionsJobTest.java | 2 + .../indexer/DeterminePartitionsJobTest.java | 2 + .../indexer/HadoopDruidIndexerConfigTest.java | 4 + .../indexer/HadoopDruidIndexerMapperTest.java | 66 +++ .../druid/indexer/HadoopTuningConfigTest.java | 2 + .../indexer/IndexGeneratorCombinerTest.java | 8 +- .../druid/indexer/IndexGeneratorJobTest.java | 4 +- .../io/druid/indexer/InputRowSerdeTest.java | 41 +- .../java/io/druid/indexer/JobHelperTest.java | 2 + .../indexer/path/GranularityPathSpecTest.java | 2 + .../updater/HadoopConverterJobTest.java | 4 +- .../IngestionStatsAndErrorsTaskReport.java | 102 ++++ ...IngestionStatsAndErrorsTaskReportData.java | 119 ++++ .../io/druid/indexing/common/TaskReport.java | 1 + .../indexing/common/TaskReportFileWriter.java | 5 +- .../io/druid/indexing/common/TaskStatus.java | 74 ++- .../RealtimeAppenderatorTuningConfig.java | 43 +- .../common/index/YeOldePlumberSchool.java | 2 +- .../AppenderatorDriverRealtimeIndexTask.java | 187 ++++++- .../indexing/common/task/CompactionTask.java | 12 +- .../indexing/common/task/HadoopIndexTask.java | 515 ++++++++++++++--- .../indexing/common/task/HadoopTask.java | 28 + .../druid/indexing/common/task/IndexTask.java | 521 +++++++++++++++--- .../indexing/common/task/IndexTaskUtils.java | 78 +++ .../overlord/ThreadPoolTaskRunner.java | 4 +- .../overlord/http/OverlordResource.java | 7 +- ...penderatorDriverRealtimeIndexTaskTest.java | 317 +++++++++-- .../common/task/CompactionTaskTest.java | 29 +- .../indexing/common/task/IndexTaskTest.java | 492 ++++++++++++++++- .../common/task/NoopTestTaskFileWriter.java | 4 +- .../common/task/RealtimeIndexTaskTest.java | 16 +- .../common/task/TaskReportSerdeTest.java | 80 +++ .../indexing/common/task/TaskSerdeTest.java | 47 +- .../indexing/overlord/TaskLifecycleTest.java | 67 ++- .../util/common/parsers/ObjectFlatteners.java | 2 +- .../util/common/parsers/ParseException.java | 12 + 
.../segment/incremental/IncrementalIndex.java | 126 ++++- .../IncrementalIndexAddResult.java | 52 ++ .../incremental/OffheapIncrementalIndex.java | 4 +- .../incremental/OnheapIncrementalIndex.java | 27 +- .../incremental/IncrementalIndexTest.java | 34 +- .../OnheapIncrementalIndexBenchmark.java | 5 +- .../incremental/TimeAndDimsCompTest.java | 12 +- .../druid/segment/indexing/TuningConfig.java | 3 + .../realtime/FireDepartmentMetrics.java | 14 +- ...ireDepartmentMetricsTaskMetricsGetter.java | 64 +++ .../realtime/appenderator/Appenderator.java | 18 +- .../AppenderatorDriverAddResult.java | 28 +- .../appenderator/AppenderatorImpl.java | 7 +- .../appenderator/BaseAppenderatorDriver.java | 3 +- .../realtime/plumber/RealtimePlumber.java | 8 +- .../druid/segment/realtime/plumber/Sink.java | 5 +- .../coordination/ChangeRequestHistory.java | 44 +- .../segment/realtime/RealtimeManagerTest.java | 2 +- .../StreamAppenderatorDriverFailTest.java | 2 +- .../ChangeRequestHistoryTest.java | 3 +- 87 files changed, 4164 insertions(+), 618 deletions(-) create mode 100644 api/src/main/java/io/druid/indexer/IngestionState.java create mode 100644 api/src/main/java/io/druid/indexer/TaskMetricsGetter.java create mode 100644 api/src/main/java/io/druid/indexer/TaskMetricsUtils.java create mode 100644 api/src/main/java/io/druid/utils/CircularBuffer.java create mode 100644 indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java create mode 100644 indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java create mode 100644 indexing-service/src/main/java/io/druid/indexing/common/task/IndexTaskUtils.java create mode 100644 indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java create mode 100644 processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAddResult.java create mode 100644 server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java diff --git a/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java b/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java index 3fa2305a7007..49d40fd88c47 100644 --- a/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java +++ b/api/src/main/java/io/druid/data/input/impl/MapInputRowParser.java @@ -72,7 +72,7 @@ public List parseBatch(Map theMap) } } catch (Exception e) { - throw new ParseException(e, "Unparseable timestamp found!"); + throw new ParseException(e, "Unparseable timestamp found! Event: %s", theMap); } return ImmutableList.of(new MapBasedInputRow(timestamp.getMillis(), dimensions, theMap)); diff --git a/api/src/main/java/io/druid/indexer/IngestionState.java b/api/src/main/java/io/druid/indexer/IngestionState.java new file mode 100644 index 000000000000..9a52e3ee1c87 --- /dev/null +++ b/api/src/main/java/io/druid/indexer/IngestionState.java @@ -0,0 +1,28 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexer; + +public enum IngestionState +{ + NOT_STARTED, + DETERMINE_PARTITIONS, + BUILD_SEGMENTS, + COMPLETED +} diff --git a/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java b/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java new file mode 100644 index 000000000000..c29e890010cb --- /dev/null +++ b/api/src/main/java/io/druid/indexer/TaskMetricsGetter.java @@ -0,0 +1,29 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexer; + +import java.util.List; +import java.util.Map; + +public interface TaskMetricsGetter +{ + List getKeys(); + Map getTotalMetrics(); +} diff --git a/api/src/main/java/io/druid/indexer/TaskMetricsUtils.java b/api/src/main/java/io/druid/indexer/TaskMetricsUtils.java new file mode 100644 index 000000000000..5845b245aa07 --- /dev/null +++ b/api/src/main/java/io/druid/indexer/TaskMetricsUtils.java @@ -0,0 +1,47 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.indexer; + +import com.google.common.collect.Maps; + +import java.util.Map; + +public class TaskMetricsUtils +{ + public static final String ROWS_PROCESSED = "rowsProcessed"; + public static final String ROWS_PROCESSED_WITH_ERRORS = "rowsProcessedWithErrors"; + public static final String ROWS_UNPARSEABLE = "rowsUnparseable"; + public static final String ROWS_THROWN_AWAY = "rowsThrownAway"; + + public static Map makeIngestionRowMetrics( + long rowsProcessed, + long rowsProcessedWithErrors, + long rowsUnparseable, + long rowsThrownAway + ) + { + Map metricsMap = Maps.newHashMap(); + metricsMap.put(ROWS_PROCESSED, rowsProcessed); + metricsMap.put(ROWS_PROCESSED_WITH_ERRORS, rowsProcessedWithErrors); + metricsMap.put(ROWS_UNPARSEABLE, rowsUnparseable); + metricsMap.put(ROWS_THROWN_AWAY, rowsThrownAway); + return metricsMap; + } +} diff --git a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java index d8d93d6055d5..898532637872 100644 --- a/api/src/main/java/io/druid/indexer/TaskStatusPlus.java +++ b/api/src/main/java/io/druid/indexer/TaskStatusPlus.java @@ -38,6 +38,9 @@ public class TaskStatusPlus private final TaskLocation location; private final String dataSource; + @Nullable + private final String errorMsg; + @JsonCreator public TaskStatusPlus( @JsonProperty("id") String id, @@ -47,7 +50,8 @@ public TaskStatusPlus( @JsonProperty("statusCode") @Nullable TaskState state, @JsonProperty("duration") @Nullable Long duration, @JsonProperty("location") TaskLocation location, - @JsonProperty("dataSource") String dataSource + @JsonProperty("dataSource") String dataSource, + @JsonProperty("errorMsg") String errorMsg ) { if (state != null && state.isComplete()) { @@ -61,6 +65,7 @@ public TaskStatusPlus( this.duration = duration; this.location = Preconditions.checkNotNull(location, "location"); this.dataSource = dataSource; + this.errorMsg = errorMsg; } @JsonProperty @@ -108,49 +113,53 @@ public TaskLocation getLocation() return location; } + @JsonProperty + public String getDataSource() + { + return dataSource; + } + + @Nullable + @JsonProperty("errorMsg") + public String getErrorMsg() + { + return errorMsg; + } + @Override public boolean equals(Object o) { if (this == o) { return true; } - if (o == null || getClass() != o.getClass()) { return false; } - - final TaskStatusPlus that = (TaskStatusPlus) o; - if (!id.equals(that.id)) { - return false; - } - if (!type.equals(that.type)) { - return false; - } - if (!createdTime.equals(that.createdTime)) { - return false; - } - if (!queueInsertionTime.equals(that.queueInsertionTime)) { - return false; - } - if (!Objects.equals(state, that.state)) { - return false; - } - if (!Objects.equals(duration, that.duration)) { - return false; - } - return location.equals(that.location); + TaskStatusPlus that = (TaskStatusPlus) o; + return Objects.equals(getId(), that.getId()) && + Objects.equals(getType(), that.getType()) && + Objects.equals(getCreatedTime(), that.getCreatedTime()) && + Objects.equals(getQueueInsertionTime(), that.getQueueInsertionTime()) && + getState() == that.getState() && + Objects.equals(getDuration(), that.getDuration()) && + Objects.equals(getLocation(), that.getLocation()) && + Objects.equals(getDataSource(), that.getDataSource()) && + Objects.equals(getErrorMsg(), that.getErrorMsg()); } @Override public int hashCode() { - return Objects.hash(id, type, createdTime, queueInsertionTime, state, duration, location); - } - - @JsonProperty - public String 
getDataSource() - { - return dataSource; + return Objects.hash( + getId(), + getType(), + getCreatedTime(), + getQueueInsertionTime(), + getState(), + getDuration(), + getLocation(), + getDataSource(), + getErrorMsg() + ); } - } diff --git a/api/src/main/java/io/druid/utils/CircularBuffer.java b/api/src/main/java/io/druid/utils/CircularBuffer.java new file mode 100644 index 000000000000..e5f8158e0efe --- /dev/null +++ b/api/src/main/java/io/druid/utils/CircularBuffer.java @@ -0,0 +1,92 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.utils; + +import com.google.common.base.Preconditions; + +/** + * A circular buffer that supports random bidirectional access. + * + * @param Type of object to be stored in the buffer + */ +public class CircularBuffer +{ + public E[] getBuffer() + { + return buffer; + } + + private final E[] buffer; + + private int start = 0; + private int size = 0; + + public CircularBuffer(int capacity) + { + Preconditions.checkArgument(capacity > 0, "Capacity must be greater than 0."); + buffer = (E[]) new Object[capacity]; + } + + public void add(E item) + { + buffer[start++] = item; + + if (start >= buffer.length) { + start = 0; + } + + if (size < buffer.length) { + size++; + } + } + + /** + * Access object at a given index, starting from the latest entry added and moving backwards. + */ + public E getLatest(int index) + { + Preconditions.checkArgument(index >= 0 && index < size, "invalid index"); + + int bufferIndex = start - index - 1; + if (bufferIndex < 0) { + bufferIndex = buffer.length + bufferIndex; + } + return buffer[bufferIndex]; + } + + /** + * Access object at a given index, starting from the earliest entry added and moving forward. 
+ */ + public E get(int index) + { + Preconditions.checkArgument(index >= 0 && index < size, "invalid index"); + + int bufferIndex = (start - size + index) % buffer.length; + if (bufferIndex < 0) { + bufferIndex += buffer.length; + } + return buffer[bufferIndex]; + } + + public int size() + { + return size; + } +} diff --git a/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java b/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java index 200a6b4c701b..eeefe130e5be 100644 --- a/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java +++ b/api/src/test/java/io/druid/indexer/TaskStatusPlusTest.java @@ -53,7 +53,8 @@ public void testSerde() throws IOException TaskState.RUNNING, 1000L, TaskLocation.create("testHost", 1010, -1), - "ds_test" + "ds_test", + null ); final String json = mapper.writeValueAsString(status); Assert.assertEquals(status, mapper.readValue(json, TaskStatusPlus.class)); diff --git a/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java index 9848e3e62f1b..6c90ba0407e1 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/FilteredAggregatorBenchmark.java @@ -255,7 +255,7 @@ public void ingest(Blackhole blackhole) throws Exception { incIndexFilteredAgg = makeIncIndex(filteredMetrics); for (InputRow row : inputRows) { - int rv = incIndexFilteredAgg.add(row); + int rv = incIndexFilteredAgg.add(row).getRowCount(); blackhole.consume(rv); } } diff --git a/benchmarks/src/main/java/io/druid/benchmark/IncrementalIndexRowTypeBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/IncrementalIndexRowTypeBenchmark.java index 6af9a93f79e3..bd622f6017f9 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/IncrementalIndexRowTypeBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/IncrementalIndexRowTypeBenchmark.java @@ -160,7 +160,7 @@ public void normalLongs(Blackhole blackhole) throws Exception { for (int i = 0; i < maxRows; i++) { InputRow row = longRows.get(i); - int rv = incIndex.add(row); + int rv = incIndex.add(row).getRowCount(); blackhole.consume(rv); } } @@ -173,7 +173,7 @@ public void normalFloats(Blackhole blackhole) throws Exception { for (int i = 0; i < maxRows; i++) { InputRow row = floatRows.get(i); - int rv = incFloatIndex.add(row); + int rv = incFloatIndex.add(row).getRowCount(); blackhole.consume(rv); } } @@ -186,7 +186,7 @@ public void normalStrings(Blackhole blackhole) throws Exception { for (int i = 0; i < maxRows; i++) { InputRow row = stringRows.get(i); - int rv = incStrIndex.add(row); + int rv = incStrIndex.add(row).getRowCount(); blackhole.consume(rv); } } diff --git a/benchmarks/src/main/java/io/druid/benchmark/indexing/IndexIngestionBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/indexing/IndexIngestionBenchmark.java index 4fc7cf2b8d72..521ae869ab8d 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/indexing/IndexIngestionBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/indexing/IndexIngestionBenchmark.java @@ -119,7 +119,7 @@ public void addRows(Blackhole blackhole) throws Exception { for (int i = 0; i < rowsPerSegment; i++) { InputRow row = rows.get(i); - int rv = incIndex.add(row); + int rv = incIndex.add(row).getRowCount(); blackhole.consume(rv); } } diff --git a/common/src/main/java/io/druid/indexer/Jobby.java b/common/src/main/java/io/druid/indexer/Jobby.java index 4423cad03684..b0d26affdf40 100644 
--- a/common/src/main/java/io/druid/indexer/Jobby.java +++ b/common/src/main/java/io/druid/indexer/Jobby.java @@ -19,9 +19,37 @@ package io.druid.indexer; +import io.druid.java.util.common.StringUtils; + +import javax.annotation.Nullable; +import java.util.Map; + /** */ public interface Jobby { boolean run(); + + /** + * @return A map containing statistics for a Jobby, optionally null if the Jobby is unable to provide stats. + */ + @Nullable + default Map getStats() + { + throw new UnsupportedOperationException( + StringUtils.format("This Jobby does not implement getJobStats(), Jobby class: [%s]", getClass()) + ); + } + + /** + * @return A string representing the error that caused a Jobby to fail. Can be null if the Jobby did not fail, + * or is unable to provide an error message. + */ + @Nullable + default String getErrorMessage() + { + throw new UnsupportedOperationException( + StringUtils.format("This Jobby does not implement getErrorMessage(), Jobby class: [%s]", getClass()) + ); + } } diff --git a/extensions-contrib/orc-extensions/src/test/java/io/druid/data/input/orc/OrcIndexGeneratorJobTest.java b/extensions-contrib/orc-extensions/src/test/java/io/druid/data/input/orc/OrcIndexGeneratorJobTest.java index f073ce0b5691..bf5383b5fbf6 100644 --- a/extensions-contrib/orc-extensions/src/test/java/io/druid/data/input/orc/OrcIndexGeneratorJobTest.java +++ b/extensions-contrib/orc-extensions/src/test/java/io/druid/data/input/orc/OrcIndexGeneratorJobTest.java @@ -234,6 +234,8 @@ public void setUp() throws Exception null, false, false, + null, + null, null ) ) @@ -252,7 +254,7 @@ public void testIndexGeneratorJob() throws IOException private void verifyJob(IndexGeneratorJob job) throws IOException { - JobHelper.runJobs(ImmutableList.of(job), config); + Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job), config)); int segmentNum = 0; for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) { diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java index 82558f0c796a..ab69c43b212d 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaIndexTask.java @@ -49,8 +49,13 @@ import io.druid.discovery.DiscoveryDruidNode; import io.druid.discovery.DruidNodeDiscoveryProvider; import io.druid.discovery.LookupNodeService; +import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsGetter; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexing.common.TaskReport; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.CheckPointDataSourceMetadataAction; @@ -58,6 +63,7 @@ import io.druid.indexing.common.actions.SegmentTransactionalInsertAction; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.common.task.AbstractTask; +import io.druid.indexing.common.task.IndexTaskUtils; import io.druid.indexing.common.task.RealtimeIndexTask; import io.druid.indexing.common.task.TaskResource; import 
io.druid.indexing.common.task.Tasks; @@ -80,6 +86,7 @@ import io.druid.segment.indexing.RealtimeIOConfig; import io.druid.segment.realtime.FireDepartment; import io.druid.segment.realtime.FireDepartmentMetrics; +import io.druid.segment.realtime.FireDepartmentMetricsTaskMetricsGetter; import io.druid.segment.realtime.RealtimeMetricsMonitor; import io.druid.segment.realtime.appenderator.Appenderator; import io.druid.segment.realtime.appenderator.AppenderatorDriverAddResult; @@ -92,13 +99,9 @@ import io.druid.segment.realtime.firehose.ChatHandlerProvider; import io.druid.server.security.Access; import io.druid.server.security.Action; -import io.druid.server.security.AuthorizationUtils; import io.druid.server.security.AuthorizerMapper; -import io.druid.server.security.ForbiddenException; -import io.druid.server.security.Resource; -import io.druid.server.security.ResourceAction; -import io.druid.server.security.ResourceType; import io.druid.timeline.DataSegment; +import io.druid.utils.CircularBuffer; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.kafka.clients.consumer.KafkaConsumer; @@ -246,6 +249,11 @@ public enum Status private volatile CopyOnWriteArrayList sequences; private ListeningExecutorService publishExecService; private final boolean useLegacy; + private CircularBuffer savedParseExceptions; + private IngestionState ingestionState; + + private TaskMetricsGetter metricsGetter; + private String errorMsg; @JsonCreator public KafkaIndexTask( @@ -276,6 +284,7 @@ public KafkaIndexTask( this.endOffsets.putAll(ioConfig.getEndPartitions().getPartitionOffsetMap()); this.topic = ioConfig.getStartPartitions().getTopic(); this.sequences = new CopyOnWriteArrayList<>(); + this.ingestionState = IngestionState.NOT_STARTED; if (context != null && context.get(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED) != null && ((boolean) context.get(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED))) { @@ -283,6 +292,9 @@ public KafkaIndexTask( } else { useLegacy = true; } + if (tuningConfig.getMaxSavedParseExceptions() > 0) { + savedParseExceptions = new CircularBuffer(tuningConfig.getMaxSavedParseExceptions()); + } resetNextCheckpointTime(); } @@ -414,11 +426,27 @@ private void createAndStartPublishExecutor() @Override public TaskStatus run(final TaskToolbox toolbox) throws Exception { - // for backwards compatibility, should be remove from versions greater than 0.12.x - if (useLegacy) { - return runLegacy(toolbox); + try { + // for backwards compatibility, should be remove from versions greater than 0.12.x + if (useLegacy) { + return runInternalLegacy(toolbox); + } else { + return runInternal(toolbox); + } } + catch (Exception e) { + log.error(e, "Encountered exception while running task."); + errorMsg = Throwables.getStackTraceAsString(e); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.failure( + getId(), + errorMsg + ); + } + } + private TaskStatus runInternal(final TaskToolbox toolbox) throws Exception + { log.info("Starting up!"); startTime = DateTimes.nowUtc(); @@ -484,6 +512,7 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception null ); fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics(); + metricsGetter = new FireDepartmentMetricsTaskMetricsGetter(fireDepartmentMetrics); toolbox.getMonitorScheduler().addMonitor( new RealtimeMetricsMonitor( ImmutableList.of(fireDepartmentForMetrics), @@ -595,6 +624,8 @@ public void run() Set assignment = 
assignPartitionsAndSeekToNext(consumer, topic); + ingestionState = IngestionState.BUILD_SEGMENTS; + // Main loop. // Could eventually support leader/follower mode (for keeping replicas more in sync) boolean stillReading = !assignment.isEmpty(); @@ -730,7 +761,11 @@ public void run() throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); } - fireDepartmentMetrics.incrementProcessed(); + if (addResult.getParseException() != null) { + handleParseException(addResult.getParseException(), record); + } else { + fireDepartmentMetrics.incrementProcessed(); + } } else { fireDepartmentMetrics.incrementThrownAway(); } @@ -757,18 +792,7 @@ public void onFailure(Throwable t) } } catch (ParseException e) { - if (tuningConfig.isReportParseExceptions()) { - throw e; - } else { - log.debug( - e, - "Dropping unparseable row from partition[%d] offset[%,d].", - record.partition(), - record.offset() - ); - - fireDepartmentMetrics.incrementUnparseable(); - } + handleParseException(e, record); } nextOffsets.put(record.partition(), record.offset() + 1); @@ -806,6 +830,7 @@ public void onFailure(Throwable t) } } } + ingestionState = IngestionState.COMPLETED; } catch (Exception e) { log.error(e, "Encountered exception in run() before persisting."); @@ -904,11 +929,11 @@ public void onFailure(Throwable t) toolbox.getDataSegmentServerAnnouncer().unannounce(); } - toolbox.getTaskReportFileWriter().write(null); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); return success(); } - private TaskStatus runLegacy(final TaskToolbox toolbox) throws Exception + private TaskStatus runInternalLegacy(final TaskToolbox toolbox) throws Exception { log.info("Starting up!"); startTime = DateTimes.nowUtc(); @@ -931,6 +956,7 @@ private TaskStatus runLegacy(final TaskToolbox toolbox) throws Exception null ); fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics(); + metricsGetter = new FireDepartmentMetricsTaskMetricsGetter(fireDepartmentMetrics); toolbox.getMonitorScheduler().addMonitor( new RealtimeMetricsMonitor( ImmutableList.of(fireDepartmentForMetrics), @@ -950,6 +976,8 @@ private TaskStatus runLegacy(final TaskToolbox toolbox) throws Exception ) ); + ingestionState = IngestionState.BUILD_SEGMENTS; + try ( final Appenderator appenderator0 = newAppenderator(fireDepartmentMetrics, toolbox); final StreamAppenderatorDriver driver = newDriver(appenderator0, toolbox, fireDepartmentMetrics); @@ -1127,11 +1155,17 @@ public void run() // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks. 
throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp()); } - fireDepartmentMetrics.incrementProcessed(); + + if (addResult.getParseException() != null) { + handleParseException(addResult.getParseException(), record); + } else { + fireDepartmentMetrics.incrementProcessed(); + } } else { fireDepartmentMetrics.incrementThrownAway(); } } + if (isPersistRequired) { driver.persist(committerSupplier.get()); } @@ -1141,18 +1175,7 @@ public void run() )); } catch (ParseException e) { - if (tuningConfig.isReportParseExceptions()) { - throw e; - } else { - log.debug( - e, - "Dropping unparseable row from partition[%d] offset[%,d].", - record.partition(), - record.offset() - ); - - fireDepartmentMetrics.incrementUnparseable(); - } + handleParseException(e, record); } nextOffsets.put(record.partition(), record.offset() + 1); @@ -1166,6 +1189,7 @@ public void run() } } } + ingestionState = IngestionState.COMPLETED; } catch (Exception e) { log.error(e, "Encountered exception in runLegacy() before persisting."); @@ -1273,8 +1297,76 @@ public String apply(DataSegment input) toolbox.getDataSegmentServerAnnouncer().unannounce(); } - toolbox.getTaskReportFileWriter().write(null); - return success(); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.success( + getId(), + null + ); + } + + private void handleParseException(ParseException pe, ConsumerRecord record) + { + if (pe.isFromPartiallyValidRow()) { + fireDepartmentMetrics.incrementProcessedWithErrors(); + } else { + fireDepartmentMetrics.incrementUnparseable(); + } + + if (tuningConfig.isLogParseExceptions()) { + log.error( + pe, + "Encountered parse exception on row from partition[%d] offset[%d]", + record.partition(), + record.offset() + ); + } + + if (savedParseExceptions != null) { + savedParseExceptions.add(pe); + } + + if (fireDepartmentMetrics.unparseable() + fireDepartmentMetrics.processedWithErrors() + > tuningConfig.getMaxParseExceptions()) { + log.error("Max parse exceptions exceeded, terminating task..."); + throw new RuntimeException("Max parse exceptions exceeded, terminating task..."); + } + } + + private Map getTaskCompletionReports() + { + return TaskReport.buildTaskReports( + new IngestionStatsAndErrorsTaskReport( + getId(), + new IngestionStatsAndErrorsTaskReportData( + ingestionState, + getTaskCompletionUnparseableEvents(), + getTaskCompletionRowStats(), + errorMsg + ) + ) + ); + } + + private Map getTaskCompletionUnparseableEvents() + { + Map unparseableEventsMap = Maps.newHashMap(); + List buildSegmentsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); + if (buildSegmentsParseExceptionMessages != null) { + unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); + } + return unparseableEventsMap; + } + + private Map getTaskCompletionRowStats() + { + Map metrics = Maps.newHashMap(); + if (metricsGetter != null) { + metrics.put( + "buildSegments", + metricsGetter.getTotalMetrics() + ); + } + return metrics; } private void checkAndMaybeThrowException() @@ -1342,17 +1434,7 @@ public boolean canRestore() */ private Access authorizationCheck(final HttpServletRequest req, Action action) { - ResourceAction resourceAction = new ResourceAction( - new Resource(dataSchema.getDataSource(), ResourceType.DATASOURCE), - action - ); - - Access access = AuthorizationUtils.authorizeResourceAction(req, resourceAction, authorizerMapper); - if (!access.isAllowed()) { - throw new 
ForbiddenException(access.toString()); - } - - return access; + return IndexTaskUtils.datasourceAuthorizationCheck(req, action, getDataSource(), authorizerMapper); } @VisibleForTesting @@ -1493,6 +1575,40 @@ public Response setEndOffsetsHTTP( return setEndOffsets(offsets, resume, finish); } + @GET + @Path("/rowStats") + @Produces(MediaType.APPLICATION_JSON) + public Response getRowStats( + @Context final HttpServletRequest req + ) + { + authorizationCheck(req, Action.READ); + Map returnMap = Maps.newHashMap(); + Map totalsMap = Maps.newHashMap(); + + if (metricsGetter != null) { + totalsMap.put( + "buildSegments", + metricsGetter.getTotalMetrics() + ); + } + + returnMap.put("totals", totalsMap); + return Response.ok(returnMap).build(); + } + + @GET + @Path("/unparseableEvents") + @Produces(MediaType.APPLICATION_JSON) + public Response getUnparseableEvents( + @Context final HttpServletRequest req + ) + { + authorizationCheck(req, Action.READ); + List events = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); + return Response.ok(events).build(); + } + public Response setEndOffsets( Map offsets, final boolean resume, @@ -2039,12 +2155,7 @@ private boolean withinMinMaxRecordTime(final InputRow row) "Encountered row with timestamp that cannot be represented as a long: [%s]", row ); - log.debug(errorMsg); - if (tuningConfig.isReportParseExceptions()) { - throw new ParseException(errorMsg); - } else { - return false; - } + throw new ParseException(errorMsg); } if (log.isDebugEnabled()) { diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java index 21ec1ed9f1ca..4c0277646368 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/KafkaTuningConfig.java @@ -51,6 +51,10 @@ public class KafkaTuningConfig implements TuningConfig, AppenderatorConfig private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; private final Period intermediateHandoffPeriod; + private final boolean logParseExceptions; + private final int maxParseExceptions; + private final int maxSavedParseExceptions; + @JsonCreator public KafkaTuningConfig( @JsonProperty("maxRowsInMemory") @Nullable Integer maxRowsInMemory, @@ -61,11 +65,14 @@ public KafkaTuningConfig( @JsonProperty("indexSpec") @Nullable IndexSpec indexSpec, // This parameter is left for compatibility when reading existing configs, to be removed in Druid 0.12. 
@JsonProperty("buildV9Directly") @Nullable Boolean buildV9Directly, - @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, + @Deprecated @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, @JsonProperty("handoffConditionTimeout") @Nullable Long handoffConditionTimeout, @JsonProperty("resetOffsetAutomatically") @Nullable Boolean resetOffsetAutomatically, @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, - @JsonProperty("intermediateHandoffPeriod") @Nullable Period intermediateHandoffPeriod + @JsonProperty("intermediateHandoffPeriod") @Nullable Period intermediateHandoffPeriod, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions ) { // Cannot be a static because default basePersistDirectory is unique per-instance @@ -92,6 +99,17 @@ public KafkaTuningConfig( this.intermediateHandoffPeriod = intermediateHandoffPeriod == null ? new Period().withDays(Integer.MAX_VALUE) : intermediateHandoffPeriod; + + if (this.reportParseExceptions) { + this.maxParseExceptions = 0; + this.maxSavedParseExceptions = maxSavedParseExceptions == null ? 0 : Math.min(1, maxSavedParseExceptions); + } else { + this.maxParseExceptions = maxParseExceptions == null ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions; + this.maxSavedParseExceptions = maxSavedParseExceptions == null + ? TuningConfig.DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS + : maxSavedParseExceptions; + } + this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; } public static KafkaTuningConfig copyOf(KafkaTuningConfig config) @@ -108,7 +126,10 @@ public static KafkaTuningConfig copyOf(KafkaTuningConfig config) config.handoffConditionTimeout, config.resetOffsetAutomatically, config.segmentWriteOutMediumFactory, - config.intermediateHandoffPeriod + config.intermediateHandoffPeriod, + config.logParseExceptions, + config.maxParseExceptions, + config.maxSavedParseExceptions ); } @@ -197,6 +218,24 @@ public Period getIntermediateHandoffPeriod() return intermediateHandoffPeriod; } + @JsonProperty + public boolean isLogParseExceptions() + { + return logParseExceptions; + } + + @JsonProperty + public int getMaxParseExceptions() + { + return maxParseExceptions; + } + + @JsonProperty + public int getMaxSavedParseExceptions() + { + return maxSavedParseExceptions; + } + public KafkaTuningConfig withBasePersistDirectory(File dir) { return new KafkaTuningConfig( @@ -211,7 +250,10 @@ public KafkaTuningConfig withBasePersistDirectory(File dir) handoffConditionTimeout, resetOffsetAutomatically, segmentWriteOutMediumFactory, - intermediateHandoffPeriod + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } @@ -235,7 +277,10 @@ public boolean equals(Object o) Objects.equals(basePersistDirectory, that.basePersistDirectory) && Objects.equals(indexSpec, that.indexSpec) && Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory) && - Objects.equals(intermediateHandoffPeriod, that.intermediateHandoffPeriod); + Objects.equals(intermediateHandoffPeriod, that.intermediateHandoffPeriod) && + logParseExceptions == that.logParseExceptions && + maxParseExceptions == that.maxParseExceptions && + maxSavedParseExceptions == that.maxSavedParseExceptions; 
} @Override @@ -252,7 +297,10 @@ public int hashCode() handoffConditionTimeout, resetOffsetAutomatically, segmentWriteOutMediumFactory, - intermediateHandoffPeriod + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } @@ -271,6 +319,9 @@ public String toString() ", resetOffsetAutomatically=" + resetOffsetAutomatically + ", segmentWriteOutMediumFactory=" + segmentWriteOutMediumFactory + ", intermediateHandoffPeriod=" + intermediateHandoffPeriod + + ", logParseExceptions=" + logParseExceptions + + ", maxParseExceptions=" + maxParseExceptions + + ", maxSavedParseExceptions=" + maxSavedParseExceptions + '}'; } } diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java index 1db4580c23f9..c796b3eb9b6c 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorSpec.java @@ -90,6 +90,9 @@ public KafkaSupervisorSpec( null, null, null, + null, + null, + null, null ); this.ioConfig = Preconditions.checkNotNull(ioConfig, "ioConfig"); diff --git a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTuningConfig.java b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTuningConfig.java index c6e5a3fe8d65..4467a65d4e13 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTuningConfig.java +++ b/extensions-core/kafka-indexing-service/src/main/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTuningConfig.java @@ -57,7 +57,10 @@ public KafkaSupervisorTuningConfig( @JsonProperty("httpTimeout") Period httpTimeout, @JsonProperty("shutdownTimeout") Period shutdownTimeout, @JsonProperty("offsetFetchPeriod") Period offsetFetchPeriod, - @JsonProperty("intermediateHandoffPeriod") Period intermediateHandoffPeriod + @JsonProperty("intermediateHandoffPeriod") Period intermediateHandoffPeriod, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions ) { super( @@ -72,7 +75,10 @@ public KafkaSupervisorTuningConfig( handoffConditionTimeout, resetOffsetAutomatically, segmentWriteOutMediumFactory, - intermediateHandoffPeriod + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); this.workerThreads = workerThreads; @@ -140,6 +146,9 @@ public String toString() ", shutdownTimeout=" + shutdownTimeout + ", offsetFetchPeriod=" + offsetFetchPeriod + ", intermediateHandoffPeriod=" + getIntermediateHandoffPeriod() + + ", logParseExceptions=" + isLogParseExceptions() + + ", maxParseExceptions=" + getMaxParseExceptions() + + ", maxSavedParseExceptions=" + getMaxSavedParseExceptions() + '}'; } diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java index 9fb0495284f2..e0dd1be77184 100644 --- 
a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -36,6 +36,14 @@ import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; +import io.druid.data.input.impl.FloatDimensionSchema; +import io.druid.data.input.impl.LongDimensionSchema; +import io.druid.data.input.impl.StringDimensionSchema; +import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexing.common.TaskReport; +import io.druid.indexing.common.TaskReportFileWriter; +import io.druid.indexing.common.task.IndexTaskTest; import io.druid.client.cache.CacheConfig; import io.druid.client.cache.MapCache; import io.druid.data.input.impl.DimensionsSpec; @@ -57,7 +65,6 @@ import io.druid.indexing.common.actions.TaskActionToolbox; import io.druid.indexing.common.config.TaskConfig; import io.druid.indexing.common.config.TaskStorageConfig; -import io.druid.indexing.common.task.NoopTestTaskFileWriter; import io.druid.indexing.common.task.Task; import io.druid.indexing.kafka.supervisor.KafkaSupervisor; import io.druid.indexing.kafka.test.TestBroker; @@ -101,6 +108,7 @@ import io.druid.query.SegmentDescriptor; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.CountAggregatorFactory; +import io.druid.query.aggregation.DoubleSumAggregatorFactory; import io.druid.query.aggregation.LongSumAggregatorFactory; import io.druid.query.filter.SelectorDimFilter; import io.druid.query.timeseries.TimeseriesQuery; @@ -152,6 +160,7 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -183,6 +192,9 @@ public class KafkaIndexTaskTest private long handoffConditionTimeout = 0; private boolean reportParseExceptions = false; + private boolean logParseExceptions = true; + private Integer maxParseExceptions = null; + private Integer maxSavedParseExceptions = null; private boolean resetOffsetAutomatically = false; private boolean doHandoff = true; private Integer maxRowsPerSegment = null; @@ -197,6 +209,7 @@ public class KafkaIndexTaskTest private List> records; private final boolean isIncrementalHandoffSupported; private final Set checkpointRequestsHash = Sets.newHashSet(); + private File reportsFile; // This should be removed in versions greater that 0.12.x // isIncrementalHandoffSupported should always be set to true in those later versions @@ -218,7 +231,13 @@ public KafkaIndexTaskTest(boolean isIncrementalHandoffSupported) new JSONParseSpec( new TimestampSpec("timestamp", "iso", null), new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim1t", "dim2")), + Arrays.asList( + new StringDimensionSchema("dim1"), + new StringDimensionSchema("dim1t"), + new StringDimensionSchema("dim2"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), null, null ), @@ -229,7 +248,10 @@ public KafkaIndexTaskTest(boolean isIncrementalHandoffSupported) ), Map.class ), - new AggregatorFactory[]{new CountAggregatorFactory("rows")}, + new AggregatorFactory[]{ + new DoubleSumAggregatorFactory("met1sum", "met1"), + new CountAggregatorFactory("rows") + }, new 
UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), null, objectMapper @@ -238,17 +260,21 @@ public KafkaIndexTaskTest(boolean isIncrementalHandoffSupported) private static List> generateRecords(String topic) { return ImmutableList.of( - new ProducerRecord(topic, 0, null, JB("2008", "a", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("2009", "b", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("2010", "c", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("2011", "d", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("2011", "e", "y", 1.0f)), - new ProducerRecord(topic, 0, null, JB("246140482-04-24T15:36:27.903Z", "x", "z", 1.0f)), + new ProducerRecord(topic, 0, null, JB("2008", "a", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2009", "b", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2010", "c", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2011", "d", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2011", "e", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("246140482-04-24T15:36:27.903Z", "x", "z", "10", "20.0", "1.0")), new ProducerRecord(topic, 0, null, StringUtils.toUtf8("unparseable")), + new ProducerRecord(topic, 0, null, StringUtils.toUtf8("unparseable2")), new ProducerRecord(topic, 0, null, null), - new ProducerRecord(topic, 0, null, JB("2013", "f", "y", 1.0f)), - new ProducerRecord(topic, 1, null, JB("2012", "g", "y", 1.0f)), - new ProducerRecord(topic, 1, null, JB("2011", "h", "y", 1.0f)) + new ProducerRecord(topic, 0, null, JB("2013", "f", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2049", "f", "y", "notanumber", "20.0", "1.0")), + new ProducerRecord(topic, 0, null, JB("2049", "f", "y", "10", "notanumber", "1.0")), + new ProducerRecord(topic, 0, null, JB("2049", "f", "y", "10", "20.0", "notanumber")), + new ProducerRecord(topic, 1, null, JB("2012", "g", "y", "10", "20.0", "1.0")), + new ProducerRecord(topic, 1, null, JB("2011", "h", "y", "10", "20.0", "1.0")) ); } @@ -297,9 +323,13 @@ public void setupTest() throws IOException { handoffConditionTimeout = 0; reportParseExceptions = false; + logParseExceptions = true; + maxParseExceptions = null; + maxSavedParseExceptions = null; doHandoff = true; topic = getTopicName(); records = generateRecords(topic); + reportsFile = File.createTempFile("KafkaIndexTaskTestReports-" + System.currentTimeMillis(), "json"); makeToolboxFactory(); } @@ -313,7 +343,7 @@ public void tearDownTest() runningTasks.clear(); } - + reportsFile.delete(); destroyToolboxFactory(); } @@ -459,7 +489,7 @@ public void testIncrementalHandOff() throws Exception // of events fetched across two partitions from Kafka final KafkaPartitions checkpoint1 = new KafkaPartitions(topic, ImmutableMap.of(0, 5L, 1, 0L)); final KafkaPartitions checkpoint2 = new KafkaPartitions(topic, ImmutableMap.of(0, 4L, 1, 2L)); - final KafkaPartitions endPartitions = new KafkaPartitions(topic, ImmutableMap.of(0, 9L, 1, 2L)); + final KafkaPartitions endPartitions = new KafkaPartitions(topic, ImmutableMap.of(0, 10L, 1, 2L)); final KafkaIndexTask task = createTask( null, new KafkaIOConfig( @@ -496,8 +526,8 @@ public void testIncrementalHandOff() throws Exception // Check metrics Assert.assertEquals(8, task.getFireDepartmentMetrics().processed()); - Assert.assertEquals(1, task.getFireDepartmentMetrics().unparseable()); - Assert.assertEquals(2, task.getFireDepartmentMetrics().thrownAway()); + Assert.assertEquals(3, 
task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task.getFireDepartmentMetrics().thrownAway()); // Check published metadata SegmentDescriptor desc1 = SD(task, "2008/P1D", 0); @@ -509,7 +539,7 @@ public void testIncrementalHandOff() throws Exception SegmentDescriptor desc7 = SD(task, "2013/P1D", 0); Assert.assertEquals(ImmutableSet.of(desc1, desc2, desc3, desc4, desc5, desc6, desc7), publishedDescriptors()); Assert.assertEquals( - new KafkaDataSourceMetadata(new KafkaPartitions(topic, ImmutableMap.of(0, 9L, 1, 2L))), + new KafkaDataSourceMetadata(new KafkaPartitions(topic, ImmutableMap.of(0, 10L, 1, 2L))), metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) ); @@ -924,6 +954,10 @@ public void testReportParseExceptions() throws Exception { reportParseExceptions = true; + // these will be ignored because reportParseExceptions is true + maxParseExceptions = 1000; + maxSavedParseExceptions = 2; + // Insert data try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { for (ProducerRecord record : records) { @@ -953,12 +987,165 @@ public void testReportParseExceptions() throws Exception // Check metrics Assert.assertEquals(3, task.getFireDepartmentMetrics().processed()); - Assert.assertEquals(0, task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(0, task.getFireDepartmentMetrics().thrownAway()); + + // Check published metadata + Assert.assertEquals(ImmutableSet.of(), publishedDescriptors()); + Assert.assertNull(metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource())); + } + + @Test(timeout = 60_000L) + public void testMultipleParseExceptionsSuccess() throws Exception + { + reportParseExceptions = false; + maxParseExceptions = 6; + maxSavedParseExceptions = 6; + + // Insert data + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + for (ProducerRecord record : records) { + kafkaProducer.send(record).get(); + } + } + + final KafkaIndexTask task = createTask( + null, + new KafkaIOConfig( + "sequence0", + new KafkaPartitions(topic, ImmutableMap.of(0, 2L)), + new KafkaPartitions(topic, ImmutableMap.of(0, 13L)), + kafkaServer.consumerProperties(), + true, + false, + null, + null, + false + ) + ); + + final ListenableFuture future = runTask(task); + + TaskStatus status = future.get(); + + // Wait for task to exit + Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode()); + Assert.assertEquals(null, status.getErrorMsg()); + + // Check metrics + Assert.assertEquals(4, task.getFireDepartmentMetrics().processed()); + Assert.assertEquals(3, task.getFireDepartmentMetrics().processedWithErrors()); + Assert.assertEquals(3, task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task.getFireDepartmentMetrics().thrownAway()); + + // Check published metadata + SegmentDescriptor desc1 = SD(task, "2010/P1D", 0); + SegmentDescriptor desc2 = SD(task, "2011/P1D", 0); + SegmentDescriptor desc3 = SD(task, "2013/P1D", 0); + SegmentDescriptor desc4 = SD(task, "2049/P1D", 0); + Assert.assertEquals(ImmutableSet.of(desc1, desc2, desc3, desc4), publishedDescriptors()); + Assert.assertEquals( + new KafkaDataSourceMetadata(new KafkaPartitions(topic, ImmutableMap.of(0, 13L))), + metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) + ); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( 
+ TaskMetricsUtils.ROWS_PROCESSED, 4, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 3, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3, + TaskMetricsUtils.ROWS_THROWN_AWAY, 1 + ) + ); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); + + Map unparseableEvents = ImmutableMap.of( + "buildSegments", + Arrays.asList( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2049-01-01T00:00:00.000Z, event={timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=20.0, met1=notanumber}, dimensions=[dim1, dim1t, dim2, dimLong, dimFloat]}], exceptions: [Unable to parse value[notanumber] for field[met1],]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2049-01-01T00:00:00.000Z, event={timestamp=2049, dim1=f, dim2=y, dimLong=10, dimFloat=notanumber, met1=1.0}, dimensions=[dim1, dim1t, dim2, dimLong, dimFloat]}], exceptions: [could not convert value [notanumber] to float,]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2049-01-01T00:00:00.000Z, event={timestamp=2049, dim1=f, dim2=y, dimLong=notanumber, dimFloat=20.0, met1=1.0}, dimensions=[dim1, dim1t, dim2, dimLong, dimFloat]}], exceptions: [could not convert value [notanumber] to long,]", + "Unable to parse row [unparseable2]", + "Unable to parse row [unparseable]", + "Encountered row with timestamp that cannot be represented as a long: [MapBasedInputRow{timestamp=246140482-04-24T15:36:27.903Z, event={timestamp=246140482-04-24T15:36:27.903Z, dim1=x, dim2=z, dimLong=10, dimFloat=20.0, met1=1.0}, dimensions=[dim1, dim1t, dim2, dimLong, dimFloat]}]" + ) + ); + + Assert.assertEquals(unparseableEvents, reportData.getUnparseableEvents()); + } + + @Test(timeout = 60_000L) + public void testMultipleParseExceptionsFailure() throws Exception + { + reportParseExceptions = false; + maxParseExceptions = 2; + maxSavedParseExceptions = 2; + + // Insert data + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + for (ProducerRecord record : records) { + kafkaProducer.send(record).get(); + } + } + + final KafkaIndexTask task = createTask( + null, + new KafkaIOConfig( + "sequence0", + new KafkaPartitions(topic, ImmutableMap.of(0, 2L)), + new KafkaPartitions(topic, ImmutableMap.of(0, 10L)), + kafkaServer.consumerProperties(), + true, + false, + null, + null, + false + ) + ); + + final ListenableFuture future = runTask(task); + + TaskStatus status = future.get(); + + // Wait for task to exit + Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + IndexTaskTest.checkTaskStatusErrorMsgForParseExceptionsExceeded(status); + + // Check metrics + Assert.assertEquals(3, task.getFireDepartmentMetrics().processed()); + Assert.assertEquals(0, task.getFireDepartmentMetrics().processedWithErrors()); + Assert.assertEquals(3, task.getFireDepartmentMetrics().unparseable()); Assert.assertEquals(0, task.getFireDepartmentMetrics().thrownAway()); // Check published metadata Assert.assertEquals(ImmutableSet.of(), publishedDescriptors()); Assert.assertNull(metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource())); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 3, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 + ) + ); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); + + Map unparseableEvents = ImmutableMap.of( + "buildSegments", + Arrays.asList( + 
"Unable to parse row [unparseable2]", + "Unable to parse row [unparseable]" + ) + ); + + Assert.assertEquals(unparseableEvents, reportData.getUnparseableEvents()); } @Test(timeout = 60_000L) @@ -1051,7 +1238,7 @@ public void testRunConflicting() throws Exception new KafkaIOConfig( "sequence1", new KafkaPartitions(topic, ImmutableMap.of(0, 3L)), - new KafkaPartitions(topic, ImmutableMap.of(0, 9L)), + new KafkaPartitions(topic, ImmutableMap.of(0, 10L)), kafkaServer.consumerProperties(), true, false, @@ -1081,8 +1268,8 @@ public void testRunConflicting() throws Exception Assert.assertEquals(0, task1.getFireDepartmentMetrics().unparseable()); Assert.assertEquals(0, task1.getFireDepartmentMetrics().thrownAway()); Assert.assertEquals(3, task2.getFireDepartmentMetrics().processed()); - Assert.assertEquals(1, task2.getFireDepartmentMetrics().unparseable()); - Assert.assertEquals(2, task2.getFireDepartmentMetrics().thrownAway()); + Assert.assertEquals(3, task2.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task2.getFireDepartmentMetrics().thrownAway()); // Check published segments & metadata, should all be from the first task SegmentDescriptor desc1 = SD(task1, "2010/P1D", 0); @@ -1120,7 +1307,7 @@ public void testRunConflictingWithoutTransactions() throws Exception new KafkaIOConfig( "sequence1", new KafkaPartitions(topic, ImmutableMap.of(0, 3L)), - new KafkaPartitions(topic, ImmutableMap.of(0, 9L)), + new KafkaPartitions(topic, ImmutableMap.of(0, 10L)), kafkaServer.consumerProperties(), false, false, @@ -1156,8 +1343,8 @@ public void testRunConflictingWithoutTransactions() throws Exception Assert.assertEquals(0, task1.getFireDepartmentMetrics().unparseable()); Assert.assertEquals(0, task1.getFireDepartmentMetrics().thrownAway()); Assert.assertEquals(3, task2.getFireDepartmentMetrics().processed()); - Assert.assertEquals(1, task2.getFireDepartmentMetrics().unparseable()); - Assert.assertEquals(2, task2.getFireDepartmentMetrics().thrownAway()); + Assert.assertEquals(3, task2.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(1, task2.getFireDepartmentMetrics().thrownAway()); // Check published segments & metadata SegmentDescriptor desc3 = SD(task2, "2011/P1D", 1); @@ -1548,8 +1735,8 @@ public void testRunAndPauseAfterReadWithModifiedEndOffsets() throws Exception // Check metrics Assert.assertEquals(4, task.getFireDepartmentMetrics().processed()); - Assert.assertEquals(1, task.getFireDepartmentMetrics().unparseable()); - Assert.assertEquals(1, task.getFireDepartmentMetrics().thrownAway()); + Assert.assertEquals(2, task.getFireDepartmentMetrics().unparseable()); + Assert.assertEquals(0, task.getFireDepartmentMetrics().thrownAway()); // Check published metadata SegmentDescriptor desc1 = SD(task, "2009/P1D", 0); @@ -1788,7 +1975,10 @@ private KafkaIndexTask createTask( handoffConditionTimeout, resetOffsetAutomatically, null, - intermediateHandoffPeriod + intermediateHandoffPeriod, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); final Map context = isIncrementalHandoffSupported ? 
ImmutableMap.of(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED, true) @@ -1827,7 +2017,10 @@ private KafkaIndexTask createTask( handoffConditionTimeout, resetOffsetAutomatically, null, - null + null, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); if (isIncrementalHandoffSupported) { context.put(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED, true); @@ -2034,7 +2227,7 @@ public List getLocations() EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), new DataNodeService("tier", 1, ServerType.INDEXER_EXECUTOR, 0), - new NoopTestTaskFileWriter() + new TaskReportFileWriter(reportsFile) ); } @@ -2132,11 +2325,18 @@ public long countEvents(final Task task) return results.isEmpty() ? 0 : results.get(0).getValue().getLongMetric("rows"); } - private static byte[] JB(String timestamp, String dim1, String dim2, double met1) + private static byte[] JB(String timestamp, String dim1, String dim2, String dimLong, String dimFloat, String met1) { try { return new ObjectMapper().writeValueAsBytes( - ImmutableMap.of("timestamp", timestamp, "dim1", dim1, "dim2", dim2, "met1", met1) + ImmutableMap.builder() + .put("timestamp", timestamp) + .put("dim1", dim1) + .put("dim2", dim2) + .put("dimLong", dimLong) + .put("dimFloat", dimFloat) + .put("met1", met1) + .build() ); } catch (Exception e) { @@ -2149,4 +2349,17 @@ private SegmentDescriptor SD(final Task task, final String intervalString, final final Interval interval = Intervals.of(intervalString); return new SegmentDescriptor(interval, getLock(task, interval).getVersion(), partitionNum); } + + private IngestionStatsAndErrorsTaskReportData getTaskReportData() throws IOException + { + Map taskReports = objectMapper.readValue( + reportsFile, + new TypeReference>() + { + } + ); + return IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + taskReports + ); + } } diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaTuningConfigTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaTuningConfigTest.java index d18a34651de3..78916e7d27b7 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaTuningConfigTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/KafkaTuningConfigTest.java @@ -113,6 +113,9 @@ public void testCopyOf() 5L, null, null, + null, + null, + null, null ); KafkaTuningConfig copy = KafkaTuningConfig.copyOf(original); diff --git a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java index f1c8ce97c9fe..c44425c17e9b 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/io/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java @@ -201,6 +201,9 @@ public void setupTest() TEST_HTTP_TIMEOUT, TEST_SHUTDOWN_TIMEOUT, null, + null, + null, + null, null ); diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java index 44e75805d77d..7955aae220b7 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java 
@@ -46,6 +46,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; @@ -55,6 +56,7 @@ import org.joda.time.DateTimeComparator; import org.joda.time.Interval; +import javax.annotation.Nullable; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -69,6 +71,8 @@ public class DetermineHashedPartitionsJob implements Jobby { private static final Logger log = new Logger(DetermineHashedPartitionsJob.class); private final HadoopDruidIndexerConfig config; + private String failureCause; + private Job groupByJob; public DetermineHashedPartitionsJob( HadoopDruidIndexerConfig config @@ -86,7 +90,7 @@ public boolean run() * in the final segment. */ final long startTime = System.currentTimeMillis(); - final Job groupByJob = Job.getInstance( + groupByJob = Job.getInstance( new Configuration(), StringUtils.format("%s-determine_partitions_hashed-%s", config.getDataSource(), config.getIntervals()) ); @@ -121,6 +125,7 @@ public boolean run() if (!groupByJob.waitForCompletion(true)) { log.error("Job failed: %s", groupByJob.getJobID()); + failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER); return false; } @@ -197,6 +202,7 @@ public boolean run() log.info("Path[%s] didn't exist!?", partitionInfoPath); } } + config.setShardSpecs(shardSpecs); log.info( "DetermineHashedPartitionsJob took %d millis", @@ -210,6 +216,42 @@ public boolean run() } } + @Override + public Map getStats() + { + if (groupByJob == null) { + return null; + } + + try { + Counters jobCounters = groupByJob.getCounters(); + + Map metrics = TaskMetricsUtils.makeIngestionRowMetrics( + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).getValue() + ); + + return metrics; + } + catch (IllegalStateException ise) { + log.debug("Couldn't get counters due to job state"); + return null; + } + catch (Exception e) { + log.debug(e, "Encountered exception in getStats()."); + return null; + } + } + + @Nullable + @Override + public String getErrorMessage() + { + return failureCause; + } + public static class DetermineCardinalityMapper extends HadoopDruidIndexerMapper { private static HashFunction hashFunction = Hashing.murmur3_128(); @@ -269,9 +311,12 @@ protected void innerMap( } interval = maybeInterval.get(); } + hyperLogLogs .get(interval) .add(hashFunction.hashBytes(HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsBytes(groupKey)).asBytes()); + + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).increment(1); } @Override diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java index 46f8aa6fed83..8052469daa23 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java @@ -54,6 +54,7 @@ import org.apache.hadoop.io.NullWritable; import 
org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.InvalidJobConfException; +import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.Mapper; @@ -70,6 +71,7 @@ import org.joda.time.Interval; import org.joda.time.chrono.ISOChronology; +import javax.annotation.Nullable; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -100,6 +102,10 @@ public class DeterminePartitionsJob implements Jobby private final HadoopDruidIndexerConfig config; + private Job groupByJob; + + private String failureCause; + public DeterminePartitionsJob( HadoopDruidIndexerConfig config ) @@ -124,7 +130,7 @@ public boolean run() } if (!config.getPartitionsSpec().isAssumeGrouped()) { - final Job groupByJob = Job.getInstance( + groupByJob = Job.getInstance( new Configuration(), StringUtils.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()) ); @@ -155,6 +161,7 @@ public boolean run() if (!groupByJob.waitForCompletion(true)) { log.error("Job failed: %s", groupByJob.getJobID()); + failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER); return false; } } else { @@ -212,6 +219,7 @@ public boolean run() if (!dimSelectionJob.waitForCompletion(true)) { log.error("Job failed: %s", dimSelectionJob.getJobID().toString()); + failureCause = Utils.getFailureMessage(dimSelectionJob, config.JSON_MAPPER); return false; } @@ -255,6 +263,42 @@ public boolean run() } } + @Override + public Map getStats() + { + if (groupByJob == null) { + return null; + } + + try { + Counters jobCounters = groupByJob.getCounters(); + + Map metrics = TaskMetricsUtils.makeIngestionRowMetrics( + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).getValue() + ); + + return metrics; + } + catch (IllegalStateException ise) { + log.debug("Couldn't get counters due to job state"); + return null; + } + catch (Exception e) { + log.debug(e, "Encountered exception in getStats()."); + return null; + } + } + + @Nullable + @Override + public String getErrorMessage() + { + return failureCause; + } + public static class DeterminePartitionsGroupByMapper extends HadoopDruidIndexerMapper { private Granularity rollupGranularity = null; @@ -282,6 +326,8 @@ protected void innerMap( new BytesWritable(HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsBytes(groupKey)), NullWritable.get() ); + + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).increment(1); } } diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidDetermineConfigurationJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidDetermineConfigurationJob.java index 0229b073beeb..3adcf31c00e1 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidDetermineConfigurationJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidDetermineConfigurationJob.java @@ -38,6 +38,7 @@ public class HadoopDruidDetermineConfigurationJob implements Jobby { private static final Logger log = new Logger(HadoopDruidDetermineConfigurationJob.class); private final 
HadoopDruidIndexerConfig config; + private Jobby job; @Inject public HadoopDruidDetermineConfigurationJob( @@ -50,12 +51,11 @@ public HadoopDruidDetermineConfigurationJob( @Override public boolean run() { - List jobs = Lists.newArrayList(); - JobHelper.ensurePaths(config); if (config.isDeterminingPartitions()) { - jobs.add(config.getPartitionsSpec().getPartitionJob(config)); + job = config.getPartitionsSpec().getPartitionJob(config); + return JobHelper.runSingleJob(job, config); } else { int shardsPerInterval = config.getPartitionsSpec().getNumShards(); Map> shardSpecs = Maps.newTreeMap(); @@ -86,10 +86,27 @@ public boolean run() } } config.setShardSpecs(shardSpecs); + return true; } + } - return JobHelper.runJobs(jobs, config); + @Override + public Map getStats() + { + if (job == null) { + return null; + } + return job.getStats(); } + @Override + public String getErrorMessage() + { + if (job == null) { + return null; + } + + return job.getErrorMessage(); + } } diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerConfig.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerConfig.java index 569bebdcffa5..f07bcb702343 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerConfig.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerConfig.java @@ -126,7 +126,11 @@ public void configure(Binder binder) public enum IndexJobCounters { - INVALID_ROW_COUNTER + INVALID_ROW_COUNTER, + ROWS_PROCESSED_COUNTER, + ROWS_PROCESSED_WITH_ERRORS_COUNTER, + ROWS_UNPARSEABLE_COUNTER, + ROWS_THROWN_AWAY_COUNTER } public static HadoopDruidIndexerConfig fromSpec(HadoopIngestionSpec spec) @@ -370,6 +374,16 @@ public int getShardSpecCount(Bucket bucket) return schema.getTuningConfig().getShardSpecs().get(bucket.time.getMillis()).size(); } + public boolean isLogParseExceptions() + { + return schema.getTuningConfig().isLogParseExceptions(); + } + + public int getMaxParseExceptions() + { + return schema.getTuningConfig().getMaxParseExceptions(); + } + /** * Job instance should have Configuration set (by calling {@link #addJobProperties(Job)} * or via injected system properties) before this method is called. 
The {@link PathSpec} may diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerJob.java index d46b73cd4c7c..e4096122c025 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerJob.java @@ -26,7 +26,9 @@ import io.druid.java.util.common.logger.Logger; import io.druid.timeline.DataSegment; +import javax.annotation.Nullable; import java.util.List; +import java.util.Map; /** */ @@ -92,8 +94,28 @@ public boolean run() ); - JobHelper.runJobs(jobs, config); - return true; + return JobHelper.runJobs(jobs, config); + } + + @Override + public Map getStats() + { + if (indexJob == null) { + return null; + } + + return indexJob.getStats(); + } + + @Nullable + @Override + public String getErrorMessage() + { + if (indexJob == null) { + return null; + } + + return indexJob.getErrorMessage(); } public List getPublishedSegments() diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java index b5707c4fa598..f905a24c9c23 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopDruidIndexerMapper.java @@ -24,12 +24,15 @@ import io.druid.data.input.impl.InputRowParser; import io.druid.data.input.impl.StringInputRowParser; import io.druid.java.util.common.DateTimes; +import io.druid.java.util.common.Intervals; import io.druid.java.util.common.RE; +import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.collect.Utils; import io.druid.java.util.common.logger.Logger; import io.druid.java.util.common.parsers.ParseException; import io.druid.segment.indexing.granularity.GranularitySpec; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; @@ -63,37 +66,70 @@ public HadoopDruidIndexerConfig getConfig() protected void map(Object key, Object value, Context context) throws IOException, InterruptedException { try { - final List inputRows; - try { - inputRows = parseInputRow(value, parser); - } - catch (ParseException e) { - if (reportParseExceptions) { - throw e; - } - log.debug(e, "Ignoring invalid row [%s] due to parsing error", value); - context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1); - return; // we're ignoring this invalid row - } + final List inputRows = parseInputRow(value, parser); for (InputRow inputRow : inputRows) { - if (inputRow == null) { - // Throw away null rows from the parser. - log.debug("Throwing away row [%s]", value); - continue; + try { + if (inputRow == null) { + // Throw away null rows from the parser. 
+ log.debug("Throwing away row [%s]", value); + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).increment(1); + continue; + } + + if (!Intervals.ETERNITY.contains(inputRow.getTimestamp())) { + final String errorMsg = StringUtils.format( + "Encountered row with timestamp that cannot be represented as a long: [%s]", + inputRow + ); + throw new ParseException(errorMsg); + } + + if (!granularitySpec.bucketIntervals().isPresent() + || granularitySpec.bucketInterval(DateTimes.utc(inputRow.getTimestampFromEpoch())) + .isPresent()) { + innerMap(inputRow, context, reportParseExceptions); + } else { + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).increment(1); + } } - if (!granularitySpec.bucketIntervals().isPresent() - || granularitySpec.bucketInterval(DateTimes.utc(inputRow.getTimestampFromEpoch())) - .isPresent()) { - innerMap(inputRow, context, reportParseExceptions); + catch (ParseException pe) { + handleParseException(pe, context); } } } + catch (ParseException pe) { + handleParseException(pe, context); + } catch (RuntimeException e) { throw new RE(e, "Failure on row[%s]", value); } } + private void handleParseException(ParseException pe, Context context) + { + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER).increment(1); + Counter unparseableCounter = context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER); + Counter processedWithErrorsCounter = context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER); + + if (pe.isFromPartiallyValidRow()) { + processedWithErrorsCounter.increment(1); + } else { + unparseableCounter.increment(1); + } + + if (config.isLogParseExceptions()) { + log.error(pe, "Encountered parse exception: "); + } + + long rowsUnparseable = unparseableCounter.getValue(); + long rowsProcessedWithError = processedWithErrorsCounter.getValue(); + if (rowsUnparseable + rowsProcessedWithError > config.getMaxParseExceptions()) { + log.error("Max parse exceptions exceeded, terminating task..."); + throw new RuntimeException("Max parse exceptions exceeded, terminating task...", pe); + } + } + private static List parseInputRow(Object value, InputRowParser parser) { if (parser instanceof StringInputRowParser && value instanceof Text) { diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java index aeb72c033f87..a997e40d2994 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/HadoopTuningConfig.java @@ -31,6 +31,7 @@ import io.druid.segment.IndexSpec; import io.druid.segment.indexing.TuningConfig; +import javax.annotation.Nullable; import java.util.List; import java.util.Map; @@ -67,6 +68,8 @@ public static HadoopTuningConfig makeDefaultTuningConfig() DEFAULT_NUM_BACKGROUND_PERSIST_THREADS, false, false, + null, + null, null ); } @@ -88,6 +91,8 @@ public static HadoopTuningConfig makeDefaultTuningConfig() private final boolean forceExtendableShardSpecs; private final boolean useExplicitVersion; private final List allowedHadoopPrefix; + private final boolean logParseExceptions; + private final int maxParseExceptions; @JsonCreator public HadoopTuningConfig( @@ -100,7 +105,7 @@ public HadoopTuningConfig( final @JsonProperty("leaveIntermediate") boolean leaveIntermediate, final @JsonProperty("cleanupOnFailure") Boolean 
cleanupOnFailure, final @JsonProperty("overwriteFiles") boolean overwriteFiles, - final @JsonProperty("ignoreInvalidRows") boolean ignoreInvalidRows, + final @Deprecated @JsonProperty("ignoreInvalidRows") boolean ignoreInvalidRows, final @JsonProperty("jobProperties") Map jobProperties, final @JsonProperty("combineText") boolean combineText, final @JsonProperty("useCombiner") Boolean useCombiner, @@ -111,7 +116,9 @@ public HadoopTuningConfig( final @JsonProperty("numBackgroundPersistThreads") Integer numBackgroundPersistThreads, final @JsonProperty("forceExtendableShardSpecs") boolean forceExtendableShardSpecs, final @JsonProperty("useExplicitVersion") boolean useExplicitVersion, - final @JsonProperty("allowedHadoopPrefix") List allowedHadoopPrefix + final @JsonProperty("allowedHadoopPrefix") List allowedHadoopPrefix, + final @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + final @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions ) { this.workingPath = workingPath; @@ -138,6 +145,13 @@ public HadoopTuningConfig( Preconditions.checkArgument(this.numBackgroundPersistThreads >= 0, "Not support persistBackgroundCount < 0"); this.useExplicitVersion = useExplicitVersion; this.allowedHadoopPrefix = allowedHadoopPrefix == null ? ImmutableList.of() : allowedHadoopPrefix; + + if (!this.ignoreInvalidRows) { + this.maxParseExceptions = 0; + } else { + this.maxParseExceptions = maxParseExceptions == null ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions; + } + this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; } @JsonProperty @@ -253,6 +267,18 @@ public List getUserAllowedHadoopPrefix() return allowedHadoopPrefix; } + @JsonProperty + public boolean isLogParseExceptions() + { + return logParseExceptions; + } + + @JsonProperty + public int getMaxParseExceptions() + { + return maxParseExceptions; + } + public HadoopTuningConfig withWorkingPath(String path) { return new HadoopTuningConfig( @@ -274,7 +300,9 @@ public HadoopTuningConfig withWorkingPath(String path) numBackgroundPersistThreads, forceExtendableShardSpecs, useExplicitVersion, - allowedHadoopPrefix + allowedHadoopPrefix, + logParseExceptions, + maxParseExceptions ); } @@ -299,7 +327,9 @@ public HadoopTuningConfig withVersion(String ver) numBackgroundPersistThreads, forceExtendableShardSpecs, useExplicitVersion, - allowedHadoopPrefix + allowedHadoopPrefix, + logParseExceptions, + maxParseExceptions ); } @@ -324,7 +354,9 @@ public HadoopTuningConfig withShardSpecs(Map> specs numBackgroundPersistThreads, forceExtendableShardSpecs, useExplicitVersion, - allowedHadoopPrefix + allowedHadoopPrefix, + logParseExceptions, + maxParseExceptions ); } } diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java index cd2389f52ea9..b5708b94354c 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java @@ -43,6 +43,7 @@ import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.concurrent.Execs; import io.druid.java.util.common.logger.Logger; +import io.druid.java.util.common.parsers.ParseException; import io.druid.query.aggregation.AggregatorFactory; import io.druid.segment.BaseProgressIndicator; import io.druid.segment.ProgressIndicator; @@ -137,6 +138,7 @@ public static List 
getPublishedSegments(HadoopDruidIndexerConfig co private final HadoopDruidIndexerConfig config; private IndexGeneratorStats jobStats; + private Job job; public IndexGeneratorJob( HadoopDruidIndexerConfig config @@ -155,7 +157,7 @@ protected void setReducerClass(final Job job) public boolean run() { try { - Job job = Job.getInstance( + job = Job.getInstance( new Configuration(), StringUtils.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()) ); @@ -225,6 +227,45 @@ public boolean run() } } + @Override + public Map getStats() + { + if (job == null) { + return null; + } + + try { + Counters jobCounters = job.getCounters(); + + Map metrics = TaskMetricsUtils.makeIngestionRowMetrics( + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_WITH_ERRORS_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_UNPARSEABLE_COUNTER).getValue(), + jobCounters.findCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER).getValue() + ); + + return metrics; + } + catch (IllegalStateException ise) { + log.debug("Couldn't get counters due to job state"); + return null; + } + catch (Exception e) { + log.debug(e, "Encountered exception in getStats()."); + return null; + } + } + + @Override + public String getErrorMessage() + { + if (job == null) { + return null; + } + + return Utils.getFailureMessage(job, config.JSON_MAPPER); + } + private static IncrementalIndex makeIncrementalIndex( Bucket theBucket, AggregatorFactory[] aggs, @@ -316,10 +357,18 @@ protected void innerMap( // type SegmentInputRow serves as a marker that these InputRow instances have already been combined // and they contain the columns as they show up in the segment after ingestion, not what you would see in raw // data - byte[] serializedInputRow = inputRow instanceof SegmentInputRow ? - InputRowSerde.toBytes(typeHelperMap, inputRow, aggsForSerializingSegmentInputRow, reportParseExceptions) - : - InputRowSerde.toBytes(typeHelperMap, inputRow, aggregators, reportParseExceptions); + InputRowSerde.SerializeResult serializeResult = inputRow instanceof SegmentInputRow ? + InputRowSerde.toBytes( + typeHelperMap, + inputRow, + aggsForSerializingSegmentInputRow + ) + : + InputRowSerde.toBytes( + typeHelperMap, + inputRow, + aggregators + ); context.write( new SortableBytes( @@ -330,8 +379,19 @@ protected void innerMap( .put(hashedDimensions) .array() ).toBytesWritable(), - new BytesWritable(serializedInputRow) + new BytesWritable(serializeResult.getSerializedRow()) + ); + + ParseException pe = IncrementalIndex.getCombinedParseException( + inputRow, + serializeResult.getParseExceptionMessages(), + null ); + if (pe != null) { + throw pe; + } else { + context.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_PROCESSED_COUNTER).increment(1); + } } } @@ -406,11 +466,11 @@ private void flushIndexToContextAndClose(BytesWritable key, IncrementalIndex ind InputRow inputRow = getInputRowFromRow(row, dimensions); // reportParseExceptions is true as any unparseable data is already handled by the mapper. 
- byte[] serializedRow = InputRowSerde.toBytes(typeHelperMap, inputRow, combiningAggs, true); + InputRowSerde.SerializeResult serializeResult = InputRowSerde.toBytes(typeHelperMap, inputRow, combiningAggs); context.write( key, - new BytesWritable(serializedRow) + new BytesWritable(serializeResult.getSerializedRow()) ); } index.close(); @@ -629,7 +689,7 @@ public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) context.progress(); final InputRow inputRow = index.formatRow(InputRowSerde.fromBytes(typeHelperMap, bw.getBytes(), aggregators)); - int numRows = index.add(inputRow); + int numRows = index.add(inputRow).getRowCount(); ++lineCount; diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java b/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java index cd1dd531604a..4f0d9d4c81a3 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/InputRowSerde.java @@ -47,6 +47,7 @@ import java.io.DataInput; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -65,7 +66,7 @@ public interface IndexSerdeTypeHelper { ValueType getType(); - void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions); + void serialize(ByteArrayDataOutput out, Object value); T deserialize(ByteArrayDataInput in); } @@ -96,6 +97,31 @@ public static Map getTypeHelperMap(DimensionsSpec return typeHelperMap; } + public static class SerializeResult + { + private final byte[] serializedRow; + private final List parseExceptionMessages; + + public SerializeResult( + final byte[] serializedRow, + final List parseExceptionMessages + ) + { + this.serializedRow = serializedRow; + this.parseExceptionMessages = parseExceptionMessages; + } + + public byte[] getSerializedRow() + { + return serializedRow; + } + + public List getParseExceptionMessages() + { + return parseExceptionMessages; + } + } + public static class StringIndexSerdeTypeHelper implements IndexSerdeTypeHelper> { @Override @@ -105,7 +131,7 @@ public ValueType getType() } @Override - public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public void serialize(ByteArrayDataOutput out, Object value) { List values = Rows.objectToStrings(value); try { @@ -137,15 +163,27 @@ public ValueType getType() } @Override - public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public void serialize(ByteArrayDataOutput out, Object value) { - Long ret = DimensionHandlerUtils.convertObjectToLong(value, reportParseExceptions); + ParseException exceptionToThrow = null; + Long ret = null; + try { + ret = DimensionHandlerUtils.convertObjectToLong(value, true); + } + catch (ParseException pe) { + exceptionToThrow = pe; + } + if (ret == null) { // remove null -> zero conversion when https://github.com/druid-io/druid/pull/5278 series of patches is merged // we'll also need to change the serialized encoding so that it can represent numeric nulls ret = DimensionHandlerUtils.ZERO_LONG; } out.writeLong(ret); + + if (exceptionToThrow != null) { + throw exceptionToThrow; + } } @Override @@ -164,15 +202,27 @@ public ValueType getType() } @Override - public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public void serialize(ByteArrayDataOutput out, Object value) { - Float ret = DimensionHandlerUtils.convertObjectToFloat(value, reportParseExceptions); + ParseException exceptionToThrow = null; 
+ Float ret = null; + try { + ret = DimensionHandlerUtils.convertObjectToFloat(value, true); + } + catch (ParseException pe) { + exceptionToThrow = pe; + } + if (ret == null) { // remove null -> zero conversion when https://github.com/druid-io/druid/pull/5278 series of patches is merged // we'll also need to change the serialized encoding so that it can represent numeric nulls ret = DimensionHandlerUtils.ZERO_FLOAT; } out.writeFloat(ret); + + if (exceptionToThrow != null) { + throw exceptionToThrow; + } } @Override @@ -191,15 +241,27 @@ public ValueType getType() } @Override - public void serialize(ByteArrayDataOutput out, Object value, boolean reportParseExceptions) + public void serialize(ByteArrayDataOutput out, Object value) { - Double ret = DimensionHandlerUtils.convertObjectToDouble(value, reportParseExceptions); + ParseException exceptionToThrow = null; + Double ret = null; + try { + ret = DimensionHandlerUtils.convertObjectToDouble(value, true); + } + catch (ParseException pe) { + exceptionToThrow = pe; + } + if (ret == null) { // remove null -> zero conversion when https://github.com/druid-io/druid/pull/5278 series of patches is merged // we'll also need to change the serialized encoding so that it can represent numeric nulls ret = DimensionHandlerUtils.ZERO_DOUBLE; } out.writeDouble(ret); + + if (exceptionToThrow != null) { + throw exceptionToThrow; + } } @Override @@ -209,14 +271,14 @@ public Double deserialize(ByteArrayDataInput in) } } - public static final byte[] toBytes( + public static final SerializeResult toBytes( final Map typeHelperMap, final InputRow row, - AggregatorFactory[] aggs, - boolean reportParseExceptions + AggregatorFactory[] aggs ) { try { + List parseExceptionMessages = new ArrayList<>(); ByteArrayDataOutput out = ByteStreams.newDataOutput(); //write timestamp @@ -233,7 +295,13 @@ public static final byte[] toBytes( typeHelper = STRING_HELPER; } writeString(dim, out); - typeHelper.serialize(out, row.getRaw(dim), reportParseExceptions); + + try { + typeHelper.serialize(out, row.getRaw(dim)); + } + catch (ParseException pe) { + parseExceptionMessages.add(pe.getMessage()); + } } } @@ -264,10 +332,8 @@ public InputRow get() } catch (ParseException e) { // "aggregate" can throw ParseExceptions if a selector expects something but gets something else. 
- if (reportParseExceptions) { - throw new ParseException(e, "Encountered parse error for aggregator[%s]", k); - } log.debug(e, "Encountered parse error, skipping aggregator[%s].", k); + parseExceptionMessages.add(e.getMessage()); } String t = aggFactory.getTypeName(); @@ -287,7 +353,7 @@ public InputRow get() } } - return out.toByteArray(); + return new SerializeResult(out.toByteArray(), parseExceptionMessages); } catch (IOException ex) { throw new RuntimeException(ex); diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java b/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java index db44e01e422a..221c8a033939 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java @@ -345,19 +345,40 @@ public static void ensurePaths(HadoopDruidIndexerConfig config) } } + public static boolean runSingleJob(Jobby job, HadoopDruidIndexerConfig config) + { + boolean succeeded = job.run(); + + if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) { + if (succeeded || config.getSchema().getTuningConfig().isCleanupOnFailure()) { + Path workingPath = config.makeIntermediatePath(); + log.info("Deleting path[%s]", workingPath); + try { + Configuration conf = injectSystemProperties(new Configuration()); + config.addJobProperties(conf); + workingPath.getFileSystem(conf).delete(workingPath, true); + } + catch (IOException e) { + log.error(e, "Failed to cleanup path[%s]", workingPath); + } + } + } + + return succeeded; + } + public static boolean runJobs(List jobs, HadoopDruidIndexerConfig config) { - String failedMessage = null; + boolean succeeded = true; for (Jobby job : jobs) { - if (failedMessage == null) { - if (!job.run()) { - failedMessage = StringUtils.format("Job[%s] failed!", job.getClass()); - } + if (!job.run()) { + succeeded = false; + break; } } if (!config.getSchema().getTuningConfig().isLeaveIntermediate()) { - if (failedMessage == null || config.getSchema().getTuningConfig().isCleanupOnFailure()) { + if (succeeded || config.getSchema().getTuningConfig().isCleanupOnFailure()) { Path workingPath = config.makeIntermediatePath(); log.info("Deleting path[%s]", workingPath); try { @@ -371,11 +392,7 @@ public static boolean runJobs(List jobs, HadoopDruidIndexerConfig config) } } - if (failedMessage != null) { - throw new ISE(failedMessage); - } - - return true; + return succeeded; } public static DataSegment serializeOutIndex( diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java b/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java index 3f3523e74049..1a899df18ee3 100644 --- a/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java +++ b/indexing-hadoop/src/main/java/io/druid/indexer/Utils.java @@ -20,15 +20,19 @@ package io.druid.indexer; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Maps; import io.druid.jackson.DefaultObjectMapper; import io.druid.java.util.common.jackson.JacksonUtils; import io.druid.java.util.common.ISE; +import io.druid.java.util.common.logger.Logger; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.TaskCompletionEvent; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.ReflectionUtils; @@ -41,6 +45,7 @@ 
*/ public class Utils { + private static final Logger log = new Logger(Utils.class); private static final ObjectMapper jsonMapper = new DefaultObjectMapper(); public static OutputStream makePathAndOutputStream(JobContext job, Path outputPath, boolean deleteExisting) @@ -123,4 +128,25 @@ public static void storeStats( stats ); } + + public static String getFailureMessage(Job failedJob, ObjectMapper jsonMapper) + { + try { + Map taskDiagsMap = Maps.newHashMap(); + TaskCompletionEvent[] completionEvents = failedJob.getTaskCompletionEvents(0, 100); + for (TaskCompletionEvent tce : completionEvents) { + String[] taskDiags = failedJob.getTaskDiagnostics(tce.getTaskAttemptId()); + String combinedTaskDiags = ""; + for (String taskDiag : taskDiags) { + combinedTaskDiags += taskDiag; + } + taskDiagsMap.put(tce.getTaskAttemptId().toString(), combinedTaskDiags); + } + return jsonMapper.writeValueAsString(taskDiagsMap); + } + catch (IOException | InterruptedException ie) { + log.error(ie, "couldn't get failure cause for job [%s]", failedJob.getJobName()); + return null; + } + } } diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/BatchDeltaIngestionTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/BatchDeltaIngestionTest.java index 59d1f3103553..b7bb444c1cc7 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/BatchDeltaIngestionTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/BatchDeltaIngestionTest.java @@ -371,7 +371,7 @@ private void testIngestion( ) throws Exception { IndexGeneratorJob job = new IndexGeneratorJob(config); - JobHelper.runJobs(ImmutableList.of(job), config); + Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job), config)); File segmentFolder = new File( StringUtils.format( @@ -492,6 +492,8 @@ private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig( null, false, false, + null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/DetermineHashedPartitionsJobTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/DetermineHashedPartitionsJobTest.java index 4ef1d02bd804..8d656f20d4b8 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/DetermineHashedPartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/DetermineHashedPartitionsJobTest.java @@ -210,6 +210,8 @@ public DetermineHashedPartitionsJobTest( null, false, false, + null, + null, null ) ); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/DeterminePartitionsJobTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/DeterminePartitionsJobTest.java index 0496cb3d7f5d..908425198783 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/DeterminePartitionsJobTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/DeterminePartitionsJobTest.java @@ -271,6 +271,8 @@ public DeterminePartitionsJobTest( null, false, false, + null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerConfigTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerConfigTest.java index 2b134c2aa364..2ed052eafec9 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerConfigTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerConfigTest.java @@ -96,6 +96,8 @@ public void testHashedBucketSelection() null, false, false, + null, + null, null ) ); @@ -170,6 +172,8 @@ public void testNoneShardSpecBucketSelection() null, false, false, + null, + null, null ) ); diff --git 
a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerMapperTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerMapperTest.java index dd7975d9b0a2..c4e5db5fe01f 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerMapperTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopDruidIndexerMapperTest.java @@ -42,11 +42,15 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Mapper; import org.easymock.EasyMock; import org.junit.Assert; import org.junit.Test; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -144,6 +148,8 @@ public void testHadoopyStringParserWithTransformSpec() throws Exception ); final Mapper.Context mapContext = EasyMock.mock(Mapper.Context.class); EasyMock.expect(mapContext.getConfiguration()).andReturn(hadoopConfig).once(); + EasyMock.expect(mapContext.getCounter(HadoopDruidIndexerConfig.IndexJobCounters.ROWS_THROWN_AWAY_COUNTER)) + .andReturn(getTestCounter()); EasyMock.replay(mapContext); mapper.setup(mapContext); final List> rows = ImmutableList.of( @@ -189,6 +195,66 @@ private static Map rowToMap(final InputRow row) return builder.build(); } + private static Counter getTestCounter() + { + return new Counter() + { + @Override + public void setDisplayName(String displayName) + { + + } + + @Override + public String getName() + { + return null; + } + + @Override + public String getDisplayName() + { + return null; + } + + @Override + public long getValue() + { + return 0; + } + + @Override + public void setValue(long value) + { + + } + + @Override + public void increment(long incr) + { + + } + + @Override + public Counter getUnderlyingCounter() + { + return null; + } + + @Override + public void write(DataOutput out) throws IOException + { + + } + + @Override + public void readFields(DataInput in) throws IOException + { + + } + }; + } + public static class MyMapper extends HadoopDruidIndexerMapper { private final List rows = new ArrayList<>(); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopTuningConfigTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopTuningConfigTest.java index 753379ba709b..9e4a26a22b43 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/HadoopTuningConfigTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/HadoopTuningConfigTest.java @@ -58,6 +58,8 @@ public void testSerde() throws Exception null, true, true, + null, + null, null ); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java index 9eb75e27b779..3bfb1fb39832 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorCombinerTest.java @@ -175,8 +175,8 @@ public void testMultipleRowsMerged() throws Exception ) ); List rows = Lists.newArrayList( - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true)), - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true)) + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators).getSerializedRow()), + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators).getSerializedRow()) ); 
Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class); @@ -253,8 +253,8 @@ public void testMultipleRowsNotMerged() throws Exception Map typeHelperMap = InputRowSerde.getTypeHelperMap(dimensionsSpec); List rows = Lists.newArrayList( - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators, true)), - new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators, true)) + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row1, aggregators).getSerializedRow()), + new BytesWritable(InputRowSerde.toBytes(typeHelperMap, row2, aggregators).getSerializedRow()) ); Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorJobTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorJobTest.java index 5fc2d1c5f256..1b422b6c223e 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorJobTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/IndexGeneratorJobTest.java @@ -523,6 +523,8 @@ public void setUp() throws Exception null, forceExtendableShardSpecs, false, + null, + null, null ) ) @@ -580,7 +582,7 @@ public void testIndexGeneratorJob() throws IOException private void verifyJob(IndexGeneratorJob job) throws IOException { - JobHelper.runJobs(ImmutableList.of(job), config); + Assert.assertTrue(JobHelper.runJobs(ImmutableList.of(job), config)); int segmentNum = 0; for (DateTime currTime = interval.getStart(); currTime.isBefore(interval.getEnd()); currTime = currTime.plusDays(1)) { diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java index 71609e42dd32..0b72d31a71aa 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/InputRowSerdeTest.java @@ -30,7 +30,6 @@ import io.druid.data.input.impl.StringDimensionSchema; import io.druid.hll.HyperLogLogCollector; import io.druid.jackson.AggregatorsModule; -import io.druid.java.util.common.parsers.ParseException; import io.druid.query.aggregation.Aggregator; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.DoubleSumAggregator; @@ -124,7 +123,8 @@ public Aggregator factorize(ColumnSelectorFactory metricFactory) null ); - byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, false); // Ignore Unparseable aggregator + byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories) + .getSerializedRow(); // Ignore Unparseable aggregator InputRow out = InputRowSerde.fromBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), data, aggregatorFactories); Assert.assertEquals(timestamp, out.getTimestampFromEpoch()); @@ -173,14 +173,21 @@ public void testThrowParseExceptions() null ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("Encountered parse error for aggregator[unparseable]"); - InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + InputRowSerde.SerializeResult result = InputRowSerde.toBytes( + InputRowSerde.getTypeHelperMap(dimensionsSpec), + in, + aggregatorFactories + ); + Assert.assertEquals( + Arrays.asList("Unable to parse value[m3v] for field[m3]"), + result.getParseExceptionMessages() + ); } @Test public void testDimensionParseExceptions() { + InputRowSerde.SerializeResult 
result; InputRow in = new MapBasedInputRow( timestamp, dims, @@ -190,8 +197,6 @@ public void testDimensionParseExceptions() new LongSumAggregatorFactory("m2out", "m2") }; - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [d1v] to long"); DimensionsSpec dimensionsSpec = new DimensionsSpec( Arrays.asList( new LongDimensionSchema("d1") @@ -199,10 +204,12 @@ public void testDimensionParseExceptions() null, null ); - InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories); + Assert.assertEquals( + Arrays.asList("could not convert value [d1v] to long"), + result.getParseExceptionMessages() + ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [d1v] to float"); dimensionsSpec = new DimensionsSpec( Arrays.asList( new FloatDimensionSchema("d1") @@ -210,10 +217,12 @@ public void testDimensionParseExceptions() null, null ); - InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories); + Assert.assertEquals( + Arrays.asList("could not convert value [d1v] to float"), + result.getParseExceptionMessages() + ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [d1v] to double"); dimensionsSpec = new DimensionsSpec( Arrays.asList( new DoubleDimensionSchema("d1") @@ -221,6 +230,10 @@ public void testDimensionParseExceptions() null, null ); - InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories, true); + result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories); + Assert.assertEquals( + Arrays.asList("could not convert value [d1v] to double"), + result.getParseExceptionMessages() + ); } } diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/JobHelperTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/JobHelperTest.java index e8b5888e324a..c768e2c8e10d 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/JobHelperTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/JobHelperTest.java @@ -126,6 +126,8 @@ public void setup() throws Exception null, false, false, + null, + null, null ) ) diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/path/GranularityPathSpecTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/path/GranularityPathSpecTest.java index 3aec576f4f56..b4caeed21f43 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/path/GranularityPathSpecTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/path/GranularityPathSpecTest.java @@ -73,6 +73,8 @@ public class GranularityPathSpecTest null, false, false, + null, + null, null ); diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java index 255e0a9dc4eb..c8d763544340 100644 --- a/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java +++ b/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java @@ -212,6 +212,8 @@ public InputStream openStream() throws IOException null, false, false, + null, + null, null ) ) @@ -251,7 +253,7 @@ public boolean run() new 
SQLMetadataStorageUpdaterJobHandler(connector) ) ); - JobHelper.runJobs(jobs, hadoopDruidIndexerConfig); + Assert.assertTrue(JobHelper.runJobs(jobs, hadoopDruidIndexerConfig)); } private List getDataSegments( diff --git a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java new file mode 100644 index 000000000000..3c636f2678f4 --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReport.java @@ -0,0 +1,102 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexing.common; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; + +import java.util.Objects; + +@JsonTypeName("ingestionStatsAndErrors") +public class IngestionStatsAndErrorsTaskReport implements TaskReport +{ + public static final String REPORT_KEY = "ingestionStatsAndErrors"; + + @JsonProperty + private String taskId; + + @JsonProperty + private IngestionStatsAndErrorsTaskReportData payload; + + public IngestionStatsAndErrorsTaskReport( + @JsonProperty("taskId") String taskId, + @JsonProperty("payload") IngestionStatsAndErrorsTaskReportData payload + ) + { + this.taskId = taskId; + this.payload = payload; + } + + @Override + public String getTaskId() + { + return taskId; + } + + @Override + public String getReportKey() + { + return REPORT_KEY; + } + + @Override + public Object getPayload() + { + return payload; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + IngestionStatsAndErrorsTaskReport that = (IngestionStatsAndErrorsTaskReport) o; + return Objects.equals(getTaskId(), that.getTaskId()) && + Objects.equals(getPayload(), that.getPayload()); + } + + @Override + public int hashCode() + { + return Objects.hash(getTaskId(), getPayload()); + } + + @Override + public String toString() + { + return "IngestionStatsAndErrorsTaskReport{" + + "taskId='" + taskId + '\'' + + ", payload=" + payload + + '}'; + } + + // TaskReports are put into a Map and serialized. + // Jackson doesn't normally serialize the TaskReports with a "type" field in that situation, + // so explictly serialize the "type" field (otherwise, deserialization fails). 
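The comment above is the crux of the new report plumbing, so a short round-trip sketch may help. It assumes only the classes added in this patch plus stock Jackson and Guava; the task id is a placeholder. Because task reports are written as a Map keyed by report name, the declared value type is the TaskReport interface, and the explicit "type" property emitted by the getType() accessor just below is what lets the map be read back into the right subtype.

    ObjectMapper mapper = new ObjectMapper();   // Druid would use its injected mapper; a plain one is shown for brevity
    Map<String, TaskReport> reports = ImmutableMap.of(
        IngestionStatsAndErrorsTaskReport.REPORT_KEY,
        new IngestionStatsAndErrorsTaskReport(
            "some-task-id",                     // placeholder
            new IngestionStatsAndErrorsTaskReportData(IngestionState.COMPLETED, null, null, null)
        )
    );
    String json = mapper.writeValueAsString(reports);
    // The embedded "type":"ingestionStatsAndErrors" field is what makes this readValue succeed:
    Map<String, TaskReport> roundTripped =
        mapper.readValue(json, new TypeReference<Map<String, TaskReport>>() {});
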
+ @JsonProperty("type") + private String getType() + { + return "ingestionStatsAndErrors"; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java new file mode 100644 index 000000000000..24114e5f11eb --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/IngestionStatsAndErrorsTaskReportData.java @@ -0,0 +1,119 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexing.common; + +import com.fasterxml.jackson.annotation.JsonProperty; +import io.druid.indexer.IngestionState; + +import java.util.Map; +import java.util.Objects; + +public class IngestionStatsAndErrorsTaskReportData +{ + @JsonProperty + private IngestionState ingestionState; + + @JsonProperty + private Map unparseableEvents; + + @JsonProperty + private Map rowStats; + + @JsonProperty + private String errorMsg; + + public IngestionStatsAndErrorsTaskReportData( + @JsonProperty("ingestionState") IngestionState ingestionState, + @JsonProperty("unparseableEvents") Map unparseableEvents, + @JsonProperty("rowStats") Map rowStats, + @JsonProperty("errorMsg") String errorMsg + ) + { + this.ingestionState = ingestionState; + this.unparseableEvents = unparseableEvents; + this.rowStats = rowStats; + this.errorMsg = errorMsg; + } + + @JsonProperty + public IngestionState getIngestionState() + { + return ingestionState; + } + + @JsonProperty + public Map getUnparseableEvents() + { + return unparseableEvents; + } + + @JsonProperty + public Map getRowStats() + { + return rowStats; + } + + @JsonProperty + public String getErrorMsg() + { + return errorMsg; + } + + public static IngestionStatsAndErrorsTaskReportData getPayloadFromTaskReports( + Map taskReports + ) + { + return (IngestionStatsAndErrorsTaskReportData) taskReports.get(IngestionStatsAndErrorsTaskReport.REPORT_KEY) + .getPayload(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + IngestionStatsAndErrorsTaskReportData that = (IngestionStatsAndErrorsTaskReportData) o; + return getIngestionState() == that.getIngestionState() && + Objects.equals(getUnparseableEvents(), that.getUnparseableEvents()) && + Objects.equals(getRowStats(), that.getRowStats()) && + Objects.equals(getErrorMsg(), that.getErrorMsg()); + } + + @Override + public int hashCode() + { + return Objects.hash(getIngestionState(), getUnparseableEvents(), getRowStats(), getErrorMsg()); + } + + @Override + public String toString() + { + return "IngestionStatsAndErrorsTaskReportData{" + + "ingestionState=" + ingestionState + + ", unparseableEvents=" + 
unparseableEvents + + ", rowStats=" + rowStats + + ", errorMsg='" + errorMsg + '\'' + + '}'; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java index eff6520741ba..335b75bc3c66 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskReport.java @@ -31,6 +31,7 @@ */ @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") @JsonSubTypes(value = { + @JsonSubTypes.Type(name = "ingestionStatsAndErrors", value = IngestionStatsAndErrorsTaskReport.class) }) public interface TaskReport { diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskReportFileWriter.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskReportFileWriter.java index eb5e9d9db40b..392fdc527aea 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskReportFileWriter.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskReportFileWriter.java @@ -24,6 +24,7 @@ import org.apache.commons.io.FileUtils; import java.io.File; +import java.util.Map; public class TaskReportFileWriter { @@ -37,14 +38,14 @@ public TaskReportFileWriter(File reportFile) this.reportsFile = reportFile; } - public void write(TaskReport report) + public void write(Map reports) { try { final File reportsFileParent = reportsFile.getParentFile(); if (reportsFileParent != null) { FileUtils.forceMkdir(reportsFileParent); } - objectMapper.writeValue(reportsFile, report); + objectMapper.writeValue(reportsFile, reports); } catch (Exception e) { log.error(e, "Encountered exception in write()."); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java b/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java index b0249440c7a1..61e64917ca82 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/TaskStatus.java @@ -34,40 +34,66 @@ */ public class TaskStatus { + public static final int MAX_ERROR_MSG_LENGTH = 100; + public static TaskStatus running(String taskId) { - return new TaskStatus(taskId, TaskState.RUNNING, -1); + return new TaskStatus(taskId, TaskState.RUNNING, -1, null); } public static TaskStatus success(String taskId) { - return new TaskStatus(taskId, TaskState.SUCCESS, -1); + return new TaskStatus(taskId, TaskState.SUCCESS, -1, null); + } + + public static TaskStatus success(String taskId, String errorMsg) + { + return new TaskStatus(taskId, TaskState.SUCCESS, -1, errorMsg); } public static TaskStatus failure(String taskId) { - return new TaskStatus(taskId, TaskState.FAILED, -1); + return new TaskStatus(taskId, TaskState.FAILED, -1, null); + } + + public static TaskStatus failure(String taskId, String errorMsg) + { + return new TaskStatus(taskId, TaskState.FAILED, -1, errorMsg); } public static TaskStatus fromCode(String taskId, TaskState code) { - return new TaskStatus(taskId, code, -1); + return new TaskStatus(taskId, code, -1, null); + } + + // The error message can be large, so truncate it to avoid storing large objects in zookeeper/metadata storage. + // The full error message will be available via a TaskReport. 
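A brief usage sketch of the truncation described above (the helper itself follows). The task id and exception are placeholders; everything else is what this patch adds: failure(String, String), MAX_ERROR_MSG_LENGTH, and getErrorMsg(). The full stack trace is not lost, it travels via the ingestionStatsAndErrors task report instead of the status object.

    String stackTrace = Throwables.getStackTraceAsString(new RuntimeException("ingestion blew up"));
    TaskStatus status = TaskStatus.failure("example-task-id", stackTrace);   // placeholder task id

    // Only a bounded prefix is persisted to ZooKeeper / the metadata store:
    // at most MAX_ERROR_MSG_LENGTH (100) characters, plus a trailing "..." when truncated.
    assert status.getErrorMsg().length() <= TaskStatus.MAX_ERROR_MSG_LENGTH + 3;
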
+ private static String truncateErrorMsg(String errorMsg) + { + if (errorMsg != null && errorMsg.length() > MAX_ERROR_MSG_LENGTH) { + return errorMsg.substring(0, MAX_ERROR_MSG_LENGTH) + "..."; + } else { + return errorMsg; + } } private final String id; private final TaskState status; private final long duration; + private final String errorMsg; @JsonCreator protected TaskStatus( @JsonProperty("id") String id, @JsonProperty("status") TaskState status, - @JsonProperty("duration") long duration + @JsonProperty("duration") long duration, + @JsonProperty("errorMsg") String errorMsg ) { this.id = id; this.status = status; this.duration = duration; + this.errorMsg = truncateErrorMsg(errorMsg); // Check class invariants. Preconditions.checkNotNull(id, "id"); @@ -92,6 +118,12 @@ public long getDuration() return duration; } + @JsonProperty("errorMsg") + public String getErrorMsg() + { + return errorMsg; + } + /** * Signals that a task is not yet complete, and is still runnable on a worker. Exactly one of isRunnable, * isSuccess, or isFailure will be true at any one time. @@ -141,7 +173,18 @@ public boolean isFailure() public TaskStatus withDuration(long _duration) { - return new TaskStatus(id, status, _duration); + return new TaskStatus(id, status, _duration, errorMsg); + } + + @Override + public String toString() + { + return Objects.toStringHelper(this) + .add("id", id) + .add("status", status) + .add("duration", duration) + .add("errorMsg", errorMsg) + .toString(); } @Override @@ -154,24 +197,15 @@ public boolean equals(Object o) return false; } TaskStatus that = (TaskStatus) o; - return duration == that.duration && - java.util.Objects.equals(id, that.id) && - status == that.status; + return getDuration() == that.getDuration() && + java.util.Objects.equals(getId(), that.getId()) && + status == that.status && + java.util.Objects.equals(getErrorMsg(), that.getErrorMsg()); } @Override public int hashCode() { - return java.util.Objects.hash(id, status, duration); - } - - @Override - public String toString() - { - return Objects.toStringHelper(this) - .add("id", id) - .add("status", status) - .add("duration", duration) - .toString(); + return java.util.Objects.hash(getId(), status, getDuration(), getErrorMsg()); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/index/RealtimeAppenderatorTuningConfig.java b/indexing-service/src/main/java/io/druid/indexing/common/index/RealtimeAppenderatorTuningConfig.java index a7084f69d436..06c6069ae009 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/index/RealtimeAppenderatorTuningConfig.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/index/RealtimeAppenderatorTuningConfig.java @@ -65,6 +65,10 @@ private static File createNewBasePersistDirectory() @Nullable private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; + private final boolean logParseExceptions; + private final int maxParseExceptions; + private final int maxSavedParseExceptions; + @JsonCreator public RealtimeAppenderatorTuningConfig( @JsonProperty("maxRowsInMemory") Integer maxRowsInMemory, @@ -77,7 +81,10 @@ public RealtimeAppenderatorTuningConfig( @JsonProperty("reportParseExceptions") Boolean reportParseExceptions, @JsonProperty("publishAndHandoffTimeout") Long publishAndHandoffTimeout, @JsonProperty("alertTimeout") Long alertTimeout, - @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory + @JsonProperty("segmentWriteOutMediumFactory") @Nullable 
SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions ) { this.maxRowsInMemory = maxRowsInMemory == null ? defaultMaxRowsInMemory : maxRowsInMemory; @@ -100,6 +107,17 @@ public RealtimeAppenderatorTuningConfig( this.alertTimeout = alertTimeout == null ? defaultAlertTimeout : alertTimeout; Preconditions.checkArgument(this.alertTimeout >= 0, "alertTimeout must be >= 0"); this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; + + if (this.reportParseExceptions) { + this.maxParseExceptions = 0; + this.maxSavedParseExceptions = maxSavedParseExceptions == null ? 0 : Math.min(1, maxSavedParseExceptions); + } else { + this.maxParseExceptions = maxParseExceptions == null ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions; + this.maxSavedParseExceptions = maxSavedParseExceptions == null + ? TuningConfig.DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS + : maxSavedParseExceptions; + } + this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; } @Override @@ -176,6 +194,24 @@ public SegmentWriteOutMediumFactory getSegmentWriteOutMediumFactory() return segmentWriteOutMediumFactory; } + @JsonProperty + public boolean isLogParseExceptions() + { + return logParseExceptions; + } + + @JsonProperty + public int getMaxParseExceptions() + { + return maxParseExceptions; + } + + @JsonProperty + public int getMaxSavedParseExceptions() + { + return maxSavedParseExceptions; + } + public RealtimeAppenderatorTuningConfig withBasePersistDirectory(File dir) { return new RealtimeAppenderatorTuningConfig( @@ -189,7 +225,10 @@ public RealtimeAppenderatorTuningConfig withBasePersistDirectory(File dir) reportParseExceptions, publishAndHandoffTimeout, alertTimeout, - segmentWriteOutMediumFactory + segmentWriteOutMediumFactory, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/index/YeOldePlumberSchool.java b/indexing-service/src/main/java/io/druid/indexing/common/index/YeOldePlumberSchool.java index 0bba29fbe881..5b25a3f04085 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/index/YeOldePlumberSchool.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/index/YeOldePlumberSchool.java @@ -130,7 +130,7 @@ public int add(InputRow row, Supplier committerSupplier) throws Index return -1; } - final int numRows = sink.add(row, false); + final int numRows = sink.add(row, false).getRowCount(); if (!sink.canAppendRow()) { persist(committerSupplier.get()); diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java index cf408e14aaa0..9e2751ffe7ff 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTask.java @@ -19,13 +19,16 @@ package io.druid.indexing.common.task; +import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import 
com.google.common.base.Optional; import com.google.common.base.Supplier; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import io.druid.data.input.Committer; @@ -35,8 +38,13 @@ import io.druid.discovery.DiscoveryDruidNode; import io.druid.discovery.DruidNodeDiscoveryProvider; import io.druid.discovery.LookupNodeService; +import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsGetter; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexing.common.TaskReport; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.SegmentTransactionalInsertAction; @@ -58,6 +66,7 @@ import io.druid.segment.indexing.RealtimeIOConfig; import io.druid.segment.realtime.FireDepartment; import io.druid.segment.realtime.FireDepartmentMetrics; +import io.druid.segment.realtime.FireDepartmentMetricsTaskMetricsGetter; import io.druid.segment.realtime.RealtimeMetricsMonitor; import io.druid.segment.realtime.appenderator.Appenderator; import io.druid.segment.realtime.appenderator.AppenderatorDriverAddResult; @@ -65,14 +74,27 @@ import io.druid.segment.realtime.appenderator.SegmentsAndMetadata; import io.druid.segment.realtime.appenderator.StreamAppenderatorDriver; import io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher; +import io.druid.segment.realtime.firehose.ChatHandler; +import io.druid.segment.realtime.firehose.ChatHandlerProvider; import io.druid.segment.realtime.firehose.ClippedFirehoseFactory; import io.druid.segment.realtime.firehose.EventReceiverFirehoseFactory; import io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory; import io.druid.segment.realtime.plumber.Committers; +import io.druid.server.security.Action; +import io.druid.server.security.AuthorizerMapper; +import io.druid.utils.CircularBuffer; import org.apache.commons.io.FileUtils; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; import java.io.File; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Queue; import java.util.Random; @@ -84,7 +106,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; -public class AppenderatorDriverRealtimeIndexTask extends AbstractTask +public class AppenderatorDriverRealtimeIndexTask extends AbstractTask implements ChatHandler { private static final String CTX_KEY_LOOKUP_TIER = "lookupTier"; @@ -121,6 +143,9 @@ private static String makeTaskId(RealtimeAppenderatorIngestionSpec spec) @JsonIgnore private volatile FireDepartmentMetrics metrics = null; + @JsonIgnore + private TaskMetricsGetter metricsGetter; + @JsonIgnore private volatile boolean gracefullyStopped = false; @@ -130,12 +155,29 @@ private static String makeTaskId(RealtimeAppenderatorIngestionSpec spec) @JsonIgnore private volatile Thread runThread = null; + @JsonIgnore + private CircularBuffer 
savedParseExceptions; + + @JsonIgnore + private final Optional chatHandlerProvider; + + @JsonIgnore + private final AuthorizerMapper authorizerMapper; + + @JsonIgnore + private IngestionState ingestionState; + + @JsonIgnore + private String errorMsg; + @JsonCreator public AppenderatorDriverRealtimeIndexTask( @JsonProperty("id") String id, @JsonProperty("resource") TaskResource taskResource, @JsonProperty("spec") RealtimeAppenderatorIngestionSpec spec, - @JsonProperty("context") Map context + @JsonProperty("context") Map context, + @JacksonInject ChatHandlerProvider chatHandlerProvider, + @JacksonInject AuthorizerMapper authorizerMapper ) { super( @@ -147,6 +189,14 @@ public AppenderatorDriverRealtimeIndexTask( ); this.spec = spec; this.pendingHandoffs = new ConcurrentLinkedQueue<>(); + this.chatHandlerProvider = Optional.fromNullable(chatHandlerProvider); + this.authorizerMapper = authorizerMapper; + + if (spec.getTuningConfig().getMaxSavedParseExceptions() > 0) { + savedParseExceptions = new CircularBuffer<>(spec.getTuningConfig().getMaxSavedParseExceptions()); + } + + this.ingestionState = IngestionState.NOT_STARTED; } @Override @@ -207,6 +257,7 @@ dataSchema, new RealtimeIOConfig(null, null, null), null ); this.metrics = fireDepartmentForMetrics.getMetrics(); + metricsGetter = new FireDepartmentMetricsTaskMetricsGetter(metrics); Supplier committerSupplier = null; final File firehoseTempDir = toolbox.getFirehoseTemporaryDir(); @@ -217,6 +268,13 @@ dataSchema, new RealtimeIOConfig(null, null, null), null StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics); try { + if (chatHandlerProvider.isPresent()) { + log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName()); + chatHandlerProvider.get().register(getId(), this, false); + } else { + log.warn("No chat handler detected"); + } + toolbox.getDataSegmentServerAnnouncer().announce(); toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode); @@ -248,6 +306,8 @@ dataSchema, new RealtimeIOConfig(null, null, null), null } } + ingestionState = IngestionState.BUILD_SEGMENTS; + // Time to read data! 
while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) { try { @@ -273,19 +333,20 @@ dataSchema, new RealtimeIOConfig(null, null, null), null throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp()); } - metrics.incrementProcessed(); + if (addResult.getParseException() != null) { + handleParseException(addResult.getParseException()); + } else { + metrics.incrementProcessed(); + } } } catch (ParseException e) { - if (tuningConfig.isReportParseExceptions()) { - throw e; - } else { - log.debug(e, "Discarded row due to exception, considering unparseable."); - metrics.incrementUnparseable(); - } + handleParseException(e); } } + ingestionState = IngestionState.COMPLETED; + if (!gracefullyStopped) { synchronized (this) { if (gracefullyStopped) { @@ -312,9 +373,18 @@ dataSchema, new RealtimeIOConfig(null, null, null), null catch (Throwable e) { log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()) .emit(); - throw e; + errorMsg = Throwables.getStackTraceAsString(e); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.failure( + getId(), + errorMsg + ); } finally { + if (chatHandlerProvider.isPresent()) { + chatHandlerProvider.get().unregister(getId()); + } + CloseQuietly.close(firehose); CloseQuietly.close(appenderator); CloseQuietly.close(driver); @@ -326,7 +396,7 @@ dataSchema, new RealtimeIOConfig(null, null, null), null } log.info("Job done!"); - toolbox.getTaskReportFileWriter().write(null); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); return TaskStatus.success(getId()); } @@ -387,6 +457,41 @@ public RealtimeAppenderatorIngestionSpec getSpec() return spec; } + + @GET + @Path("/rowStats") + @Produces(MediaType.APPLICATION_JSON) + public Response getRowStats( + @Context final HttpServletRequest req + ) + { + IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); + Map returnMap = Maps.newHashMap(); + Map totalsMap = Maps.newHashMap(); + + if (metricsGetter != null) { + totalsMap.put( + "buildSegments", + metricsGetter.getTotalMetrics() + ); + } + + returnMap.put("totals", totalsMap); + return Response.ok(returnMap).build(); + } + + @GET + @Path("/unparseableEvents") + @Produces(MediaType.APPLICATION_JSON) + public Response getUnparseableEvents( + @Context final HttpServletRequest req + ) + { + IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); + List events = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); + return Response.ok(events).build(); + } + /** * Is a firehose from this factory drainable by closing it? If so, we should drain on stopGracefully rather than * abruptly stopping. @@ -404,6 +509,66 @@ && isFirehoseDrainableByClosing(((TimedShutoffFirehoseFactory) firehoseFactory). 
&& isFirehoseDrainableByClosing(((ClippedFirehoseFactory) firehoseFactory).getDelegate())); } + private Map getTaskCompletionReports() + { + return TaskReport.buildTaskReports( + new IngestionStatsAndErrorsTaskReport( + getId(), + new IngestionStatsAndErrorsTaskReportData( + ingestionState, + getTaskCompletionUnparseableEvents(), + getTaskCompletionRowStats(), + errorMsg + ) + ) + ); + } + + private Map getTaskCompletionUnparseableEvents() + { + Map unparseableEventsMap = Maps.newHashMap(); + List buildSegmentsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions(savedParseExceptions); + if (buildSegmentsParseExceptionMessages != null) { + unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); + } + return unparseableEventsMap; + } + + private Map getTaskCompletionRowStats() + { + Map metricsMap = Maps.newHashMap(); + if (metricsGetter != null) { + metricsMap.put( + "buildSegments", + metricsGetter.getTotalMetrics() + ); + } + return metricsMap; + } + + private void handleParseException(ParseException pe) + { + if (pe.isFromPartiallyValidRow()) { + metrics.incrementProcessedWithErrors(); + } else { + metrics.incrementUnparseable(); + } + + if (spec.getTuningConfig().isLogParseExceptions()) { + log.error(pe, "Encountered parse exception: "); + } + + if (savedParseExceptions != null) { + savedParseExceptions.add(pe); + } + + if (metrics.unparseable() + metrics.processedWithErrors() + > spec.getTuningConfig().getMaxParseExceptions()) { + log.error("Max parse exceptions exceeded, terminating task..."); + throw new RuntimeException("Max parse exceptions exceeded, terminating task..."); + } + } + private void setupTimeoutAlert() { if (spec.getTuningConfig().getAlertTimeout() > 0) { diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/CompactionTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/CompactionTask.java index 411715a814fd..a751958f200d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/CompactionTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/CompactionTask.java @@ -65,6 +65,7 @@ import io.druid.segment.indexing.granularity.ArbitraryGranularitySpec; import io.druid.segment.indexing.granularity.GranularitySpec; import io.druid.segment.loading.SegmentLoadingException; +import io.druid.server.security.AuthorizerMapper; import io.druid.timeline.DataSegment; import io.druid.timeline.TimelineObjectHolder; import io.druid.timeline.VersionedIntervalTimeline; @@ -102,6 +103,9 @@ public class CompactionTask extends AbstractTask @JsonIgnore private IndexTask indexTaskSpec; + @JsonIgnore + private final AuthorizerMapper authorizerMapper; + @JsonCreator public CompactionTask( @JsonProperty("id") final String id, @@ -112,7 +116,8 @@ public CompactionTask( @Nullable @JsonProperty("dimensions") final DimensionsSpec dimensionsSpec, @Nullable @JsonProperty("tuningConfig") final IndexTuningConfig tuningConfig, @Nullable @JsonProperty("context") final Map context, - @JacksonInject ObjectMapper jsonMapper + @JacksonInject ObjectMapper jsonMapper, + @JacksonInject AuthorizerMapper authorizerMapper ) { super(getOrMakeId(id, TYPE, dataSource), null, taskResource, dataSource, context); @@ -125,6 +130,7 @@ public CompactionTask( this.tuningConfig = tuningConfig; this.jsonMapper = jsonMapper; this.segmentProvider = segments == null ? 
new SegmentProvider(dataSource, interval) : new SegmentProvider(segments); + this.authorizerMapper = authorizerMapper; } @JsonProperty @@ -195,7 +201,9 @@ public TaskStatus run(final TaskToolbox toolbox) throws Exception getTaskResource(), getDataSource(), ingestionSpec, - getContext() + getContext(), + authorizerMapper, + null ); } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java index f8e80e569a36..984a9fd6a523 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopIndexTask.java @@ -23,20 +23,27 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Optional; import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; import io.druid.indexer.HadoopDruidDetermineConfigurationJob; import io.druid.indexer.HadoopDruidIndexerConfig; import io.druid.indexer.HadoopDruidIndexerJob; import io.druid.indexer.HadoopIngestionSpec; -import io.druid.indexer.Jobby; +import io.druid.indexer.IngestionState; import io.druid.indexer.MetadataStorageUpdaterJobHandler; +import io.druid.indexer.TaskMetricsGetter; +import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskLockType; +import io.druid.indexing.common.TaskReport; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.LockAcquireAction; @@ -47,14 +54,29 @@ import io.druid.java.util.common.JodaUtils; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.logger.Logger; +import io.druid.segment.realtime.firehose.ChatHandler; +import io.druid.segment.realtime.firehose.ChatHandlerProvider; +import io.druid.server.security.Action; +import io.druid.server.security.AuthorizerMapper; import io.druid.timeline.DataSegment; import org.joda.time.Interval; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.SortedSet; -public class HadoopIndexTask extends HadoopTask +public class HadoopIndexTask extends HadoopTask implements ChatHandler { private static final Logger log = new Logger(HadoopIndexTask.class); @@ -72,6 +94,30 @@ private static String getTheDataSource(HadoopIngestionSpec spec) @JsonIgnore private final ObjectMapper jsonMapper; + @JsonIgnore + private final AuthorizerMapper authorizerMapper; + + @JsonIgnore + private final Optional chatHandlerProvider; + + @JsonIgnore + private 
InnerProcessingStatsGetter determinePartitionsStatsGetter; + + @JsonIgnore + private InnerProcessingStatsGetter buildSegmentsStatsGetter; + + @JsonIgnore + private IngestionState ingestionState; + + @JsonIgnore + private HadoopDetermineConfigInnerProcessingStatus determineConfigStatus = null; + + @JsonIgnore + private HadoopIndexGeneratorInnerProcessingStatus buildSegmentsStatus = null; + + @JsonIgnore + private String errorMsg; + /** * @param spec is used by the HadoopDruidIndexerJob to set up the appropriate parameters * for creating Druid index segments. It may be modified. @@ -90,7 +136,9 @@ public HadoopIndexTask( @JsonProperty("hadoopDependencyCoordinates") List hadoopDependencyCoordinates, @JsonProperty("classpathPrefix") String classpathPrefix, @JacksonInject ObjectMapper jsonMapper, - @JsonProperty("context") Map context + @JsonProperty("context") Map context, + @JacksonInject AuthorizerMapper authorizerMapper, + @JacksonInject ChatHandlerProvider chatHandlerProvider ) { super( @@ -101,8 +149,8 @@ public HadoopIndexTask( : hadoopDependencyCoordinates, context ); - - + this.authorizerMapper = authorizerMapper; + this.chatHandlerProvider = Optional.fromNullable(chatHandlerProvider); this.spec = spec; // Some HadoopIngestionSpec stuff doesn't make sense in the context of the indexing service @@ -118,6 +166,7 @@ public HadoopIndexTask( this.classpathPrefix = classpathPrefix; this.jsonMapper = Preconditions.checkNotNull(jsonMapper, "null ObjectMappper"); + this.ingestionState = IngestionState.NOT_STARTED; } @Override @@ -168,9 +217,46 @@ public String getClasspathPrefix() return classpathPrefix; } - @SuppressWarnings("unchecked") @Override public TaskStatus run(TaskToolbox toolbox) throws Exception + { + try { + if (chatHandlerProvider.isPresent()) { + log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName()); + chatHandlerProvider.get().register(getId(), this, false); + } else { + log.warn("No chat handler detected"); + } + + return runInternal(toolbox); + } + catch (Exception e) { + Throwable effectiveException; + if (e instanceof RuntimeException && e.getCause() instanceof InvocationTargetException) { + InvocationTargetException ite = (InvocationTargetException) e.getCause(); + effectiveException = ite.getCause(); + log.error(effectiveException, "Got invocation target exception in run(), cause: "); + } else { + effectiveException = e; + log.error(e, "Encountered exception in run():"); + } + + errorMsg = Throwables.getStackTraceAsString(effectiveException); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.failure( + getId(), + errorMsg + ); + } + finally { + if (chatHandlerProvider.isPresent()) { + chatHandlerProvider.get().unregister(getId()); + } + } + } + + @SuppressWarnings("unchecked") + private TaskStatus runInternal(TaskToolbox toolbox) throws Exception { final ClassLoader loader = buildClassLoader(toolbox); boolean determineIntervals = !spec.getDataSchema().getGranularitySpec().bucketIntervals().isPresent(); @@ -181,20 +267,56 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception new OverlordActionBasedUsedSegmentLister(toolbox) ); - final String config = invokeForeignLoader( - "io.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessing", - new String[]{ - toolbox.getObjectMapper().writeValueAsString(spec), - toolbox.getConfig().getHadoopWorkingPath(), - toolbox.getSegmentPusher().getPathForHadoop() - }, + Object determinePartitionsInnerProcessingRunner = 
getForeignClassloaderObject( + "io.druid.indexing.common.task.HadoopIndexTask$HadoopDetermineConfigInnerProcessingRunner", loader ); + determinePartitionsStatsGetter = new InnerProcessingStatsGetter(determinePartitionsInnerProcessingRunner); + + String[] determinePartitionsInput = new String[]{ + toolbox.getObjectMapper().writeValueAsString(spec), + toolbox.getConfig().getHadoopWorkingPath(), + toolbox.getSegmentPusher().getPathForHadoop() + }; + + HadoopIngestionSpec indexerSchema = null; + final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader(); + Class determinePartitionsRunnerClass = determinePartitionsInnerProcessingRunner.getClass(); + Method determinePartitionsInnerProcessingRunTask = determinePartitionsRunnerClass.getMethod( + "runTask", + determinePartitionsInput.getClass() + ); + try { + Thread.currentThread().setContextClassLoader(loader); + + ingestionState = IngestionState.DETERMINE_PARTITIONS; + + final String determineConfigStatusString = (String) determinePartitionsInnerProcessingRunTask.invoke( + determinePartitionsInnerProcessingRunner, + new Object[]{determinePartitionsInput} + ); + - final HadoopIngestionSpec indexerSchema = toolbox - .getObjectMapper() - .readValue(config, HadoopIngestionSpec.class); + determineConfigStatus = toolbox + .getObjectMapper() + .readValue(determineConfigStatusString, HadoopDetermineConfigInnerProcessingStatus.class); + indexerSchema = determineConfigStatus.getSchema(); + if (indexerSchema == null) { + errorMsg = determineConfigStatus.getErrorMsg(); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.failure( + getId(), + errorMsg + ); + } + } + catch (Exception e) { + throw new RuntimeException(e); + } + finally { + Thread.currentThread().setContextClassLoader(oldLoader); + } // We should have a lock from before we started running only if interval was specified String version; @@ -236,40 +358,187 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception log.info("Setting version to: %s", version); - final String segments = invokeForeignLoader( - "io.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessing", - new String[]{ - toolbox.getObjectMapper().writeValueAsString(indexerSchema), - version - }, + Object innerProcessingRunner = getForeignClassloaderObject( + "io.druid.indexing.common.task.HadoopIndexTask$HadoopIndexGeneratorInnerProcessingRunner", loader ); + buildSegmentsStatsGetter = new InnerProcessingStatsGetter(innerProcessingRunner); + + String[] buildSegmentsInput = new String[]{ + toolbox.getObjectMapper().writeValueAsString(indexerSchema), + version + }; + + Class buildSegmentsRunnerClass = innerProcessingRunner.getClass(); + Method innerProcessingRunTask = buildSegmentsRunnerClass.getMethod("runTask", buildSegmentsInput.getClass()); - if (segments != null) { - List publishedSegments = toolbox.getObjectMapper().readValue( - segments, - new TypeReference>() - { - } + try { + Thread.currentThread().setContextClassLoader(loader); + + ingestionState = IngestionState.BUILD_SEGMENTS; + final String jobStatusString = (String) innerProcessingRunTask.invoke( + innerProcessingRunner, + new Object[]{buildSegmentsInput} ); - toolbox.publishSegments(publishedSegments); - toolbox.getTaskReportFileWriter().write(null); - return TaskStatus.success(getId()); - } else { - toolbox.getTaskReportFileWriter().write(null); - return TaskStatus.failure(getId()); + buildSegmentsStatus = toolbox.getObjectMapper().readValue( + jobStatusString, + 
HadoopIndexGeneratorInnerProcessingStatus.class + ); + + if (buildSegmentsStatus.getDataSegments() != null) { + ingestionState = IngestionState.COMPLETED; + toolbox.publishSegments(buildSegmentsStatus.getDataSegments()); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.success( + getId(), + null + ); + } else { + errorMsg = buildSegmentsStatus.getErrorMsg(); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.failure( + getId(), + errorMsg + ); + } + } + catch (Exception e) { + throw new RuntimeException(e); + } + finally { + Thread.currentThread().setContextClassLoader(oldLoader); + } + } + + @GET + @Path("/rowStats") + @Produces(MediaType.APPLICATION_JSON) + public Response getRowStats( + @Context final HttpServletRequest req, + @QueryParam("windows") List windows + ) + { + IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); + Map returnMap = Maps.newHashMap(); + Map totalsMap = Maps.newHashMap(); + + if (determinePartitionsStatsGetter != null) { + totalsMap.put("determinePartitions", determinePartitionsStatsGetter.getTotalMetrics()); + } + + if (buildSegmentsStatsGetter != null) { + totalsMap.put("buildSegments", buildSegmentsStatsGetter.getTotalMetrics()); + } + + returnMap.put("totals", totalsMap); + return Response.ok(returnMap).build(); + } + + private Map getTaskCompletionReports() + { + return TaskReport.buildTaskReports( + new IngestionStatsAndErrorsTaskReport( + getId(), + new IngestionStatsAndErrorsTaskReportData( + ingestionState, + null, + getTaskCompletionRowStats(), + errorMsg + ) + ) + ); + } + + private Map getTaskCompletionRowStats() + { + Map metrics = Maps.newHashMap(); + if (determineConfigStatus != null) { + metrics.put( + "determinePartitions", + determineConfigStatus.getMetrics() + ); + } + if (buildSegmentsStatus != null) { + metrics.put( + "buildSegments", + buildSegmentsStatus.getMetrics() + ); + } + return metrics; + } + + public static class InnerProcessingStatsGetter implements TaskMetricsGetter + { + public static final List KEYS = Arrays.asList( + TaskMetricsUtils.ROWS_PROCESSED, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, + TaskMetricsUtils.ROWS_THROWN_AWAY, + TaskMetricsUtils.ROWS_UNPARSEABLE + ); + + private final Method getStatsMethod; + private final Object innerProcessingRunner; + + public InnerProcessingStatsGetter( + Object innerProcessingRunner + ) + { + try { + Class aClazz = innerProcessingRunner.getClass(); + this.getStatsMethod = aClazz.getMethod("getStats"); + this.innerProcessingRunner = innerProcessingRunner; + } + catch (NoSuchMethodException nsme) { + throw new RuntimeException(nsme); + } + } + + @Override + public List getKeys() + { + return KEYS; + } + + @Override + public Map getTotalMetrics() + { + try { + Map statsMap = (Map) getStatsMethod.invoke(innerProcessingRunner); + if (statsMap == null) { + return null; + } + long curProcessed = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED); + long curProcessedWithErrors = (Long) statsMap.get(TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS); + long curThrownAway = (Long) statsMap.get(TaskMetricsUtils.ROWS_THROWN_AWAY); + long curUnparseable = (Long) statsMap.get(TaskMetricsUtils.ROWS_UNPARSEABLE); + + return ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, curProcessed, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, curProcessedWithErrors, + TaskMetricsUtils.ROWS_THROWN_AWAY, curThrownAway, + TaskMetricsUtils.ROWS_UNPARSEABLE, curUnparseable + ); + } + catch 
(Exception e) { + log.error(e, "Got exception from getTotalMetrics(): "); + return null; + } } } + /** Called indirectly in {@link HadoopIndexTask#run(TaskToolbox)}. */ @SuppressWarnings("unused") - public static class HadoopIndexGeneratorInnerProcessing + public static class HadoopDetermineConfigInnerProcessingRunner { - public static String runTask(String[] args) throws Exception + private HadoopDruidDetermineConfigurationJob job; + + public String runTask(String[] args) throws Exception { final String schema = args[0]; - String version = args[1]; + final String workingPath = args[1]; + final String segmentOutputPath = args[2]; final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.JSON_MAPPER .readValue( @@ -278,38 +547,43 @@ public static String runTask(String[] args) throws Exception ); final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec( theSchema - .withTuningConfig(theSchema.getTuningConfig().withVersion(version)) + .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath)) + .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath)) ); - // MetadataStorageUpdaterJobHandler is only needed when running standalone without indexing service - // In that case the whatever runs the Hadoop Index Task must ensure MetadataStorageUpdaterJobHandler - // can be injected based on the configuration given in config.getSchema().getIOConfig().getMetadataUpdateSpec() - final MetadataStorageUpdaterJobHandler maybeHandler; - if (config.isUpdaterJobSpecSet()) { - maybeHandler = injector.getInstance(MetadataStorageUpdaterJobHandler.class); + job = new HadoopDruidDetermineConfigurationJob(config); + + log.info("Starting a hadoop determine configuration job..."); + if (job.run()) { + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopDetermineConfigInnerProcessingStatus(config.getSchema(), job.getStats(), null) + ); } else { - maybeHandler = null; + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopDetermineConfigInnerProcessingStatus(null, job.getStats(), job.getErrorMessage()) + ); } - HadoopDruidIndexerJob job = new HadoopDruidIndexerJob(config, maybeHandler); + } - log.info("Starting a hadoop index generator job..."); - if (job.run()) { - return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(job.getPublishedSegments()); + public Map getStats() + { + if (job == null) { + return null; } - return null; + return job.getStats(); } } - /** Called indirectly in {@link HadoopIndexTask#run(TaskToolbox)}. 
*/ @SuppressWarnings("unused") - public static class HadoopDetermineConfigInnerProcessing + public static class HadoopIndexGeneratorInnerProcessingRunner { - public static String runTask(String[] args) throws Exception + private HadoopDruidIndexerJob job; + + public String runTask(String[] args) throws Exception { final String schema = args[0]; - final String workingPath = args[1]; - final String segmentOutputPath = args[2]; + String version = args[1]; final HadoopIngestionSpec theSchema = HadoopDruidIndexerConfig.JSON_MAPPER .readValue( @@ -318,18 +592,133 @@ public static String runTask(String[] args) throws Exception ); final HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromSpec( theSchema - .withIOConfig(theSchema.getIOConfig().withSegmentOutputPath(segmentOutputPath)) - .withTuningConfig(theSchema.getTuningConfig().withWorkingPath(workingPath)) + .withTuningConfig(theSchema.getTuningConfig().withVersion(version)) ); - Jobby job = new HadoopDruidDetermineConfigurationJob(config); + // MetadataStorageUpdaterJobHandler is only needed when running standalone without indexing service + // In that case the whatever runs the Hadoop Index Task must ensure MetadataStorageUpdaterJobHandler + // can be injected based on the configuration given in config.getSchema().getIOConfig().getMetadataUpdateSpec() + final MetadataStorageUpdaterJobHandler maybeHandler; + if (config.isUpdaterJobSpecSet()) { + maybeHandler = injector.getInstance(MetadataStorageUpdaterJobHandler.class); + } else { + maybeHandler = null; + } + job = new HadoopDruidIndexerJob(config, maybeHandler); + + log.info("Starting a hadoop index generator job..."); + try { + if (job.run()) { + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopIndexGeneratorInnerProcessingStatus( + job.getPublishedSegments(), + job.getStats(), + null + ) + ); + } else { + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopIndexGeneratorInnerProcessingStatus( + null, + job.getStats(), + job.getErrorMessage() + ) + ); + } + } + catch (Exception e) { + log.error(e, "Encountered exception in HadoopIndexGeneratorInnerProcessing."); + return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString( + new HadoopIndexGeneratorInnerProcessingStatus( + null, + job.getStats(), + e.getMessage() + ) + ); + } + } - log.info("Starting a hadoop determine configuration job..."); - if (job.run()) { - return HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config.getSchema()); + public Map getStats() + { + if (job == null) { + return null; } - return null; + return job.getStats(); + } + } + + public static class HadoopIndexGeneratorInnerProcessingStatus + { + private final List dataSegments; + private final Map metrics; + private final String errorMsg; + + @JsonCreator + public HadoopIndexGeneratorInnerProcessingStatus( + @JsonProperty("dataSegments") List dataSegments, + @JsonProperty("metrics") Map metrics, + @JsonProperty("errorMsg") String errorMsg + ) + { + this.dataSegments = dataSegments; + this.metrics = metrics; + this.errorMsg = errorMsg; + } + + @JsonProperty + public List getDataSegments() + { + return dataSegments; + } + + @JsonProperty + public Map getMetrics() + { + return metrics; + } + + @JsonProperty + public String getErrorMsg() + { + return errorMsg; + } + } + + public static class HadoopDetermineConfigInnerProcessingStatus + { + private final HadoopIngestionSpec schema; + private final Map metrics; + private final String errorMsg; + + @JsonCreator + public 
HadoopDetermineConfigInnerProcessingStatus( + @JsonProperty("schema") HadoopIngestionSpec schema, + @JsonProperty("metrics") Map metrics, + @JsonProperty("errorMsg") String errorMsg + ) + { + this.schema = schema; + this.metrics = metrics; + this.errorMsg = errorMsg; + } + + @JsonProperty + public HadoopIngestionSpec getSchema() + { + return schema; + } + + @JsonProperty + public Map getMetrics() + { + return metrics; + } + + @JsonProperty + public String getErrorMsg() + { + return errorMsg; } } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java index 8963559e3123..60be2b8639a1 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java @@ -223,4 +223,32 @@ public static OutputType invokeForeignLoader( Thread.currentThread().setContextClassLoader(oldLoader); } } + + /** + * This method tries to isolate class loading during a Function call + * + * @param clazzName The Class which has an instance method called `runTask` + * @param loader The loader to use as the context class loader during invocation + * + * @return The result of the method invocation + */ + public static Object getForeignClassloaderObject( + final String clazzName, + final ClassLoader loader + ) + { + log.debug("Launching [%s] on class loader [%s]", clazzName, loader); + final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader(); + try { + Thread.currentThread().setContextClassLoader(loader); + final Class clazz = loader.loadClass(clazzName); + return clazz.newInstance(); + } + catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) { + throw Throwables.propagate(e); + } + finally { + Thread.currentThread().setContextClassLoader(oldLoader); + } + } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index 08f857ea4212..9a4daa084a2e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -19,6 +19,7 @@ package io.druid.indexing.common.task; +import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; @@ -31,6 +32,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; +import com.google.common.collect.Maps; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import com.google.common.util.concurrent.ListenableFuture; @@ -39,15 +41,21 @@ import io.druid.data.input.InputRow; import io.druid.data.input.Rows; import io.druid.hll.HyperLogLogCollector; +import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsGetter; import io.druid.indexing.appenderator.ActionBasedSegmentAllocator; import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; +import io.druid.indexing.common.TaskReport; import io.druid.indexing.common.TaskStatus; import 
io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.actions.SegmentTransactionalInsertAction; import io.druid.indexing.common.actions.TaskActionClient; import io.druid.indexing.firehose.IngestSegmentFirehoseFactory; import io.druid.java.util.common.ISE; +import io.druid.java.util.common.Intervals; import io.druid.java.util.common.JodaUtils; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.granularity.Granularity; @@ -64,6 +72,7 @@ import io.druid.segment.indexing.granularity.GranularitySpec; import io.druid.segment.realtime.FireDepartment; import io.druid.segment.realtime.FireDepartmentMetrics; +import io.druid.segment.realtime.FireDepartmentMetricsTaskMetricsGetter; import io.druid.segment.realtime.RealtimeMetricsMonitor; import io.druid.segment.realtime.appenderator.Appenderator; import io.druid.segment.realtime.appenderator.AppenderatorConfig; @@ -75,18 +84,31 @@ import io.druid.segment.realtime.appenderator.SegmentIdentifier; import io.druid.segment.realtime.appenderator.SegmentsAndMetadata; import io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher; +import io.druid.segment.realtime.firehose.ChatHandler; +import io.druid.segment.realtime.firehose.ChatHandlerProvider; import io.druid.segment.writeout.SegmentWriteOutMediumFactory; +import io.druid.server.security.Action; +import io.druid.server.security.AuthorizerMapper; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.HashBasedNumberedShardSpec; import io.druid.timeline.partition.NoneShardSpec; import io.druid.timeline.partition.NumberedShardSpec; import io.druid.timeline.partition.ShardSpec; +import io.druid.utils.CircularBuffer; import org.codehaus.plexus.util.FileUtils; import org.joda.time.DateTime; import org.joda.time.Interval; import org.joda.time.Period; import javax.annotation.Nullable; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; import java.io.File; import java.io.IOException; import java.util.HashMap; @@ -106,7 +128,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; -public class IndexTask extends AbstractTask +public class IndexTask extends AbstractTask implements ChatHandler { private static final Logger log = new Logger(IndexTask.class); private static final HashFunction hashFunction = Hashing.murmur3_128(); @@ -131,12 +153,44 @@ private static String makeGroupId(boolean isAppendToExisting, String dataSource) @JsonIgnore private final IndexIngestionSpec ingestionSchema; + @JsonIgnore + private IngestionState ingestionState; + + @JsonIgnore + private final AuthorizerMapper authorizerMapper; + + @JsonIgnore + private final Optional chatHandlerProvider; + + @JsonIgnore + private FireDepartmentMetrics buildSegmentsFireDepartmentMetrics; + + @JsonIgnore + private TaskMetricsGetter buildSegmentsMetricsGetter; + + @JsonIgnore + private CircularBuffer buildSegmentsSavedParseExceptions; + + @JsonIgnore + private FireDepartmentMetrics determinePartitionsFireDepartmentMetrics; + + @JsonIgnore + private TaskMetricsGetter determinePartitionsMetricsGetter; + + @JsonIgnore + private CircularBuffer determinePartitionsSavedParseExceptions; + + @JsonIgnore + private String errorMsg; + @JsonCreator public IndexTask( @JsonProperty("id") final String id, @JsonProperty("resource") final TaskResource taskResource, 
@JsonProperty("spec") final IndexIngestionSpec ingestionSchema, - @JsonProperty("context") final Map context + @JsonProperty("context") final Map context, + @JacksonInject AuthorizerMapper authorizerMapper, + @JacksonInject ChatHandlerProvider chatHandlerProvider ) { this( @@ -145,7 +199,9 @@ public IndexTask( taskResource, ingestionSchema.dataSchema.getDataSource(), ingestionSchema, - context + context, + authorizerMapper, + chatHandlerProvider ); } @@ -155,7 +211,9 @@ public IndexTask( TaskResource resource, String dataSource, IndexIngestionSpec ingestionSchema, - Map context + Map context, + AuthorizerMapper authorizerMapper, + ChatHandlerProvider chatHandlerProvider ) { super( @@ -165,8 +223,19 @@ public IndexTask( dataSource, context ); - this.ingestionSchema = ingestionSchema; + this.authorizerMapper = authorizerMapper; + this.chatHandlerProvider = Optional.fromNullable(chatHandlerProvider); + if (ingestionSchema.getTuningConfig().getMaxSavedParseExceptions() > 0) { + determinePartitionsSavedParseExceptions = new CircularBuffer( + ingestionSchema.getTuningConfig().getMaxSavedParseExceptions() + ); + + buildSegmentsSavedParseExceptions = new CircularBuffer( + ingestionSchema.getTuningConfig().getMaxSavedParseExceptions() + ); + } + this.ingestionState = IngestionState.NOT_STARTED; } @Override @@ -209,6 +278,108 @@ static boolean isReady(TaskActionClient actionClient, SortedSet interv return true; } + @GET + @Path("/unparseableEvents") + @Produces(MediaType.APPLICATION_JSON) + public Response getUnparseableEvents( + @Context final HttpServletRequest req, + @QueryParam("full") String full + ) + { + IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); + Map> events = Maps.newHashMap(); + + boolean needsDeterminePartitions = false; + boolean needsBuildSegments = false; + + if (full != null) { + needsDeterminePartitions = true; + needsBuildSegments = true; + } else { + switch (ingestionState) { + case DETERMINE_PARTITIONS: + needsDeterminePartitions = true; + break; + case BUILD_SEGMENTS: + case COMPLETED: + needsBuildSegments = true; + break; + default: + break; + } + } + + if (needsDeterminePartitions) { + events.put( + "determinePartitions", + IndexTaskUtils.getMessagesFromSavedParseExceptions(determinePartitionsSavedParseExceptions) + ); + } + + if (needsBuildSegments) { + events.put( + "buildSegments", + IndexTaskUtils.getMessagesFromSavedParseExceptions(buildSegmentsSavedParseExceptions) + ); + } + + return Response.ok(events).build(); + } + + @GET + @Path("/rowStats") + @Produces(MediaType.APPLICATION_JSON) + public Response getRowStats( + @Context final HttpServletRequest req, + @QueryParam("full") String full + ) + { + IndexTaskUtils.datasourceAuthorizationCheck(req, Action.READ, getDataSource(), authorizerMapper); + Map returnMap = Maps.newHashMap(); + Map totalsMap = Maps.newHashMap(); + + boolean needsDeterminePartitions = false; + boolean needsBuildSegments = false; + + if (full != null) { + needsDeterminePartitions = true; + needsBuildSegments = true; + } else { + switch (ingestionState) { + case DETERMINE_PARTITIONS: + needsDeterminePartitions = true; + break; + case BUILD_SEGMENTS: + case COMPLETED: + needsBuildSegments = true; + break; + default: + break; + } + } + + if (needsDeterminePartitions) { + if (determinePartitionsMetricsGetter != null) { + totalsMap.put( + "determinePartitions", + determinePartitionsMetricsGetter.getTotalMetrics() + ); + } + } + + if (needsBuildSegments) { + if (buildSegmentsMetricsGetter != null) { + 
totalsMap.put( + "buildSegments", + buildSegmentsMetricsGetter.getTotalMetrics() + ); + } + } + + returnMap.put("totals", totalsMap); + return Response.ok(returnMap).build(); + } + @JsonProperty("spec") public IndexIngestionSpec getIngestionSchema() { @@ -218,56 +389,127 @@ public IndexIngestionSpec getIngestionSchema() @Override public TaskStatus run(final TaskToolbox toolbox) throws Exception { - final boolean determineIntervals = !ingestionSchema.getDataSchema() - .getGranularitySpec() - .bucketIntervals() - .isPresent(); - - final FirehoseFactory firehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory(); - - if (firehoseFactory instanceof IngestSegmentFirehoseFactory) { - // pass toolbox to Firehose - ((IngestSegmentFirehoseFactory) firehoseFactory).setTaskToolbox(toolbox); - } - - final File firehoseTempDir = toolbox.getFirehoseTemporaryDir(); - // Firehose temporary directory is automatically removed when this IndexTask completes. - FileUtils.forceMkdir(firehoseTempDir); - - final ShardSpecs shardSpecs = determineShardSpecs(toolbox, firehoseFactory, firehoseTempDir); - - final DataSchema dataSchema; - final Map versions; - if (determineIntervals) { - final SortedSet intervals = new TreeSet<>(Comparators.intervalsByStartThenEnd()); - intervals.addAll(shardSpecs.getIntervals()); - final Map locks = Tasks.tryAcquireExclusiveLocks(toolbox.getTaskActionClient(), intervals); - versions = locks.entrySet().stream() - .collect(Collectors.toMap(Entry::getKey, entry -> entry.getValue().getVersion())); - - dataSchema = ingestionSchema.getDataSchema().withGranularitySpec( - ingestionSchema.getDataSchema() - .getGranularitySpec() - .withIntervals( - JodaUtils.condenseIntervals( - shardSpecs.getIntervals() - ) - ) + try { + if (chatHandlerProvider.isPresent()) { + log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName()); + chatHandlerProvider.get().register(getId(), this, false); + } else { + log.warn("No chat handler detected"); + } + + final boolean determineIntervals = !ingestionSchema.getDataSchema() + .getGranularitySpec() + .bucketIntervals() + .isPresent(); + + final FirehoseFactory firehoseFactory = ingestionSchema.getIOConfig().getFirehoseFactory(); + + if (firehoseFactory instanceof IngestSegmentFirehoseFactory) { + // pass toolbox to Firehose + ((IngestSegmentFirehoseFactory) firehoseFactory).setTaskToolbox(toolbox); + } + + final File firehoseTempDir = toolbox.getFirehoseTemporaryDir(); + // Firehose temporary directory is automatically removed when this IndexTask completes. 
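
The unparseableEvents and rowStats resources above are registered through the ChatHandlerProvider, so they are reachable over HTTP while the task runs. Below is a hedged sketch of polling rowStats from a client; the peon host, port, and the /druid/worker/v1/chat/{taskId} prefix are assumptions about a typical deployment rather than anything defined by this patch, and the task id is made up.

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.InputStream;
import java.net.URL;
import java.util.Map;

public class RowStatsPollSketch
{
  public static void main(String[] args) throws Exception
  {
    final String taskId = "index_wikipedia_2018-03-21T00:00:00.000Z"; // hypothetical task id
    // Assumed chat-handler URL; adjust host, port, and prefix for your deployment.
    final URL rowStats = new URL(
        "http://peon-host:8100/druid/worker/v1/chat/" + taskId + "/rowStats?full=true"
    );
    final ObjectMapper mapper = new ObjectMapper();
    try (InputStream in = rowStats.openStream()) {
      // Expected shape per the resource code above:
      // {"totals": {"determinePartitions": {...}, "buildSegments": {...}}}
      final Map<?, ?> stats = mapper.readValue(in, Map.class);
      System.out.println(stats.get("totals"));
    }
  }
}
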
+ FileUtils.forceMkdir(firehoseTempDir); + + ingestionState = IngestionState.DETERMINE_PARTITIONS; + final ShardSpecs shardSpecs = determineShardSpecs(toolbox, firehoseFactory, firehoseTempDir); + final DataSchema dataSchema; + final Map versions; + if (determineIntervals) { + final SortedSet intervals = new TreeSet<>(Comparators.intervalsByStartThenEnd()); + intervals.addAll(shardSpecs.getIntervals()); + final Map locks = Tasks.tryAcquireExclusiveLocks( + toolbox.getTaskActionClient(), + intervals + ); + versions = locks.entrySet().stream() + .collect(Collectors.toMap(Entry::getKey, entry -> entry.getValue().getVersion())); + + dataSchema = ingestionSchema.getDataSchema().withGranularitySpec( + ingestionSchema.getDataSchema() + .getGranularitySpec() + .withIntervals( + JodaUtils.condenseIntervals( + shardSpecs.getIntervals() + ) + ) + ); + } else { + versions = getTaskLocks(toolbox.getTaskActionClient()) + .stream() + .collect(Collectors.toMap(TaskLock::getInterval, TaskLock::getVersion)); + dataSchema = ingestionSchema.getDataSchema(); + } + + ingestionState = IngestionState.BUILD_SEGMENTS; + return generateAndPublishSegments(toolbox, dataSchema, shardSpecs, versions, firehoseFactory, firehoseTempDir); + } + catch (Exception e) { + log.error(e, "Encountered exception in %s.", ingestionState); + errorMsg = Throwables.getStackTraceAsString(e); + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.failure( + getId(), + errorMsg ); - } else { - versions = getTaskLocks(toolbox.getTaskActionClient()) - .stream() - .collect(Collectors.toMap(TaskLock::getInterval, TaskLock::getVersion)); - dataSchema = ingestionSchema.getDataSchema(); } - if (generateAndPublishSegments(toolbox, dataSchema, shardSpecs, versions, firehoseFactory, firehoseTempDir)) { - toolbox.getTaskReportFileWriter().write(null); - return TaskStatus.success(getId()); - } else { - toolbox.getTaskReportFileWriter().write(null); - return TaskStatus.failure(getId()); + finally { + if (chatHandlerProvider.isPresent()) { + chatHandlerProvider.get().unregister(getId()); + } + } + } + + private Map getTaskCompletionReports() + { + return TaskReport.buildTaskReports( + new IngestionStatsAndErrorsTaskReport( + getId(), + new IngestionStatsAndErrorsTaskReportData( + ingestionState, + getTaskCompletionUnparseableEvents(), + getTaskCompletionRowStats(), + errorMsg + ) + ) + ); + } + + private Map getTaskCompletionUnparseableEvents() + { + Map unparseableEventsMap = Maps.newHashMap(); + List determinePartitionsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions( + determinePartitionsSavedParseExceptions); + List buildSegmentsParseExceptionMessages = IndexTaskUtils.getMessagesFromSavedParseExceptions( + buildSegmentsSavedParseExceptions); + + if (determinePartitionsParseExceptionMessages != null || buildSegmentsParseExceptionMessages != null) { + unparseableEventsMap.put("determinePartitions", determinePartitionsParseExceptionMessages); + unparseableEventsMap.put("buildSegments", buildSegmentsParseExceptionMessages); + } + + return unparseableEventsMap; + } + + private Map getTaskCompletionRowStats() + { + Map metrics = Maps.newHashMap(); + if (determinePartitionsMetricsGetter != null) { + metrics.put( + "determinePartitions", + determinePartitionsMetricsGetter.getTotalMetrics() + ); } + if (buildSegmentsMetricsGetter != null) { + metrics.put( + "buildSegments", + buildSegmentsMetricsGetter.getTotalMetrics() + ); + } + return metrics; } private static String findVersion(Map versions, 
Interval interval) @@ -387,7 +629,7 @@ private static ShardSpecs createShardSpecWithoutInputScan( return new ShardSpecs(shardSpecs); } - private static ShardSpecs createShardSpecsFromInput( + private ShardSpecs createShardSpecsFromInput( ObjectMapper jsonMapper, IndexIngestionSpec ingestionSchema, FirehoseFactory firehoseFactory, @@ -449,7 +691,7 @@ private static ShardSpecs createShardSpecsFromInput( return new ShardSpecs(intervalToShardSpecs); } - private static Map> collectIntervalsAndShardSpecs( + private Map> collectIntervalsAndShardSpecs( ObjectMapper jsonMapper, IndexIngestionSpec ingestionSchema, FirehoseFactory firehoseFactory, @@ -459,6 +701,11 @@ private static Map> collectIntervalsAnd boolean determineNumPartitions ) throws IOException { + determinePartitionsFireDepartmentMetrics = new FireDepartmentMetrics(); + determinePartitionsMetricsGetter = new FireDepartmentMetricsTaskMetricsGetter( + determinePartitionsFireDepartmentMetrics + ); + final Map> hllCollectors = new TreeMap<>( Comparators.intervalsByStartThenEnd() ); @@ -469,12 +716,14 @@ private static Map> collectIntervalsAnd try ( final Firehose firehose = firehoseFactory.connect(ingestionSchema.getDataSchema().getParser(), firehoseTempDir) ) { + while (firehose.hasMore()) { try { final InputRow inputRow = firehose.nextRow(); // The null inputRow means the caller must skip this row. if (inputRow == null) { + determinePartitionsFireDepartmentMetrics.incrementThrownAway(); continue; } @@ -482,9 +731,17 @@ private static Map> collectIntervalsAnd if (determineIntervals) { interval = granularitySpec.getSegmentGranularity().bucket(inputRow.getTimestamp()); } else { + if (!Intervals.ETERNITY.contains(inputRow.getTimestamp())) { + final String errorMsg = StringUtils.format( + "Encountered row with timestamp that cannot be represented as a long: [%s]", + inputRow + ); + throw new ParseException(errorMsg); + } + final Optional optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp()); if (!optInterval.isPresent()) { - thrownAway++; + determinePartitionsFireDepartmentMetrics.incrementThrownAway(); continue; } interval = optInterval.get(); @@ -508,12 +765,21 @@ private static Map> collectIntervalsAnd hllCollectors.put(interval, Optional.absent()); } } + determinePartitionsFireDepartmentMetrics.incrementProcessed(); } catch (ParseException e) { - if (ingestionSchema.getTuningConfig().isReportParseExceptions()) { - throw e; - } else { - unparseable++; + if (ingestionSchema.getTuningConfig().isLogParseExceptions()) { + log.error(e, "Encountered parse exception: "); + } + + if (determinePartitionsSavedParseExceptions != null) { + determinePartitionsSavedParseExceptions.add(e); + } + + determinePartitionsFireDepartmentMetrics.incrementUnparseable(); + if (determinePartitionsFireDepartmentMetrics.unparseable() > ingestionSchema.getTuningConfig() + .getMaxParseExceptions()) { + throw new RuntimeException("Max parse exceptions exceeded, terminating task..."); } } } @@ -561,7 +827,7 @@ private static BiFunction getShardSpecCreateFunctio * * @return true if generated segments are successfully published, otherwise false */ - private boolean generateAndPublishSegments( + private TaskStatus generateAndPublishSegments( final TaskToolbox toolbox, final DataSchema dataSchema, final ShardSpecs shardSpecs, @@ -574,7 +840,8 @@ private boolean generateAndPublishSegments( final FireDepartment fireDepartmentForMetrics = new FireDepartment( dataSchema, new RealtimeIOConfig(null, null, null), null ); - final FireDepartmentMetrics 
fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics(); + buildSegmentsFireDepartmentMetrics = fireDepartmentForMetrics.getMetrics(); + buildSegmentsMetricsGetter = new FireDepartmentMetricsTaskMetricsGetter(buildSegmentsFireDepartmentMetrics); if (toolbox.getMonitorScheduler() != null) { toolbox.getMonitorScheduler().addMonitor( @@ -652,7 +919,7 @@ dataSchema, new RealtimeIOConfig(null, null, null), null }; try ( - final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, toolbox, dataSchema, tuningConfig); + final Appenderator appenderator = newAppenderator(buildSegmentsFireDepartmentMetrics, toolbox, dataSchema, tuningConfig); final BatchAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator); final Firehose firehose = firehoseFactory.connect(dataSchema.getParser(), firehoseTempDir) ) { @@ -663,13 +930,21 @@ dataSchema, new RealtimeIOConfig(null, null, null), null final InputRow inputRow = firehose.nextRow(); if (inputRow == null) { - fireDepartmentMetrics.incrementThrownAway(); + buildSegmentsFireDepartmentMetrics.incrementThrownAway(); continue; } + if (!Intervals.ETERNITY.contains(inputRow.getTimestamp())) { + final String errorMsg = StringUtils.format( + "Encountered row with timestamp that cannot be represented as a long: [%s]", + inputRow + ); + throw new ParseException(errorMsg); + } + final Optional optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp()); if (!optInterval.isPresent()) { - fireDepartmentMetrics.incrementThrownAway(); + buildSegmentsFireDepartmentMetrics.incrementThrownAway(); continue; } @@ -702,15 +977,15 @@ dataSchema, new RealtimeIOConfig(null, null, null), null throw new ISE("Failed to add a row with timestamp[%s]", inputRow.getTimestamp()); } - fireDepartmentMetrics.incrementProcessed(); - } - catch (ParseException e) { - if (tuningConfig.isReportParseExceptions()) { - throw e; + if (addResult.getParseException() != null) { + handleParseException(addResult.getParseException()); } else { - fireDepartmentMetrics.incrementUnparseable(); + buildSegmentsFireDepartmentMetrics.incrementProcessed(); } } + catch (ParseException e) { + handleParseException(e); + } } final SegmentsAndMetadata pushed = driver.pushAllAndClear(pushTimeout); @@ -721,15 +996,21 @@ dataSchema, new RealtimeIOConfig(null, null, null), null pushTimeout ); + ingestionState = IngestionState.COMPLETED; if (published == null) { log.error("Failed to publish segments, aborting!"); - return false; + errorMsg = "Failed to publish segments."; + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.failure( + getId(), + errorMsg + ); } else { log.info( "Processed[%,d] events, unparseable[%,d], thrownAway[%,d].", - fireDepartmentMetrics.processed(), - fireDepartmentMetrics.unparseable(), - fireDepartmentMetrics.thrownAway() + buildSegmentsFireDepartmentMetrics.processed(), + buildSegmentsFireDepartmentMetrics.unparseable(), + buildSegmentsFireDepartmentMetrics.thrownAway() ); log.info( "Published segments[%s]", Joiner.on(", ").join( @@ -739,7 +1020,9 @@ dataSchema, new RealtimeIOConfig(null, null, null), null ) ) ); - return true; + + toolbox.getTaskReportFileWriter().write(getTaskCompletionReports()); + return TaskStatus.success(getId()); } } catch (TimeoutException | ExecutionException e) { @@ -747,6 +1030,29 @@ dataSchema, new RealtimeIOConfig(null, null, null), null } } + private void handleParseException(ParseException e) + { + if (e.isFromPartiallyValidRow()) { + 
buildSegmentsFireDepartmentMetrics.incrementProcessedWithErrors(); + } else { + buildSegmentsFireDepartmentMetrics.incrementUnparseable(); + } + + if (ingestionSchema.tuningConfig.isLogParseExceptions()) { + log.error(e, "Encountered parse exception:"); + } + + if (buildSegmentsSavedParseExceptions != null) { + buildSegmentsSavedParseExceptions.add(e); + } + + if (buildSegmentsFireDepartmentMetrics.unparseable() + + buildSegmentsFireDepartmentMetrics.processedWithErrors() > ingestionSchema.tuningConfig.getMaxParseExceptions()) { + log.error("Max parse exceptions exceeded, terminating task..."); + throw new RuntimeException("Max parse exceptions exceeded, terminating task...", e); + } + } + private static boolean exceedMaxRowsInSegment(int numRowsInSegment, IndexTuningConfig indexTuningConfig) { // maxRowsInSegment should be null if numShards is set in indexTuningConfig @@ -947,6 +1253,10 @@ public static class IndexTuningConfig implements TuningConfig, AppenderatorConfi private final boolean forceGuaranteedRollup; private final boolean reportParseExceptions; private final long pushTimeout; + private final boolean logParseExceptions; + private final int maxParseExceptions; + private final int maxSavedParseExceptions; + @Nullable private final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory; @@ -963,10 +1273,13 @@ public IndexTuningConfig( @JsonProperty("buildV9Directly") @Nullable Boolean buildV9Directly, @JsonProperty("forceExtendableShardSpecs") @Nullable Boolean forceExtendableShardSpecs, @JsonProperty("forceGuaranteedRollup") @Nullable Boolean forceGuaranteedRollup, - @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, + @Deprecated @JsonProperty("reportParseExceptions") @Nullable Boolean reportParseExceptions, @JsonProperty("publishTimeout") @Nullable Long publishTimeout, // deprecated @JsonProperty("pushTimeout") @Nullable Long pushTimeout, - @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory + @JsonProperty("segmentWriteOutMediumFactory") @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, + @JsonProperty("logParseExceptions") @Nullable Boolean logParseExceptions, + @JsonProperty("maxParseExceptions") @Nullable Integer maxParseExceptions, + @JsonProperty("maxSavedParseExceptions") @Nullable Integer maxSavedParseExceptions ) { this( @@ -981,13 +1294,16 @@ public IndexTuningConfig( reportParseExceptions, pushTimeout != null ? 
pushTimeout : publishTimeout, null, - segmentWriteOutMediumFactory + segmentWriteOutMediumFactory, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } private IndexTuningConfig() { - this(null, null, null, null, null, null, null, null, null, null, null, null); + this(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null); } private IndexTuningConfig( @@ -1002,7 +1318,10 @@ private IndexTuningConfig( @Nullable Boolean reportParseExceptions, @Nullable Long pushTimeout, @Nullable File basePersistDirectory, - @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory + @Nullable SegmentWriteOutMediumFactory segmentWriteOutMediumFactory, + @Nullable Boolean logParseExceptions, + @Nullable Integer maxParseExceptions, + @Nullable Integer maxSavedParseExceptions ) { Preconditions.checkArgument( @@ -1032,6 +1351,17 @@ private IndexTuningConfig( ); this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; + + if (this.reportParseExceptions) { + this.maxParseExceptions = 0; + this.maxSavedParseExceptions = maxSavedParseExceptions == null ? 0 : Math.min(1, maxSavedParseExceptions); + } else { + this.maxParseExceptions = maxParseExceptions == null ? TuningConfig.DEFAULT_MAX_PARSE_EXCEPTIONS : maxParseExceptions; + this.maxSavedParseExceptions = maxSavedParseExceptions == null + ? TuningConfig.DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS + : maxSavedParseExceptions; + } + this.logParseExceptions = logParseExceptions == null ? TuningConfig.DEFAULT_LOG_PARSE_EXCEPTIONS : logParseExceptions; } private static Integer initializeTargetPartitionSize(Integer numShards, Integer targetPartitionSize) @@ -1068,7 +1398,10 @@ public IndexTuningConfig withBasePersistDirectory(File dir) reportParseExceptions, pushTimeout, dir, - segmentWriteOutMediumFactory + segmentWriteOutMediumFactory, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } @@ -1152,6 +1485,24 @@ public long getPushTimeout() return pushTimeout; } + @JsonProperty + public boolean isLogParseExceptions() + { + return logParseExceptions; + } + + @JsonProperty + public int getMaxParseExceptions() + { + return maxParseExceptions; + } + + @JsonProperty + public int getMaxSavedParseExceptions() + { + return maxSavedParseExceptions; + } + @Override public Period getIntermediatePersistPeriod() { @@ -1187,7 +1538,10 @@ public boolean equals(Object o) Objects.equals(numShards, that.numShards) && Objects.equals(indexSpec, that.indexSpec) && Objects.equals(basePersistDirectory, that.basePersistDirectory) && - Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory); + Objects.equals(segmentWriteOutMediumFactory, that.segmentWriteOutMediumFactory) && + logParseExceptions == that.logParseExceptions && + maxParseExceptions == that.maxParseExceptions && + maxSavedParseExceptions == that.maxSavedParseExceptions; } @Override @@ -1205,7 +1559,10 @@ public int hashCode() forceGuaranteedRollup, reportParseExceptions, pushTimeout, - segmentWriteOutMediumFactory + segmentWriteOutMediumFactory, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); } } diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTaskUtils.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTaskUtils.java new file mode 100644 index 000000000000..aee89b58a5aa --- /dev/null +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTaskUtils.java @@ -0,0 +1,78 @@ +/* + * Licensed to Metamarkets Group Inc. 
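
The three tuning knobs introduced above replace the all-or-nothing reportParseExceptions flag: logParseExceptions controls logging of bad rows, maxParseExceptions caps how many parse failures a task tolerates before failing, and maxSavedParseExceptions bounds how many exception messages are kept for the unparseableEvents report. Setting the deprecated reportParseExceptions=true still forces maxParseExceptions to 0 and keeps at most one saved exception. A hedged sketch of how the new properties deserialize from a tuningConfig fragment; using a bare ObjectMapper and omitting all other fields are simplifying assumptions for illustration.

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.indexing.common.task.IndexTask;

public class TuningConfigSerdeSketch
{
  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    // Only the new properties are shown; unspecified fields fall back to defaults.
    final String tuningJson = "{"
        + "\"logParseExceptions\": true,"
        + "\"maxParseExceptions\": 10,"
        + "\"maxSavedParseExceptions\": 5"
        + "}";
    final IndexTask.IndexTuningConfig tuning =
        mapper.readValue(tuningJson, IndexTask.IndexTuningConfig.class);
    System.out.println(tuning.isLogParseExceptions());        // true
    System.out.println(tuning.getMaxParseExceptions());       // 10
    System.out.println(tuning.getMaxSavedParseExceptions());  // 5
  }
}
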
(Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexing.common.task; + +import io.druid.server.security.Access; +import io.druid.server.security.Action; +import io.druid.server.security.AuthorizationUtils; +import io.druid.server.security.AuthorizerMapper; +import io.druid.server.security.ForbiddenException; +import io.druid.server.security.Resource; +import io.druid.server.security.ResourceAction; +import io.druid.server.security.ResourceType; +import io.druid.utils.CircularBuffer; + +import javax.annotation.Nullable; +import javax.servlet.http.HttpServletRequest; +import java.util.ArrayList; +import java.util.List; + +public class IndexTaskUtils +{ + @Nullable + public static List getMessagesFromSavedParseExceptions(CircularBuffer savedParseExceptions) + { + if (savedParseExceptions == null) { + return null; + } + + List events = new ArrayList<>(); + for (int i = 0; i < savedParseExceptions.size(); i++) { + events.add(savedParseExceptions.getLatest(i).getMessage()); + } + + return events; + } + + /** + * Authorizes action to be performed on a task's datasource + * + * @return authorization result + */ + public static Access datasourceAuthorizationCheck( + final HttpServletRequest req, + Action action, + String datasource, + AuthorizerMapper authorizerMapper + ) + { + ResourceAction resourceAction = new ResourceAction( + new Resource(datasource, ResourceType.DATASOURCE), + action + ); + + Access access = AuthorizationUtils.authorizeResourceAction(req, resourceAction, authorizerMapper); + if (!access.isAllowed()) { + throw new ForbiddenException(access.toString()); + } + + return access; + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java b/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java index 06e6342356be..daad56a8797f 100644 --- a/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/ThreadPoolTaskRunner.java @@ -465,11 +465,11 @@ public TaskStatus call() log.warn(e, "Interrupted while running task[%s]", task); } - status = TaskStatus.failure(task.getId()); + status = TaskStatus.failure(task.getId(), e.toString()); } catch (Exception e) { log.error(e, "Exception while running task[%s]", task); - status = TaskStatus.failure(task.getId()); + status = TaskStatus.failure(task.getId(), e.toString()); } catch (Throwable t) { log.error(t, "Uncaught Throwable while running task[%s]", task); diff --git a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java index 1fa59cebd1a8..a0dc0fe22a19 100644 --- 
a/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java +++ b/indexing-service/src/main/java/io/druid/indexing/overlord/http/OverlordResource.java @@ -621,7 +621,9 @@ public Response getCompleteTasks( status.getStatusCode(), status.getDuration(), TaskLocation.unknown(), - pair.rhs); + pair.rhs, + status.getErrorMsg() + ); })); return Response.ok(completeTasks).build(); @@ -804,7 +806,8 @@ public TaskStatusPlus apply(TaskRunnerWorkItem workItem) null, null, workItem.getLocation(), - workItem.getDataSource() + workItem.getDataSource(), + null ); } } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java index abd451a9ed30..5e09e978fb32 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/AppenderatorDriverRealtimeIndexTaskTest.java @@ -19,6 +19,7 @@ package io.druid.indexing.common.task; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; @@ -34,15 +35,23 @@ import io.druid.data.input.FirehoseFactory; import io.druid.data.input.InputRow; import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.FloatDimensionSchema; import io.druid.data.input.impl.InputRowParser; +import io.druid.data.input.impl.LongDimensionSchema; import io.druid.data.input.impl.MapInputRowParser; +import io.druid.data.input.impl.StringDimensionSchema; import io.druid.data.input.impl.TimeAndDimsParseSpec; import io.druid.data.input.impl.TimestampSpec; import io.druid.discovery.DataNodeService; import io.druid.discovery.DruidNodeAnnouncer; import io.druid.discovery.LookupNodeService; +import io.druid.indexer.IngestionState; +import io.druid.indexer.TaskMetricsUtils; import io.druid.indexer.TaskState; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.SegmentLoaderFactory; +import io.druid.indexing.common.TaskReport; +import io.druid.indexing.common.TaskReportFileWriter; import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.TaskToolboxFactory; @@ -117,12 +126,12 @@ import io.druid.server.DruidNode; import io.druid.server.coordination.DataSegmentServerAnnouncer; import io.druid.server.coordination.ServerType; +import io.druid.server.security.AuthTestUtils; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.LinearShardSpec; import io.druid.timeline.partition.NumberedShardSpec; import org.apache.commons.io.FileUtils; import org.easymock.EasyMock; -import org.hamcrest.CoreMatchers; import org.joda.time.DateTime; import org.joda.time.Period; import org.junit.After; @@ -130,8 +139,6 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; -import org.junit.internal.matchers.ThrowableCauseMatcher; -import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; @@ -149,7 +156,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; 
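
Because OverlordResource above now threads status.getErrorMsg() into the completed-task listing, failure reasons become visible to API clients without digging through task logs. A hedged sketch of reading them from the overlord follows; the /druid/indexer/v1/completeTasks path, the "id" and "errorMsg" field names, and the host/port are assumptions consistent with the code above, not guarantees of this patch.

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.net.URL;

public class CompleteTasksSketch
{
  public static void main(String[] args) throws Exception
  {
    final ObjectMapper mapper = new ObjectMapper();
    // Assumed overlord endpoint; returns an array of TaskStatusPlus-style objects.
    final JsonNode tasks = mapper.readTree(
        new URL("http://overlord-host:8090/druid/indexer/v1/completeTasks")
    );
    for (JsonNode task : tasks) {
      // Print only the tasks that carry an error message.
      if (task.hasNonNull("errorMsg")) {
        System.out.println(task.path("id").asText() + ": " + task.path("errorMsg").asText());
      }
    }
  }
}
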
import java.util.concurrent.TimeUnit; @@ -161,6 +167,7 @@ public class AppenderatorDriverRealtimeIndexTaskTest "host", new NoopEmitter() ); + private static final ObjectMapper objectMapper = TestHelper.makeJsonMapper(); private static final String FAIL_DIM = "__fail__"; @@ -261,6 +268,7 @@ public Firehose connect(InputRowParser parser, File temporaryDirectory) throws P private TaskLockbox taskLockbox; private TaskToolboxFactory taskToolboxFactory; private File baseDir; + private File reportsFile; @Before public void setUp() throws IOException @@ -277,6 +285,7 @@ public void setUp() throws IOException derbyConnector.createPendingSegmentsTable(); baseDir = tempFolder.newFolder(); + reportsFile = File.createTempFile("KafkaIndexTaskTestReports-" + System.currentTimeMillis(), "json"); makeToolboxFactory(baseDir); } @@ -284,6 +293,7 @@ public void setUp() throws IOException public void tearDown() { taskExec.shutdownNow(); + reportsFile.delete(); } @Test(timeout = 60_000L) @@ -294,11 +304,11 @@ public void testDefaultResource() } - @Test(timeout = 60_000L, expected = ExecutionException.class) + @Test(timeout = 60_000L) public void testHandoffTimeout() throws Exception { expectPublishedSegments(1); - final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, true, 100L); + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, true, 100L, true, 0, 1); final ListenableFuture statusFuture = runTask(task); // Wait for firehose to show up, it starts off null. @@ -318,7 +328,8 @@ public void testHandoffTimeout() throws Exception firehose.close(); // handoff would timeout, resulting in exception - statusFuture.get(); + TaskStatus status = statusFuture.get(); + Assert.assertTrue(status.getErrorMsg().contains("java.util.concurrent.TimeoutException: Timeout waiting for task.")); } @Test(timeout = 60_000L) @@ -520,7 +531,7 @@ public void testTransformSpec() throws Exception new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()) ) ); - final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0); + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0, true, 0, 1); final ListenableFuture statusFuture = runTask(task); // Wait for firehose to show up, it starts off null. @@ -595,10 +606,10 @@ public void testReportParseExceptionsOnBadMetric() throws Exception firehose.addRows( ImmutableList.of( - ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), - ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "foo"), + ImmutableMap.of("t", 2000000L, "dim1", "foo", "met1", "1"), + ImmutableMap.of("t", 3000000L, "dim1", "foo", "met1", "foo"), ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", "foo"), - ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0) + ImmutableMap.of("t", 4000000L, "dim2", "bar", "met1", 2.0) ) ); @@ -606,26 +617,19 @@ public void testReportParseExceptionsOnBadMetric() throws Exception firehose.close(); // Wait for the task to finish. 
- expectedException.expect(ExecutionException.class); - expectedException.expectCause(CoreMatchers.instanceOf(ParseException.class)); - expectedException.expectCause( - ThrowableMessageMatcher.hasMessage( - CoreMatchers.containsString("Encountered parse error for aggregator[met1]") - ) - ); - expectedException.expect( - ThrowableCauseMatcher.hasCause( - ThrowableCauseMatcher.hasCause( - CoreMatchers.allOf( - CoreMatchers.instanceOf(ParseException.class), - ThrowableMessageMatcher.hasMessage( - CoreMatchers.containsString("Unable to parse value[foo] for field[met1]") - ) - ) - ) + TaskStatus status = statusFuture.get(); + Assert.assertTrue(status.getErrorMsg().contains("java.lang.RuntimeException: Max parse exceptions exceeded, terminating task...")); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Map expectedUnparseables = ImmutableMap.of( + "buildSegments", + Arrays.asList( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=1970-01-01T00:50:00.000Z, event={t=3000000, dim1=foo, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [Unable to parse value[foo] for field[met1],]" ) ); - statusFuture.get(); + + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); } @Test(timeout = 60_000L) @@ -633,7 +637,7 @@ public void testNoReportParseExceptions() throws Exception { expectPublishedSegments(1); - final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, false); + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, null, 1); final ListenableFuture statusFuture = runTask(task); // Wait for firehose to show up, it starts off null. @@ -671,7 +675,8 @@ public void testNoReportParseExceptions() throws Exception DataSegment publishedSegment = Iterables.getOnlyElement(publishedSegments); // Check metrics. - Assert.assertEquals(3, task.getMetrics().processed()); + Assert.assertEquals(2, task.getMetrics().processed()); + Assert.assertEquals(1, task.getMetrics().processedWithErrors()); Assert.assertEquals(0, task.getMetrics().thrownAway()); Assert.assertEquals(2, task.getMetrics().unparseable()); @@ -696,9 +701,195 @@ public void testNoReportParseExceptions() throws Exception } handOffCallbacks.clear(); + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 2, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 1, + TaskMetricsUtils.ROWS_UNPARSEABLE, 2, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 + ) + ); + + // Wait for the task to finish. + final TaskStatus taskStatus = statusFuture.get(); + Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); + } + + @Test(timeout = 60_000L) + public void testMultipleParseExceptionsSuccess() throws Exception + { + expectPublishedSegments(1); + + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, 10, 10); + final ListenableFuture statusFuture = runTask(task); + + // Wait for firehose to show up, it starts off null. + while (task.getFirehose() == null) { + Thread.sleep(50); + } + + final TestFirehose firehose = (TestFirehose) task.getFirehose(); + + firehose.addRows( + Arrays.asList( + // Good row- will be processed. + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "1"), + + // Null row- will be thrown away. 
+ null, + + // Bad metric- will count as processed, but that particular metric won't update. + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "foo"), + + // Bad long dim- will count as processed, but bad dims will get default values + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "dimLong", "notnumber", "dimFloat", "notnumber", "met1", "foo"), + + // Bad row- will be unparseable. + ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"), + + // Good row- will be processed. + ImmutableMap.of("t", 1521251960729L, "dim2", "bar", "met1", 2.0) + ) + ); + + // Stop the firehose, this will drain out existing events. + firehose.close(); + + // Wait for publish. + Collection publishedSegments = awaitSegments(); + + DataSegment publishedSegment = Iterables.getOnlyElement(publishedSegments); + + // Check metrics. + Assert.assertEquals(2, task.getMetrics().processed()); + Assert.assertEquals(2, task.getMetrics().processedWithErrors()); + Assert.assertEquals(0, task.getMetrics().thrownAway()); + Assert.assertEquals(2, task.getMetrics().unparseable()); + + // Do some queries. + Assert.assertEquals(4, sumMetric(task, null, "rows")); + Assert.assertEquals(3, sumMetric(task, null, "met1")); + + awaitHandoffs(); + + // Simulate handoff. + for (Map.Entry> entry : handOffCallbacks.entrySet()) { + final Pair executorRunnablePair = entry.getValue(); + Assert.assertEquals( + new SegmentDescriptor( + publishedSegment.getInterval(), + publishedSegment.getVersion(), + publishedSegment.getShardSpec().getPartitionNum() + ), + entry.getKey() + ); + executorRunnablePair.lhs.execute(executorRunnablePair.rhs); + } + handOffCallbacks.clear(); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 2, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 2, + TaskMetricsUtils.ROWS_UNPARSEABLE, 2, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 + ) + ); + // Wait for the task to finish. final TaskStatus taskStatus = statusFuture.get(); Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode()); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); + Map expectedUnparseables = ImmutableMap.of( + "buildSegments", + Arrays.asList( + "Unparseable timestamp found! Event: {dim1=foo, met1=2.0, __fail__=x}", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2018-03-17T01:59:20.729Z, event={t=1521251960729, dim1=foo, dimLong=notnumber, dimFloat=notnumber, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [could not convert value [notnumber] to long,could not convert value [notnumber] to float,Unable to parse value[foo] for field[met1],]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2018-03-17T01:59:20.729Z, event={t=1521251960729, dim1=foo, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [Unable to parse value[foo] for field[met1],]", + "Unparseable timestamp found! 
Event: null" + ) + ); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); + Assert.assertEquals(IngestionState.COMPLETED, reportData.getIngestionState()); + } + + @Test(timeout = 60_000L) + public void testMultipleParseExceptionsFailure() throws Exception + { + expectPublishedSegments(1); + + final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, TransformSpec.NONE, false, 0, true, 3, 10); + final ListenableFuture statusFuture = runTask(task); + + // Wait for firehose to show up, it starts off null. + while (task.getFirehose() == null) { + Thread.sleep(50); + } + + final TestFirehose firehose = (TestFirehose) task.getFirehose(); + + firehose.addRows( + Arrays.asList( + // Good row- will be processed. + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "1"), + + // Null row- will be thrown away. + null, + + // Bad metric- will count as processed, but that particular metric won't update. + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "met1", "foo"), + + // Bad long dim- will count as processed, but bad dims will get default values + ImmutableMap.of("t", 1521251960729L, "dim1", "foo", "dimLong", "notnumber", "dimFloat", "notnumber", "met1", "foo"), + + // Bad row- will be unparseable. + ImmutableMap.of("dim1", "foo", "met1", 2.0, FAIL_DIM, "x"), + + // Good row- will be processed. + ImmutableMap.of("t", 1521251960729L, "dim2", "bar", "met1", 2.0) + ) + ); + + // Stop the firehose, this will drain out existing events. + firehose.close(); + + // Wait for the task to finish. + final TaskStatus taskStatus = statusFuture.get(); + Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode()); + Assert.assertTrue(taskStatus.getErrorMsg().contains("Max parse exceptions exceeded, terminating task...")); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 1, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 2, + TaskMetricsUtils.ROWS_UNPARSEABLE, 2, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 + ) + ); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); + Map expectedUnparseables = ImmutableMap.of( + "buildSegments", + Arrays.asList( + "Unparseable timestamp found! Event: {dim1=foo, met1=2.0, __fail__=x}", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2018-03-17T01:59:20.729Z, event={t=1521251960729, dim1=foo, dimLong=notnumber, dimFloat=notnumber, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [could not convert value [notnumber] to long,could not convert value [notnumber] to float,Unable to parse value[foo] for field[met1],]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2018-03-17T01:59:20.729Z, event={t=1521251960729, dim1=foo, met1=foo}, dimensions=[dim1, dim2, dim1t, dimLong, dimFloat]}], exceptions: [Unable to parse value[foo] for field[met1],]", + "Unparseable timestamp found! Event: null" + ) + ); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); + Assert.assertEquals(IngestionState.BUILD_SEGMENTS, reportData.getIngestionState()); } @Test(timeout = 60_000L) @@ -929,14 +1120,21 @@ public void testRestoreCorruptData() throws Exception final ListenableFuture statusFuture = runTask(task2); // Wait for the task to finish. 
- boolean caught = false; - try { - statusFuture.get(); - } - catch (Exception expected) { - caught = true; - } - Assert.assertTrue("expected exception", caught); + TaskStatus status = statusFuture.get(); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, 0, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_UNPARSEABLE, 0, + TaskMetricsUtils.ROWS_THROWN_AWAY, 0 + ) + ); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); + Assert.assertTrue(status.getErrorMsg().contains("java.lang.IllegalArgumentException\n\tat java.nio.Buffer.position")); } } @@ -989,19 +1187,22 @@ public TaskStatus call() throws Exception private AppenderatorDriverRealtimeIndexTask makeRealtimeTask(final String taskId) { - return makeRealtimeTask(taskId, TransformSpec.NONE, true, 0); + return makeRealtimeTask(taskId, TransformSpec.NONE, true, 0, true, 0, 1); } private AppenderatorDriverRealtimeIndexTask makeRealtimeTask(final String taskId, boolean reportParseExceptions) { - return makeRealtimeTask(taskId, TransformSpec.NONE, reportParseExceptions, 0); + return makeRealtimeTask(taskId, TransformSpec.NONE, reportParseExceptions, 0, true, null, 1); } private AppenderatorDriverRealtimeIndexTask makeRealtimeTask( final String taskId, final TransformSpec transformSpec, final boolean reportParseExceptions, - final long handoffTimeout + final long handoffTimeout, + final Boolean logParseExceptions, + final Integer maxParseExceptions, + final Integer maxSavedParseExceptions ) { ObjectMapper objectMapper = new DefaultObjectMapper(); @@ -1012,7 +1213,13 @@ private AppenderatorDriverRealtimeIndexTask makeRealtimeTask( new TimeAndDimsParseSpec( new TimestampSpec("t", "auto", null), new DimensionsSpec( - DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim1t")), + ImmutableList.of( + new StringDimensionSchema("dim1"), + new StringDimensionSchema("dim2"), + new StringDimensionSchema("dim1t"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), null, null ) @@ -1041,13 +1248,18 @@ private AppenderatorDriverRealtimeIndexTask makeRealtimeTask( reportParseExceptions, handoffTimeout, null, - null + null, + logParseExceptions, + maxParseExceptions, + maxSavedParseExceptions ); return new AppenderatorDriverRealtimeIndexTask( taskId, null, new RealtimeAppenderatorIngestionSpec(dataSchema, realtimeIOConfig, tuningConfig), - null + null, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER ) { @Override @@ -1244,7 +1456,7 @@ public List getLocations() EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0), - new NoopTestTaskFileWriter() + new TaskReportFileWriter(reportsFile) ); } @@ -1266,4 +1478,17 @@ public long sumMetric(final Task task, final DimFilter filter, final String metr task.getQueryRunner(query).run(QueryPlus.wrap(query), ImmutableMap.of()).toList(); return results.isEmpty() ? 
0 : results.get(0).getValue().getLongMetric(metric); } + + private IngestionStatsAndErrorsTaskReportData getTaskReportData() throws IOException + { + Map taskReports = objectMapper.readValue( + reportsFile, + new TypeReference>() + { + } + ); + return IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + taskReports + ); + } } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java index 27ecd4bb66a2..64f4ddeb520d 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/CompactionTaskTest.java @@ -27,6 +27,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; +import com.google.inject.Binder; +import com.google.inject.Module; import io.druid.data.input.FirehoseFactory; import io.druid.data.input.impl.DimensionSchema; import io.druid.data.input.impl.DimensionsSpec; @@ -79,6 +81,8 @@ import io.druid.segment.loading.SegmentLoadingException; import io.druid.segment.transform.TransformingInputRowParser; import io.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import io.druid.server.security.AuthTestUtils; +import io.druid.server.security.AuthorizerMapper; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NumberedShardSpec; import org.hamcrest.CoreMatchers; @@ -200,7 +204,21 @@ private static ObjectMapper setupInjectablesInObjectMapper(ObjectMapper objectMa guiceIntrospector, objectMapper.getDeserializationConfig().getAnnotationIntrospector() ) ); - objectMapper.setInjectableValues(new GuiceInjectableValues(GuiceInjectors.makeStartupInjector())); + GuiceInjectableValues injectableValues = new GuiceInjectableValues( + GuiceInjectors.makeStartupInjectorWithModules( + ImmutableList.of( + new Module() + { + @Override + public void configure(Binder binder) + { + binder.bind(AuthorizerMapper.class).toInstance(AuthTestUtils.TEST_AUTHORIZER_MAPPER); + } + } + ) + ) + ); + objectMapper.setInjectableValues(injectableValues); objectMapper.registerModule( new SimpleModule().registerSubtypes(new NamedType(NumberedShardSpec.class, "NumberedShardSpec")) ); @@ -244,6 +262,9 @@ private static IndexTuningConfig createTuningConfig() false, null, 100L, + null, + null, + null, null ); } @@ -263,7 +284,8 @@ public void testSerdeWithInterval() throws IOException null, createTuningConfig(), ImmutableMap.of("testKey", "testContext"), - objectMapper + objectMapper, + AuthTestUtils.TEST_AUTHORIZER_MAPPER ); final byte[] bytes = objectMapper.writeValueAsBytes(task); final CompactionTask fromJson = objectMapper.readValue(bytes, CompactionTask.class); @@ -289,7 +311,8 @@ public void testSerdeWithSegments() throws IOException null, createTuningConfig(), ImmutableMap.of("testKey", "testContext"), - objectMapper + objectMapper, + AuthTestUtils.TEST_AUTHORIZER_MAPPER ); final byte[] bytes = objectMapper.writeValueAsBytes(task); final CompactionTask fromJson = objectMapper.readValue(bytes, CompactionTask.class); diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java index 8106c79122e8..906e2657ab67 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java +++ 
b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java @@ -19,19 +19,31 @@ package io.druid.indexing.common.task; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.io.Files; import io.druid.data.input.impl.CSVParseSpec; import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.FloatDimensionSchema; +import io.druid.data.input.impl.JSONParseSpec; +import io.druid.data.input.impl.LongDimensionSchema; import io.druid.data.input.impl.ParseSpec; import io.druid.data.input.impl.SpatialDimensionSchema; +import io.druid.data.input.impl.StringDimensionSchema; import io.druid.data.input.impl.StringInputRowParser; import io.druid.data.input.impl.TimestampSpec; +import io.druid.indexer.TaskMetricsUtils; +import io.druid.indexer.TaskState; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; import io.druid.indexing.common.TaskLock; import io.druid.indexing.common.TaskLockType; +import io.druid.indexing.common.TaskReport; +import io.druid.indexing.common.TaskReportFileWriter; +import io.druid.indexing.common.TaskStatus; import io.druid.indexing.common.TaskToolbox; import io.druid.indexing.common.TestUtils; import io.druid.indexing.common.actions.LockAcquireAction; @@ -46,9 +58,9 @@ import io.druid.indexing.overlord.SegmentPublishResult; import io.druid.java.util.common.DateTimes; import io.druid.java.util.common.Intervals; +import io.druid.java.util.common.Pair; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.granularity.Granularities; -import io.druid.java.util.common.parsers.ParseException; import io.druid.math.expr.ExprMacroTable; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.LongSumAggregatorFactory; @@ -65,13 +77,16 @@ import io.druid.segment.realtime.firehose.LocalFirehoseFactory; import io.druid.segment.transform.ExpressionTransform; import io.druid.segment.transform.TransformSpec; +import io.druid.server.security.AuthTestUtils; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.HashBasedNumberedShardSpec; import io.druid.timeline.partition.NoneShardSpec; import io.druid.timeline.partition.NumberedShardSpec; import io.druid.timeline.partition.ShardSpec; import org.joda.time.Interval; +import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -82,6 +97,7 @@ import java.io.IOException; import java.net.URI; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; @@ -119,6 +135,7 @@ public class IndexTaskTest private IndexMergerV9 indexMergerV9; private IndexIO indexIO; private volatile int segmentAllocatePartitionCounter; + private File reportsFile; public IndexTaskTest() { @@ -128,6 +145,18 @@ public IndexTaskTest() indexIO = testUtils.getTestIndexIO(); } + @Before + public void setup() throws IOException + { + reportsFile = File.createTempFile("IndexTaskTestReports-" + System.currentTimeMillis(), "json"); + } + + @After + public void teardown() throws IOException + { + reportsFile.delete(); + } + @Test public void testDeterminePartitions() throws Exception { @@ -151,10 +180,12 @@ 
public void testDeterminePartitions() throws Exception createTuningConfig(2, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(2, segments.size()); @@ -194,12 +225,14 @@ public void testForceExtendableShardSpecs() throws Exception createTuningConfig(2, null, true, false), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); Assert.assertEquals(indexTask.getId(), indexTask.getGroupId()); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(2, segments.size()); @@ -243,12 +276,14 @@ public void testTransformSpec() throws Exception createTuningConfig(2, null, true, false), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); Assert.assertEquals(indexTask.getId(), indexTask.getGroupId()); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); @@ -284,10 +319,12 @@ public void testWithArbitraryGranularity() throws Exception createTuningConfig(10, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - List segments = runTask(indexTask); + List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); } @@ -318,10 +355,12 @@ public void testIntervalBucketing() throws Exception createTuningConfig(50, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); } @@ -348,10 +387,12 @@ public void testNumShardsProvided() throws Exception createTuningConfig(null, 1, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); @@ -384,12 +425,14 @@ public void testAppendToExisting() throws Exception createTuningConfig(2, null, false, false), true ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); Assert.assertEquals("index_append_test", indexTask.getGroupId()); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(2, segmentAllocatePartitionCounter); Assert.assertEquals(2, segments.size()); @@ -431,10 +474,12 @@ public void testIntervalNotSpecified() throws Exception createTuningConfig(2, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(3, segments.size()); @@ -491,10 +536,12 @@ public void testCSVFileWithHeader() throws Exception createTuningConfig(2, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); @@ -540,10 +587,12 @@ public void testCSVFileWithHeaderColumnOverride() throws Exception createTuningConfig(2, null, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(1, segments.size()); @@ -584,10 +633,12 @@ public void testWithSmallMaxTotalRows() throws Exception createTuningConfig(2, 2, 2L, null, false, false, true), false ), + null, 
+ AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(6, segments.size()); @@ -626,10 +677,12 @@ public void testPerfectRollup() throws Exception createTuningConfig(3, 2, 2L, null, false, true, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(3, segments.size()); @@ -667,10 +720,12 @@ public void testBestEffortRollup() throws Exception createTuningConfig(3, 2, 2L, null, false, false, true), false ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(5, segments.size()); @@ -742,10 +797,12 @@ public void testIgnoreParseException() throws Exception null, null, parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; Assert.assertEquals(Arrays.asList("d"), segments.get(0).getDimensions()); Assert.assertEquals(Arrays.asList("val"), segments.get(0).getMetrics()); @@ -755,9 +812,6 @@ public void testIgnoreParseException() throws Exception @Test public void testReportParseException() throws Exception { - expectedException.expect(ParseException.class); - expectedException.expectMessage("Unparseable timestamp found!"); - final File tmpDir = temporaryFolder.newFolder(); final File tmpFile = File.createTempFile("druid", "index", tmpDir); @@ -795,12 +849,356 @@ public void testReportParseException() throws Exception null, null, parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + null + ); + + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + checkTaskStatusErrorMsgForParseExceptionsExceeded(status); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + new ArrayList<>(), + "buildSegments", + Arrays.asList("Unparseable timestamp found! 
Event: {time=unparseable, d=a, val=1}") + ); + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); + } + + @Test + public void testMultipleParseExceptionsSuccess() throws Exception + { + final File tmpDir = temporaryFolder.newFolder(); + + final File tmpFile = File.createTempFile("druid", "index", tmpDir); + + try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) { + writer.write("{\"time\":\"unparseable\",\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // unparseable time + writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // valid row + writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":\"notnumber\",\"dimFloat\":3.0,\"val\":1}\n"); // row with invalid long dimension + writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":\"notnumber\",\"val\":1}\n"); // row with invalid float dimension + writer.write("{\"time\":\"2014-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":4.0,\"val\":\"notnumber\"}\n"); // row with invalid metric + writer.write("{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // invalid JSON + writer.write("{\"time\":\"3014-03-01T00:00:10Z\",\"dim\":\"outsideofinterval\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // thrown away + writer.write("{\"time\":\"99999999999-01-01T00:00:10Z\",\"dim\":\"b\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}\n"); // unparseable time + writer.write("this is not JSON\n"); // invalid JSON + } + + final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig( + 2, + null, + null, + null, + null, + indexSpec, + null, + true, + false, + true, + false, + null, + null, + null, + true, + 7, + 7 + ); + + final IndexIngestionSpec parseExceptionIgnoreSpec = createIngestionSpec( + tmpDir, + new JSONParseSpec( + new TimestampSpec( + "time", + "auto", + null + ), + new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), + Lists.newArrayList(), + Lists.newArrayList() + ), + null, + null + ), + null, + tuningConfig, + false + ); + + IndexTask indexTask = new IndexTask( + null, + null, + parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + null + ); + + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.SUCCESS, status.getStatusCode()); + Assert.assertEquals(null, status.getErrorMsg()); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Map expectedMetrics = ImmutableMap.of( + "determinePartitions", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_PROCESSED, 4, + TaskMetricsUtils.ROWS_UNPARSEABLE, 4, + TaskMetricsUtils.ROWS_THROWN_AWAY, 1 + ), + "buildSegments", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 3, + TaskMetricsUtils.ROWS_PROCESSED, 1, + TaskMetricsUtils.ROWS_UNPARSEABLE, 4, + TaskMetricsUtils.ROWS_THROWN_AWAY, 1 + ) + ); + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + Arrays.asList( + "Unable to parse row [this is not JSON]", + "Unparseable timestamp found! 
Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", + "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]", + "Unparseable timestamp found! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" + ), + "buildSegments", + Arrays.asList( + "Unable to parse row [this is not JSON]", + "Unparseable timestamp found! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", + "Unable to parse row [{\"time\":9.0x,\"dim\":\"a\",\"dimLong\":2,\"dimFloat\":3.0,\"val\":1}]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2014-01-01T00:00:10.000Z, event={time=2014-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=4.0, val=notnumber}, dimensions=[dim, dimLong, dimFloat]}], exceptions: [Unable to parse value[notnumber] for field[val],]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2014-01-01T00:00:10.000Z, event={time=2014-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=notnumber, val=1}, dimensions=[dim, dimLong, dimFloat]}], exceptions: [could not convert value [notnumber] to float,]", + "Found unparseable columns in row: [MapBasedInputRow{timestamp=2014-01-01T00:00:10.000Z, event={time=2014-01-01T00:00:10Z, dim=b, dimLong=notnumber, dimFloat=3.0, val=1}, dimensions=[dim, dimLong, dimFloat]}], exceptions: [could not convert value [notnumber] to long,]", + "Unparseable timestamp found! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" + ) + ); + + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); + } + + @Test + public void testMultipleParseExceptionsFailure() throws Exception + { + final File tmpDir = temporaryFolder.newFolder(); + + final File tmpFile = File.createTempFile("druid", "index", tmpDir); + + try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) { + writer.write("time,dim,dimLong,dimFloat,val\n"); + writer.write("unparseable,a,2,3.0,1\n"); // unparseable + writer.write("2014-01-01T00:00:10Z,a,2,3.0,1\n"); // valid row + writer.write("9.0,a,2,3.0,1\n"); // unparseable + writer.write("3014-03-01T00:00:10Z,outsideofinterval,2,3.0,1\n"); // thrown away + writer.write("99999999999-01-01T00:00:10Z,b,2,3.0,1\n"); // unparseable + } + + // Allow up to 3 parse exceptions, and save up to 2 parse exceptions + final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig( + 2, + null, + null, + null, + null, + indexSpec, + null, + true, + false, + false, + false, + null, + null, + null, + true, + 2, + 5 + ); + + final IndexIngestionSpec parseExceptionIgnoreSpec = createIngestionSpec( + tmpDir, + new CSVParseSpec( + new TimestampSpec( + "time", + "auto", + null + ), + new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), + Lists.newArrayList(), + Lists.newArrayList() + ), + null, + Arrays.asList("time", "dim", "dimLong", "dimFloat", "val"), + true, + 0 + ), + null, + tuningConfig, + false + ); + + IndexTask indexTask = new IndexTask( + null, + null, + parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, + null + ); + + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + checkTaskStatusErrorMsgForParseExceptionsExceeded(status); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Map expectedMetrics = ImmutableMap.of( + "buildSegments", + ImmutableMap.of( + 
TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_PROCESSED, 1, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3, + TaskMetricsUtils.ROWS_THROWN_AWAY, 2 + ) + ); + + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + new ArrayList<>(), + "buildSegments", + Arrays.asList( + "Unparseable timestamp found! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", + "Unparseable timestamp found! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}", + "Unparseable timestamp found! Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" + ) + ); + + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); + } + + @Test + public void testMultipleParseExceptionsFailureAtDeterminePartitions() throws Exception + { + final File tmpDir = temporaryFolder.newFolder(); + + final File tmpFile = File.createTempFile("druid", "index", tmpDir); + + try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) { + writer.write("time,dim,dimLong,dimFloat,val\n"); + writer.write("unparseable,a,2,3.0,1\n"); // unparseable + writer.write("2014-01-01T00:00:10Z,a,2,3.0,1\n"); // valid row + writer.write("9.0,a,2,3.0,1\n"); // unparseable + writer.write("3014-03-01T00:00:10Z,outsideofinterval,2,3.0,1\n"); // thrown away + writer.write("99999999999-01-01T00:00:10Z,b,2,3.0,1\n"); // unparseable + } + + // Allow up to 3 parse exceptions, and save up to 2 parse exceptions + final IndexTask.IndexTuningConfig tuningConfig = new IndexTask.IndexTuningConfig( + 2, + null, + null, + null, + null, + indexSpec, + null, + true, + false, + true, + false, + null, + null, + null, + true, + 2, + 5 + ); + + final IndexIngestionSpec parseExceptionIgnoreSpec = createIngestionSpec( + tmpDir, + new CSVParseSpec( + new TimestampSpec( + "time", + "auto", + null + ), + new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("dim"), + new LongDimensionSchema("dimLong"), + new FloatDimensionSchema("dimFloat") + ), + Lists.newArrayList(), + Lists.newArrayList() + ), + null, + Arrays.asList("time", "dim", "dimLong", "dimFloat", "val"), + true, + 0 + ), + null, + tuningConfig, + false + ); + + IndexTask indexTask = new IndexTask( + null, + null, + parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - runTask(indexTask); + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + checkTaskStatusErrorMsgForParseExceptionsExceeded(status); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Map expectedMetrics = ImmutableMap.of( + "determinePartitions", + ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, 0, + TaskMetricsUtils.ROWS_PROCESSED, 1, + TaskMetricsUtils.ROWS_UNPARSEABLE, 3, + TaskMetricsUtils.ROWS_THROWN_AWAY, 2 + ) + ); + + Assert.assertEquals(expectedMetrics, reportData.getRowStats()); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + Arrays.asList( + "Unparseable timestamp found! Event: {time=99999999999-01-01T00:00:10Z, dim=b, dimLong=2, dimFloat=3.0, val=1}", + "Unparseable timestamp found! Event: {time=9.0, dim=a, dimLong=2, dimFloat=3.0, val=1}", + "Unparseable timestamp found! 
Event: {time=unparseable, dim=a, dimLong=2, dimFloat=3.0, val=1}" + ), + "buildSegments", + new ArrayList<>() + ); + + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); } + @Test public void testCsvWithHeaderOfEmptyColumns() throws Exception { @@ -854,10 +1252,12 @@ public void testCsvWithHeaderOfEmptyColumns() throws Exception null, null, parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - final List segments = runTask(indexTask); + final List segments = runTask(indexTask).rhs; // the order of result segments can be changed because hash shardSpec is used. // the below loop is to make this test deterministic. Assert.assertEquals(2, segments.size()); @@ -885,9 +1285,6 @@ public void testCsvWithHeaderOfEmptyColumns() throws Exception @Test public void testCsvWithHeaderOfEmptyTimestamp() throws Exception { - expectedException.expect(ParseException.class); - expectedException.expectMessage("Unparseable timestamp found!"); - final File tmpDir = temporaryFolder.newFolder(); final File tmpFile = File.createTempFile("druid", "index", tmpDir); @@ -924,13 +1321,34 @@ public void testCsvWithHeaderOfEmptyTimestamp() throws Exception null, null, parseExceptionIgnoreSpec, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); - runTask(indexTask); + TaskStatus status = runTask(indexTask).lhs; + Assert.assertEquals(TaskState.FAILED, status.getStatusCode()); + + checkTaskStatusErrorMsgForParseExceptionsExceeded(status); + + IngestionStatsAndErrorsTaskReportData reportData = getTaskReportData(); + + Map expectedUnparseables = ImmutableMap.of( + "determinePartitions", + new ArrayList<>(), + "buildSegments", + Arrays.asList("Unparseable timestamp found! Event: {column_1=2014-01-01T00:00:10Z, column_2=a, column_3=1}") + ); + Assert.assertEquals(expectedUnparseables, reportData.getUnparseableEvents()); + } + + public static void checkTaskStatusErrorMsgForParseExceptionsExceeded(TaskStatus status) + { + // full stacktrace will be too long and make tests brittle (e.g. 
if line # changes), just match the main message + Assert.assertTrue(status.getErrorMsg().contains("Max parse exceptions exceeded, terminating task...")); } - private List runTask(IndexTask indexTask) throws Exception + private Pair> runTask(IndexTask indexTask) throws Exception { final List segments = Lists.newArrayList(); @@ -1044,15 +1462,15 @@ public Map makeLoadSpec(URI uri) null, null, null, - new NoopTestTaskFileWriter() + new TaskReportFileWriter(reportsFile) ); indexTask.isReady(box.getTaskActionClient()); - indexTask.run(box); + TaskStatus status = indexTask.run(box); Collections.sort(segments); - return segments; + return Pair.of(status, segments); } private IndexTask.IndexIngestionSpec createIngestionSpec( @@ -1150,7 +1568,23 @@ private static IndexTuningConfig createTuningConfig( reportParseException, null, null, - null + null, + null, + null, + 1 + ); + } + + private IngestionStatsAndErrorsTaskReportData getTaskReportData() throws IOException + { + Map taskReports = jsonMapper.readValue( + reportsFile, + new TypeReference>() + { + } + ); + return IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports( + taskReports ); } } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java b/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java index cebee6c624f6..f9bfe12d772c 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/NoopTestTaskFileWriter.java @@ -22,6 +22,8 @@ import io.druid.indexing.common.TaskReport; import io.druid.indexing.common.TaskReportFileWriter; +import java.util.Map; + public class NoopTestTaskFileWriter extends TaskReportFileWriter { public NoopTestTaskFileWriter() @@ -30,7 +32,7 @@ public NoopTestTaskFileWriter() } @Override - public void write(TaskReport report) + public void write(Map reports) { } } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java index 916c925e0d02..e7e44e4d8419 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/RealtimeIndexTaskTest.java @@ -128,7 +128,6 @@ import org.junit.Before; import org.junit.Rule; import org.junit.Test; -import org.junit.internal.matchers.ThrowableCauseMatcher; import org.junit.internal.matchers.ThrowableMessageMatcher; import org.junit.rules.ExpectedException; import org.junit.rules.TemporaryFolder; @@ -478,21 +477,10 @@ public void testReportParseExceptionsOnBadMetric() throws Exception expectedException.expectCause(CoreMatchers.instanceOf(ParseException.class)); expectedException.expectCause( ThrowableMessageMatcher.hasMessage( - CoreMatchers.containsString("Encountered parse error for aggregator[met1]") - ) - ); - expectedException.expect( - ThrowableCauseMatcher.hasCause( - ThrowableCauseMatcher.hasCause( - CoreMatchers.allOf( - CoreMatchers.instanceOf(ParseException.class), - ThrowableMessageMatcher.hasMessage( - CoreMatchers.containsString("Unable to parse value[foo] for field[met1]") - ) - ) - ) + CoreMatchers.containsString("[Unable to parse value[foo] for field[met1]") ) ); + statusFuture.get(); } diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java 
b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java new file mode 100644 index 000000000000..62edbbbdc0c0 --- /dev/null +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskReportSerdeTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexing.common.task; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import io.druid.indexer.IngestionState; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReport; +import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData; +import io.druid.indexing.common.TaskReport; +import io.druid.indexing.common.TestUtils; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; + +public class TaskReportSerdeTest +{ + private final ObjectMapper jsonMapper; + + public TaskReportSerdeTest() + { + TestUtils testUtils = new TestUtils(); + jsonMapper = testUtils.getTestObjectMapper(); + } + + @Test + public void testSerde() throws Exception + { + IngestionStatsAndErrorsTaskReport report1 = new IngestionStatsAndErrorsTaskReport( + "testID", + new IngestionStatsAndErrorsTaskReportData( + IngestionState.BUILD_SEGMENTS, + ImmutableMap.of( + "hello", "world" + ), + ImmutableMap.of( + "number", 1234 + ), + "an error message" + ) + ); + String report1serialized = jsonMapper.writeValueAsString(report1); + IngestionStatsAndErrorsTaskReport report2 = jsonMapper.readValue( + report1serialized, + IngestionStatsAndErrorsTaskReport.class + ); + Assert.assertEquals(report1, report2); + + + Map reportMap1 = TaskReport.buildTaskReports(report1); + String reportMapSerialized = jsonMapper.writeValueAsString(reportMap1); + Map reportMap2 = jsonMapper.readValue( + reportMapSerialized, + new TypeReference>() + { + } + ); + Assert.assertEquals(reportMap1, reportMap2); + } +} diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java index ff5a17d8ba44..589dcf3af913 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/TaskSerdeTest.java @@ -52,6 +52,7 @@ import io.druid.segment.realtime.plumber.Plumber; import io.druid.segment.realtime.plumber.PlumberSchool; import io.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; +import io.druid.server.security.AuthTestUtils; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NoneShardSpec; import org.hamcrest.CoreMatchers; @@ -190,8 +191,28 @@ public void testIndexTaskSerde() throws Exception 
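The new TaskReportSerdeTest above round-trips a report map through Jackson, which is the same pattern getTaskReportData() uses earlier in IndexTaskTest to read the file written by TaskReportFileWriter. A minimal standalone sketch of that read path, using only the types and methods shown in this patch (the class name ReportFileReader and its method are illustrative, not part of the patch):

    import com.fasterxml.jackson.core.type.TypeReference;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import io.druid.indexing.common.IngestionStatsAndErrorsTaskReportData;
    import io.druid.indexing.common.TaskReport;

    import java.io.File;
    import java.io.IOException;
    import java.util.Map;

    public class ReportFileReader
    {
      // Reads the report file written by TaskReportFileWriter and extracts the
      // ingestion stats/errors payload, mirroring getTaskReportData() above.
      public static IngestionStatsAndErrorsTaskReportData read(ObjectMapper jsonMapper, File reportsFile)
          throws IOException
      {
        Map<String, TaskReport> taskReports = jsonMapper.readValue(
            reportsFile,
            new TypeReference<Map<String, TaskReport>>() {}
        );
        return IngestionStatsAndErrorsTaskReportData.getPayloadFromTaskReports(taskReports);
      }
    }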
jsonMapper ), new IndexIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null), true), - new IndexTuningConfig(10000, 10, null, 9999, null, indexSpec, 3, true, true, false, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + 9999, + null, + indexSpec, + 3, + true, + true, + false, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); @@ -253,8 +274,28 @@ public void testIndexTaskwithResourceSerde() throws Exception jsonMapper ), new IndexIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null), true), - new IndexTuningConfig(10000, 10, null, null, null, indexSpec, 3, true, true, false, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + null, + null, + indexSpec, + 3, + true, + true, + false, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); @@ -782,6 +823,8 @@ public void testHadoopIndexTaskSerde() throws Exception null, "blah", jsonMapper, + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java index c17452f654aa..0c73602a1d25 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/TaskLifecycleTest.java @@ -115,6 +115,7 @@ import io.druid.server.coordination.ServerType; import io.druid.server.initialization.ServerConfig; import io.druid.server.metrics.NoopServiceEmitter; +import io.druid.server.security.AuthTestUtils; import io.druid.timeline.DataSegment; import io.druid.timeline.partition.NoneShardSpec; import org.easymock.EasyMock; @@ -667,8 +668,28 @@ public void testIndexTask() throws Exception mapper ), new IndexIOConfig(new MockFirehoseFactory(false), false), - new IndexTuningConfig(10000, 10, null, null, null, indexSpec, 3, true, true, false, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + null, + null, + indexSpec, + 3, + true, + true, + false, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); @@ -725,8 +746,28 @@ public void testIndexTaskFailure() throws Exception mapper ), new IndexIOConfig(new MockExceptionalFirehoseFactory(), false), - new IndexTuningConfig(10000, 10, null, null, null, indexSpec, 3, true, true, false, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + null, + null, + indexSpec, + 3, + true, + true, + false, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); @@ -1090,8 +1131,28 @@ public void testResumeTasks() throws Exception mapper ), new IndexIOConfig(new MockFirehoseFactory(false), false), - new IndexTuningConfig(10000, 10, null, null, null, indexSpec, null, false, null, null, null, null, null, null) + new IndexTuningConfig( + 10000, + 10, + null, + null, + null, + indexSpec, + null, + false, + null, + null, + null, + null, + null, + null, + null, + null, + null + ) ), + null, + AuthTestUtils.TEST_AUTHORIZER_MAPPER, null ); diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/ObjectFlatteners.java b/java-util/src/main/java/io/druid/java/util/common/parsers/ObjectFlatteners.java index cae3eb686460..63c48987b9c9 100644 --- 
a/java-util/src/main/java/io/druid/java/util/common/parsers/ObjectFlatteners.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/ObjectFlatteners.java @@ -190,7 +190,7 @@ public Object setValue(final Object value) }; } ) - .collect(Collectors.toSet()); + .collect(Collectors.toCollection(LinkedHashSet::new)); } }; } diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/ParseException.java b/java-util/src/main/java/io/druid/java/util/common/parsers/ParseException.java index ca1f5ff67cf2..4c8b797ee593 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/ParseException.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/ParseException.java @@ -25,6 +25,8 @@ */ public class ParseException extends RuntimeException { + private boolean fromPartiallyValidRow = false; + public ParseException(String formatText, Object... arguments) { super(StringUtils.nonStrictFormat(formatText, arguments)); @@ -34,4 +36,14 @@ public ParseException(Throwable cause, String formatText, Object... arguments) { super(StringUtils.nonStrictFormat(formatText, arguments), cause); } + + public boolean isFromPartiallyValidRow() + { + return fromPartiallyValidRow; + } + + public void setFromPartiallyValidRow(boolean fromPartiallyValidRow) + { + this.fromPartiallyValidRow = fromPartiallyValidRow; + } } diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index 332781b9beeb..ecfd6b95a7a9 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -44,6 +44,7 @@ import io.druid.java.util.common.ISE; import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.granularity.Granularity; +import io.druid.java.util.common.parsers.ParseException; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.PostAggregator; import io.druid.query.dimension.DimensionSpec; @@ -451,7 +452,7 @@ protected abstract AggregatorType[] initAggs( ); // Note: This method needs to be thread safe. 
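The ParseException change above adds a fromPartiallyValidRow flag so callers can tell "row was indexed but some columns failed to parse" apart from "row was rejected". The IncrementalIndex changes that follow surface this through a result object instead of throwing. A minimal sketch of a consumer, using only the accessors added in this patch (the handler class and method are illustrative):

    import io.druid.java.util.common.parsers.ParseException;
    import io.druid.segment.incremental.IncrementalIndexAddResult;

    public class AddResultHandlerSketch
    {
      // Distinguishes clean rows, partially parsed rows, and rejected rows.
      static void handle(IncrementalIndexAddResult result)
      {
        ParseException pe = result.getParseException();
        if (pe == null) {
          // row added with no parse problems
        } else if (pe.isFromPartiallyValidRow()) {
          // row added, but some dimension or metric values were unparseable
        } else {
          // row could not be added at all
        }
      }
    }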
- protected abstract Integer addToFacts( + protected abstract AddToFactsResult addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -479,6 +480,55 @@ protected abstract Integer addToFacts( protected abstract boolean isNull(int rowOffset, int aggOffset); + public static class TimeAndDimsResult + { + private TimeAndDims timeAndDims; + private List parseExceptionMessages; + + public TimeAndDimsResult( + TimeAndDims timeAndDims, + List parseExceptionMessages + ) + { + this.timeAndDims = timeAndDims; + this.parseExceptionMessages = parseExceptionMessages; + } + + public TimeAndDims getTimeAndDims() + { + return timeAndDims; + } + + public List getParseExceptionMessages() + { + return parseExceptionMessages; + } + } + + public static class AddToFactsResult + { + private int rowCount; + private List parseExceptionMessages; + + public AddToFactsResult( + int rowCount, + List parseExceptionMessages + ) + { + this.rowCount = rowCount; + this.parseExceptionMessages = parseExceptionMessages; + } + + public int getRowCount() + { + return rowCount; + } + + public List getParseExceptionMessages() + { + return parseExceptionMessages; + } + } @Override public void close() @@ -514,31 +564,36 @@ public Map getColumnCapabilities() * * @return the number of rows in the data set after adding the InputRow */ - public int add(InputRow row) throws IndexSizeExceededException + public IncrementalIndexAddResult add(InputRow row) throws IndexSizeExceededException { return add(row, false); } - public int add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException + public IncrementalIndexAddResult add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException { - TimeAndDims key = toTimeAndDims(row); - final int rv = addToFacts( + TimeAndDimsResult timeAndDimsResult = toTimeAndDims(row); + final AddToFactsResult addToFactsResult = addToFacts( metrics, deserializeComplexMetrics, reportParseExceptions, row, numEntries, - key, + timeAndDimsResult.getTimeAndDims(), in, rowSupplier, skipMaxRowsInMemoryCheck ); updateMaxIngestedTime(row.getTimestamp()); - return rv; + ParseException parseException = getCombinedParseException( + row, + timeAndDimsResult.getParseExceptionMessages(), + addToFactsResult.getParseExceptionMessages() + ); + return new IncrementalIndexAddResult(addToFactsResult.getRowCount(), parseException); } @VisibleForTesting - TimeAndDims toTimeAndDims(InputRow row) + TimeAndDimsResult toTimeAndDims(InputRow row) { row = formatRow(row); if (row.getTimestampFromEpoch() < minTimestamp) { @@ -549,6 +604,7 @@ TimeAndDims toTimeAndDims(InputRow row) Object[] dims; List overflow = null; + List parseExceptionMessages = new ArrayList<>(); synchronized (dimensionDescs) { dims = new Object[dimensionDescs.size()]; for (String dimension : rowDimensions) { @@ -576,10 +632,16 @@ TimeAndDims toTimeAndDims(InputRow row) } DimensionHandler handler = desc.getHandler(); DimensionIndexer indexer = desc.getIndexer(); - Object dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent( - row.getRaw(dimension), - reportParseExceptions - ); + Object dimsKey = null; + try { + dimsKey = indexer.processRowValsToUnsortedEncodedKeyComponent( + row.getRaw(dimension), + true + ); + } + catch (ParseException pe) { + parseExceptionMessages.add(pe.getMessage()); + } // Set column capabilities as data is coming in if (!capabilities.hasMultipleValues() && dimsKey != null && handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) { @@ -622,7 
+684,45 @@ TimeAndDims toTimeAndDims(InputRow row) if (row.getTimestamp() != null) { truncated = gran.bucketStart(row.getTimestamp()).getMillis(); } - return new TimeAndDims(Math.max(truncated, minTimestamp), dims, dimensionDescsList); + + TimeAndDims timeAndDims = new TimeAndDims(Math.max(truncated, minTimestamp), dims, dimensionDescsList); + return new TimeAndDimsResult(timeAndDims, parseExceptionMessages); + } + + public static ParseException getCombinedParseException( + InputRow row, + List dimParseExceptionMessages, + List aggParseExceptionMessages + ) + { + int numAdded = 0; + StringBuilder stringBuilder = new StringBuilder(); + + if (dimParseExceptionMessages != null) { + for (String parseExceptionMessage : dimParseExceptionMessages) { + stringBuilder.append(parseExceptionMessage); + stringBuilder.append(","); + numAdded++; + } + } + if (aggParseExceptionMessages != null) { + for (String parseExceptionMessage : aggParseExceptionMessages) { + stringBuilder.append(parseExceptionMessage); + stringBuilder.append(","); + numAdded++; + } + } + + if (numAdded == 0) { + return null; + } + ParseException pe = new ParseException( + "Found unparseable columns in row: [%s], exceptions: [%s]", + row, + stringBuilder.toString() + ); + pe.setFromPartiallyValidRow(true); + return pe; } private synchronized void updateMaxIngestedTime(DateTime eventTime) diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAddResult.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAddResult.java new file mode 100644 index 000000000000..06c537a0aa36 --- /dev/null +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexAddResult.java @@ -0,0 +1,52 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
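getCombinedParseException above returns null when a row had no per-column failures; otherwise it folds all dimension and aggregator messages into one ParseException flagged as coming from a partially valid row. A small sketch of that contract, using only the method and constructors already shown in this patch (the literal row values and message text are made up for illustration):

    import com.google.common.collect.ImmutableList;
    import com.google.common.collect.ImmutableMap;
    import io.druid.data.input.MapBasedInputRow;
    import io.druid.java.util.common.parsers.ParseException;
    import io.druid.segment.incremental.IncrementalIndex;

    public class CombinedParseExceptionSketch
    {
      public static void main(String[] args)
      {
        MapBasedInputRow row = new MapBasedInputRow(
            0,
            ImmutableList.of("dim"),
            ImmutableMap.of("dim", "a", "val", "notnumber")
        );

        // No failures -> null, so callers can skip error accounting entirely.
        ParseException none = IncrementalIndex.getCombinedParseException(
            row, ImmutableList.of(), ImmutableList.of()
        );
        // none == null

        // One aggregator failure -> a single combined exception, marked partial.
        ParseException combined = IncrementalIndex.getCombinedParseException(
            row,
            ImmutableList.of(),
            ImmutableList.of("Unable to parse value[notnumber] for field[val]")
        );
        // combined != null && combined.isFromPartiallyValidRow()
      }
    }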
+ */ + +package io.druid.segment.incremental; + +import io.druid.java.util.common.parsers.ParseException; + +import javax.annotation.Nullable; + +public class IncrementalIndexAddResult +{ + private final int rowCount; + + @Nullable + private final ParseException parseException; + + public IncrementalIndexAddResult( + int rowCount, + @Nullable ParseException parseException + ) + { + this.rowCount = rowCount; + this.parseException = parseException; + } + + public int getRowCount() + { + return rowCount; + } + + @Nullable + public ParseException getParseException() + { + return parseException; + } +} diff --git a/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java index d377634e0bc8..23df282a786b 100644 --- a/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/OffheapIncrementalIndex.java @@ -138,7 +138,7 @@ protected BufferAggregator[] initAggs( } @Override - protected Integer addToFacts( + protected AddToFactsResult addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -238,7 +238,7 @@ protected Integer addToFacts( } } rowContainer.set(null); - return numEntries.get(); + return new AddToFactsResult(numEntries.get(), new ArrayList<>()); } @Override diff --git a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java index 1a377ea826be..a961beb27817 100644 --- a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java @@ -37,7 +37,9 @@ import javax.annotation.Nullable; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -101,7 +103,7 @@ protected Aggregator[] initAggs( } @Override - protected Integer addToFacts( + protected AddToFactsResult addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -113,17 +115,18 @@ protected Integer addToFacts( boolean skipMaxRowsInMemoryCheck ) throws IndexSizeExceededException { + List parseExceptionMessages; final int priorIndex = facts.getPriorIndex(key); Aggregator[] aggs; if (TimeAndDims.EMPTY_ROW_INDEX != priorIndex) { aggs = concurrentGet(priorIndex); - doAggregate(metrics, aggs, rowContainer, row, reportParseExceptions); + parseExceptionMessages = doAggregate(metrics, aggs, rowContainer, row); } else { aggs = new Aggregator[metrics.length]; factorizeAggs(metrics, aggs, rowContainer, row); - doAggregate(metrics, aggs, rowContainer, row, reportParseExceptions); + parseExceptionMessages = doAggregate(metrics, aggs, rowContainer, row); final int rowIndex = indexIncrement.getAndIncrement(); concurrentSet(rowIndex, aggs); @@ -140,14 +143,14 @@ protected Integer addToFacts( } else { // We lost a race aggs = concurrentGet(prev); - doAggregate(metrics, aggs, rowContainer, row, reportParseExceptions); + parseExceptionMessages = doAggregate(metrics, aggs, rowContainer, row); // Free up the misfire concurrentRemove(rowIndex); // This is expected to occur ~80% of the time in the worst scenarios } } - return numEntries.get(); + return new 
AddToFactsResult(numEntries.get(), parseExceptionMessages); } @Override @@ -171,14 +174,14 @@ private void factorizeAggs( rowContainer.set(null); } - private void doAggregate( + private List doAggregate( AggregatorFactory[] metrics, Aggregator[] aggs, ThreadLocal rowContainer, - InputRow row, - boolean reportParseExceptions + InputRow row ) { + List parseExceptionMessages = new ArrayList<>(); rowContainer.set(row); for (int i = 0; i < aggs.length; i++) { @@ -189,16 +192,14 @@ private void doAggregate( } catch (ParseException e) { // "aggregate" can throw ParseExceptions if a selector expects something but gets something else. - if (reportParseExceptions) { - throw new ParseException(e, "Encountered parse error for aggregator[%s]", metrics[i].getName()); - } else { - log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName()); - } + log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName()); + parseExceptionMessages.add(e.getMessage()); } } } rowContainer.set(null); + return parseExceptionMessages; } private void closeAggregators() diff --git a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java index b9e89fee9462..4f34520e63fb 100644 --- a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java +++ b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexTest.java @@ -215,11 +215,10 @@ public void testUnparseableNumerics() throws IndexSizeExceededException { IncrementalIndex index = closer.closeLater(indexCreator.createIndex()); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [asdj] to long"); - index.add( + IncrementalIndexAddResult result; + result = index.add( new MapBasedInputRow( - System.currentTimeMillis() - 1, + 0, Lists.newArrayList("string", "float", "long", "double"), ImmutableMap.of( "string", "A", @@ -229,12 +228,15 @@ public void testUnparseableNumerics() throws IndexSizeExceededException ) ) ); + Assert.assertEquals(ParseException.class, result.getParseException().getClass()); + Assert.assertEquals( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=1970-01-01T00:00:00.000Z, event={string=A, float=19.0, long=asdj, double=21.0}, dimensions=[string, float, long, double]}], exceptions: [could not convert value [asdj] to long,]", + result.getParseException().getMessage() + ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [aaa] to float"); - index.add( + result = index.add( new MapBasedInputRow( - System.currentTimeMillis() - 1, + 0, Lists.newArrayList("string", "float", "long", "double"), ImmutableMap.of( "string", "A", @@ -244,12 +246,15 @@ public void testUnparseableNumerics() throws IndexSizeExceededException ) ) ); + Assert.assertEquals(ParseException.class, result.getParseException().getClass()); + Assert.assertEquals( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=1970-01-01T00:00:00.000Z, event={string=A, float=aaa, long=20, double=21.0}, dimensions=[string, float, long, double]}], exceptions: [could not convert value [aaa] to float,]", + result.getParseException().getMessage() + ); - expectedException.expect(ParseException.class); - expectedException.expectMessage("could not convert value [] to double"); - index.add( + result = index.add( new MapBasedInputRow( - System.currentTimeMillis() - 1, + 0, 
Lists.newArrayList("string", "float", "long", "double"), ImmutableMap.of( "string", "A", @@ -259,6 +264,11 @@ public void testUnparseableNumerics() throws IndexSizeExceededException ) ) ); + Assert.assertEquals(ParseException.class, result.getParseException().getClass()); + Assert.assertEquals( + "Found unparseable columns in row: [MapBasedInputRow{timestamp=1970-01-01T00:00:00.000Z, event={string=A, float=19.0, long=20, double=}, dimensions=[string, float, long, double]}], exceptions: [could not convert value [] to double,]", + result.getParseException().getMessage() + ); } @Test diff --git a/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java b/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java index c4f5fe3f1737..53ee9ec92c63 100644 --- a/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java +++ b/processing/src/test/java/io/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java @@ -166,7 +166,7 @@ protected void concurrentSet(int offset, Aggregator[] value) } @Override - protected Integer addToFacts( + protected AddToFactsResult addToFacts( AggregatorFactory[] metrics, boolean deserializeComplexMetrics, boolean reportParseExceptions, @@ -235,8 +235,7 @@ protected Integer addToFacts( rowContainer.set(null); - - return numEntries.get(); + return new AddToFactsResult(numEntries.get(), new ArrayList<>()); } @Override diff --git a/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java b/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java index bdf097da71d1..fa9490eae490 100644 --- a/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java +++ b/processing/src/test/java/io/druid/segment/incremental/TimeAndDimsCompTest.java @@ -45,13 +45,13 @@ public void testBasic() .buildOnheap(); long time = System.currentTimeMillis(); - TimeAndDims td1 = index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "B")); - TimeAndDims td2 = index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "A")); - TimeAndDims td3 = index.toTimeAndDims(toMapRow(time, "billy", "A")); + TimeAndDims td1 = index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "B")).getTimeAndDims(); + TimeAndDims td2 = index.toTimeAndDims(toMapRow(time, "billy", "A", "joe", "A")).getTimeAndDims(); + TimeAndDims td3 = index.toTimeAndDims(toMapRow(time, "billy", "A")).getTimeAndDims(); - TimeAndDims td4 = index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", "B")); - TimeAndDims td5 = index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", Arrays.asList("A", "B"))); - TimeAndDims td6 = index.toTimeAndDims(toMapRow(time + 1)); + TimeAndDims td4 = index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", "B")).getTimeAndDims(); + TimeAndDims td5 = index.toTimeAndDims(toMapRow(time + 1, "billy", "A", "joe", Arrays.asList("A", "B"))).getTimeAndDims(); + TimeAndDims td6 = index.toTimeAndDims(toMapRow(time + 1)).getTimeAndDims(); Comparator comparator = index.dimsComparator(); diff --git a/server/src/main/java/io/druid/segment/indexing/TuningConfig.java b/server/src/main/java/io/druid/segment/indexing/TuningConfig.java index 998f07857527..7fd246d25732 100644 --- a/server/src/main/java/io/druid/segment/indexing/TuningConfig.java +++ b/server/src/main/java/io/druid/segment/indexing/TuningConfig.java @@ -30,4 +30,7 @@ }) public interface TuningConfig { + boolean DEFAULT_LOG_PARSE_EXCEPTIONS = false; + int DEFAULT_MAX_PARSE_EXCEPTIONS = 
Integer.MAX_VALUE; + int DEFAULT_MAX_SAVED_PARSE_EXCEPTIONS = 0; } diff --git a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java index d61de1aee04e..302b58663ced 100644 --- a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java +++ b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetrics.java @@ -28,6 +28,7 @@ public class FireDepartmentMetrics { private final AtomicLong processedCount = new AtomicLong(0); + private final AtomicLong processedWithErrorsCount = new AtomicLong(0); private final AtomicLong thrownAwayCount = new AtomicLong(0); private final AtomicLong unparseableCount = new AtomicLong(0); private final AtomicLong rowOutputCount = new AtomicLong(0); @@ -49,6 +50,11 @@ public void incrementProcessed() processedCount.incrementAndGet(); } + public void incrementProcessedWithErrors() + { + processedWithErrorsCount.incrementAndGet(); + } + public void incrementThrownAway() { thrownAwayCount.incrementAndGet(); @@ -124,6 +130,11 @@ public long processed() return processedCount.get(); } + public long processedWithErrors() + { + return processedWithErrorsCount.get(); + } + public long thrownAway() { return thrownAwayCount.get(); @@ -203,6 +214,7 @@ public FireDepartmentMetrics snapshot() { final FireDepartmentMetrics retVal = new FireDepartmentMetrics(); retVal.processedCount.set(processedCount.get()); + retVal.processedWithErrorsCount.set(processedWithErrorsCount.get()); retVal.thrownAwayCount.set(thrownAwayCount.get()); retVal.unparseableCount.set(unparseableCount.get()); retVal.rowOutputCount.set(rowOutputCount.get()); @@ -231,6 +243,7 @@ public FireDepartmentMetrics merge(FireDepartmentMetrics other) Preconditions.checkNotNull(other, "Cannot merge a null FireDepartmentMetrics"); FireDepartmentMetrics otherSnapshot = other.snapshot(); processedCount.addAndGet(otherSnapshot.processed()); + processedWithErrorsCount.addAndGet(otherSnapshot.processedWithErrors()); thrownAwayCount.addAndGet(otherSnapshot.thrownAway()); rowOutputCount.addAndGet(otherSnapshot.rowOutput()); unparseableCount.addAndGet(otherSnapshot.unparseable()); @@ -248,5 +261,4 @@ public FireDepartmentMetrics merge(FireDepartmentMetrics other) messageGap.set(Math.max(messageGap(), otherSnapshot.messageGap())); return this; } - } diff --git a/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java new file mode 100644 index 000000000000..9c7ee60fde18 --- /dev/null +++ b/server/src/main/java/io/druid/segment/realtime/FireDepartmentMetricsTaskMetricsGetter.java @@ -0,0 +1,64 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
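FireDepartmentMetrics above gains a separate counter for rows that were indexed despite parse errors, which the new task-metrics getter below exposes under the TaskMetricsUtils keys. A short sketch of the counter's intended use, restricted to methods visible in this patch (the scenario is illustrative):

    import io.druid.segment.realtime.FireDepartmentMetrics;

    public class RowStatsSketch
    {
      public static void main(String[] args)
      {
        FireDepartmentMetrics metrics = new FireDepartmentMetrics();
        metrics.incrementProcessed();           // row added cleanly
        metrics.incrementProcessedWithErrors(); // row added, but some columns were unparseable

        System.out.println(metrics.processed());           // 1
        System.out.println(metrics.processedWithErrors()); // 1
      }
    }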
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment.realtime; + +import com.google.common.collect.ImmutableMap; +import io.druid.indexer.TaskMetricsGetter; +import io.druid.indexer.TaskMetricsUtils; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class FireDepartmentMetricsTaskMetricsGetter implements TaskMetricsGetter +{ + public static final List KEYS = Arrays.asList( + TaskMetricsUtils.ROWS_PROCESSED, + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, + TaskMetricsUtils.ROWS_THROWN_AWAY, + TaskMetricsUtils.ROWS_UNPARSEABLE + ); + + private final FireDepartmentMetrics fireDepartmentMetrics; + + public FireDepartmentMetricsTaskMetricsGetter( + FireDepartmentMetrics fireDepartmentMetrics + ) + { + this.fireDepartmentMetrics = fireDepartmentMetrics; + } + + @Override + public List getKeys() + { + return KEYS; + } + + @Override + public Map getTotalMetrics() + { + return ImmutableMap.of( + TaskMetricsUtils.ROWS_PROCESSED, fireDepartmentMetrics.processed(), + TaskMetricsUtils.ROWS_PROCESSED_WITH_ERRORS, fireDepartmentMetrics.processedWithErrors(), + TaskMetricsUtils.ROWS_THROWN_AWAY, fireDepartmentMetrics.thrownAway(), + TaskMetricsUtils.ROWS_UNPARSEABLE, fireDepartmentMetrics.unparseable() + ); + } +} diff --git a/server/src/main/java/io/druid/segment/realtime/appenderator/Appenderator.java b/server/src/main/java/io/druid/segment/realtime/appenderator/Appenderator.java index dbd1ed831378..c8bba5cdeb84 100644 --- a/server/src/main/java/io/druid/segment/realtime/appenderator/Appenderator.java +++ b/server/src/main/java/io/druid/segment/realtime/appenderator/Appenderator.java @@ -23,6 +23,7 @@ import com.google.common.util.concurrent.ListenableFuture; import io.druid.data.input.Committer; import io.druid.data.input.InputRow; +import io.druid.java.util.common.parsers.ParseException; import io.druid.query.QuerySegmentWalker; import io.druid.segment.incremental.IndexSizeExceededException; @@ -228,11 +229,20 @@ class AppenderatorAddResult private final int numRowsInSegment; private final boolean isPersistRequired; - AppenderatorAddResult(SegmentIdentifier identifier, int numRowsInSegment, boolean isPersistRequired) + @Nullable + private final ParseException parseException; + + AppenderatorAddResult( + SegmentIdentifier identifier, + int numRowsInSegment, + boolean isPersistRequired, + @Nullable ParseException parseException + ) { this.segmentIdentifier = identifier; this.numRowsInSegment = numRowsInSegment; this.isPersistRequired = isPersistRequired; + this.parseException = parseException; } SegmentIdentifier getSegmentIdentifier() @@ -249,5 +259,11 @@ boolean isPersistRequired() { return isPersistRequired; } + + @Nullable + public ParseException getParseException() + { + return parseException; + } } } diff --git a/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorDriverAddResult.java b/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorDriverAddResult.java index a1ec20baa5b7..07a01ab4d021 100644 --- a/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorDriverAddResult.java +++ b/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorDriverAddResult.java @@ -19,6 +19,8 @@ package io.druid.segment.realtime.appenderator; +import io.druid.java.util.common.parsers.ParseException; + import javax.annotation.Nullable; /** @@ -33,32 +35,44 @@ public class AppenderatorDriverAddResult private final long 
totalNumRowsInAppenderator; private final boolean isPersistRequired; + @Nullable + private final ParseException parseException; + public static AppenderatorDriverAddResult ok( SegmentIdentifier segmentIdentifier, int numRowsInSegment, long totalNumRowsInAppenderator, - boolean isPersistRequired + boolean isPersistRequired, + @Nullable ParseException parseException ) { - return new AppenderatorDriverAddResult(segmentIdentifier, numRowsInSegment, totalNumRowsInAppenderator, isPersistRequired); + return new AppenderatorDriverAddResult( + segmentIdentifier, + numRowsInSegment, + totalNumRowsInAppenderator, + isPersistRequired, + parseException + ); } public static AppenderatorDriverAddResult fail() { - return new AppenderatorDriverAddResult(null, 0, 0, false); + return new AppenderatorDriverAddResult(null, 0, 0, false, null); } private AppenderatorDriverAddResult( @Nullable SegmentIdentifier segmentIdentifier, int numRowsInSegment, long totalNumRowsInAppenderator, - boolean isPersistRequired + boolean isPersistRequired, + @Nullable ParseException parseException ) { this.segmentIdentifier = segmentIdentifier; this.numRowsInSegment = numRowsInSegment; this.totalNumRowsInAppenderator = totalNumRowsInAppenderator; this.isPersistRequired = isPersistRequired; + this.parseException = parseException; } public boolean isOk() @@ -85,4 +99,10 @@ public boolean isPersistRequired() { return isPersistRequired; } + + @Nullable + public ParseException getParseException() + { + return parseException; + } } diff --git a/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorImpl.java b/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorImpl.java index be32f2960fff..f4932c6bdbce 100644 --- a/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorImpl.java +++ b/server/src/main/java/io/druid/segment/realtime/appenderator/AppenderatorImpl.java @@ -63,6 +63,7 @@ import io.druid.segment.QueryableIndex; import io.druid.segment.QueryableIndexSegment; import io.druid.segment.Segment; +import io.druid.segment.incremental.IncrementalIndexAddResult; import io.druid.segment.incremental.IndexSizeExceededException; import io.druid.segment.indexing.DataSchema; import io.druid.segment.loading.DataSegmentPusher; @@ -218,9 +219,11 @@ public AppenderatorAddResult add( metrics.reportMessageMaxTimestamp(row.getTimestampFromEpoch()); final int sinkRowsInMemoryBeforeAdd = sink.getNumRowsInMemory(); final int sinkRowsInMemoryAfterAdd; + final IncrementalIndexAddResult addResult; try { - sinkRowsInMemoryAfterAdd = sink.add(row, !allowIncrementalPersists); + addResult = sink.add(row, !allowIncrementalPersists); + sinkRowsInMemoryAfterAdd = addResult.getRowCount(); } catch (IndexSizeExceededException e) { // Uh oh, we can't do anything about this! 
We can't persist (commit metadata would be out of sync) and we @@ -250,7 +253,7 @@ public AppenderatorAddResult add( } } - return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired); + return new AppenderatorAddResult(identifier, sink.getNumRows(), isPersistRequired, addResult.getParseException()); } @Override diff --git a/server/src/main/java/io/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java b/server/src/main/java/io/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java index 24482d19c96c..963522117a6c 100644 --- a/server/src/main/java/io/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java +++ b/server/src/main/java/io/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java @@ -300,7 +300,8 @@ protected AppenderatorDriverAddResult append( identifier, result.getNumRowsInSegment(), appenderator.getTotalRowCount(), - result.isPersistRequired() + result.isPersistRequired(), + result.getParseException() ); } catch (SegmentNotWritableException e) { diff --git a/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java b/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java index 2f6a4a1dd995..ed4a2661d42b 100644 --- a/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java +++ b/server/src/main/java/io/druid/segment/realtime/plumber/RealtimePlumber.java @@ -61,6 +61,7 @@ import io.druid.segment.QueryableIndex; import io.druid.segment.QueryableIndexSegment; import io.druid.segment.Segment; +import io.druid.segment.incremental.IncrementalIndexAddResult; import io.druid.segment.incremental.IndexSizeExceededException; import io.druid.segment.indexing.DataSchema; import io.druid.segment.indexing.RealtimeTuningConfig; @@ -216,13 +217,16 @@ public int add(InputRow row, Supplier committerSupplier) throws Index return -1; } - final int numRows = sink.add(row, false); + final IncrementalIndexAddResult addResult = sink.add(row, false); + if (config.isReportParseExceptions() && addResult.getParseException() != null) { + throw addResult.getParseException(); + } if (!sink.canAppendRow() || System.currentTimeMillis() > nextFlush) { persist(committerSupplier.get()); } - return numRows; + return addResult.getRowCount(); } private Sink getSink(long timestamp) diff --git a/server/src/main/java/io/druid/segment/realtime/plumber/Sink.java b/server/src/main/java/io/druid/segment/realtime/plumber/Sink.java index 634d8f514bd6..92790d2f20ae 100644 --- a/server/src/main/java/io/druid/segment/realtime/plumber/Sink.java +++ b/server/src/main/java/io/druid/segment/realtime/plumber/Sink.java @@ -34,6 +34,7 @@ import io.druid.segment.ReferenceCountingSegment; import io.druid.segment.column.ColumnCapabilitiesImpl; import io.druid.segment.incremental.IncrementalIndex; +import io.druid.segment.incremental.IncrementalIndexAddResult; import io.druid.segment.incremental.IncrementalIndexSchema; import io.druid.segment.incremental.IndexSizeExceededException; import io.druid.segment.indexing.DataSchema; @@ -53,7 +54,7 @@ public class Sink implements Iterable { - private static final int ADD_FAILED = -1; + private static final IncrementalIndexAddResult ADD_FAILED = new IncrementalIndexAddResult(-1, null); private final Object hydrantLock = new Object(); private final Interval interval; @@ -139,7 +140,7 @@ public FireHydrant getCurrHydrant() return currHydrant; } - public int add(InputRow row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException + public IncrementalIndexAddResult add(InputRow 
row, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException { if (currHydrant == null) { throw new IAE("No currHydrant but given row[%s]", row); diff --git a/server/src/main/java/io/druid/server/coordination/ChangeRequestHistory.java b/server/src/main/java/io/druid/server/coordination/ChangeRequestHistory.java index afb9857dd556..31bf1f1e2e10 100644 --- a/server/src/main/java/io/druid/server/coordination/ChangeRequestHistory.java +++ b/server/src/main/java/io/druid/server/coordination/ChangeRequestHistory.java @@ -22,13 +22,13 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.util.concurrent.AbstractFuture; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ThreadFactoryBuilder; import io.druid.java.util.common.IAE; import io.druid.java.util.common.StringUtils; +import io.druid.utils.CircularBuffer; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -327,46 +327,4 @@ public boolean cancel(boolean interruptIfRunning) return true; } } - - static class CircularBuffer - { - private final E[] buffer; - - private int start = 0; - private int size = 0; - - CircularBuffer(int capacity) - { - buffer = (E[]) new Object[capacity]; - } - - void add(E item) - { - buffer[start++] = item; - - if (start >= buffer.length) { - start = 0; - } - - if (size < buffer.length) { - size++; - } - } - - E get(int index) - { - Preconditions.checkArgument(index >= 0 && index < size, "invalid index"); - - int bufferIndex = (start - size + index) % buffer.length; - if (bufferIndex < 0) { - bufferIndex += buffer.length; - } - return buffer[bufferIndex]; - } - - int size() - { - return size; - } - } } diff --git a/server/src/test/java/io/druid/segment/realtime/RealtimeManagerTest.java b/server/src/test/java/io/druid/segment/realtime/RealtimeManagerTest.java index bd4b42102553..5838f5a0025a 100644 --- a/server/src/test/java/io/druid/segment/realtime/RealtimeManagerTest.java +++ b/server/src/test/java/io/druid/segment/realtime/RealtimeManagerTest.java @@ -1061,7 +1061,7 @@ public int add(InputRow row, Supplier committerSupplier) throws Index return -1; } - return sink.add(row, false); + return sink.add(row, false).getRowCount(); } public Sink getSink(long timestamp) diff --git a/server/src/test/java/io/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java b/server/src/test/java/io/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java index 88bbe2b86c78..f031fe4540f7 100644 --- a/server/src/test/java/io/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java +++ b/server/src/test/java/io/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java @@ -309,7 +309,7 @@ public AppenderatorAddResult add( { rows.computeIfAbsent(identifier, k -> new ArrayList<>()).add(row); numRows++; - return new AppenderatorAddResult(identifier, numRows, false); + return new AppenderatorAddResult(identifier, numRows, false, null); } @Override diff --git a/server/src/test/java/io/druid/server/coordination/ChangeRequestHistoryTest.java b/server/src/test/java/io/druid/server/coordination/ChangeRequestHistoryTest.java index 05e726f23cbe..b09fedeaa8e6 100644 --- a/server/src/test/java/io/druid/server/coordination/ChangeRequestHistoryTest.java +++ 
b/server/src/test/java/io/druid/server/coordination/ChangeRequestHistoryTest.java @@ -22,6 +22,7 @@ import com.google.common.util.concurrent.FutureCallback; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import io.druid.utils.CircularBuffer; import org.junit.Assert; import org.junit.Test; @@ -173,7 +174,7 @@ public void testNonImmediateFuture() throws Exception @Test public void testCircularBuffer() { - ChangeRequestHistory.CircularBuffer circularBuffer = new ChangeRequestHistory.CircularBuffer<>( + CircularBuffer circularBuffer = new CircularBuffer<>( 3); circularBuffer.add(1); From 270fd1ea1589686fa19f62230fba1ca2d43f7c6e Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Thu, 5 Apr 2018 22:12:30 -0700 Subject: [PATCH 23/67] Allow getDomain to return disjointed intervals (#5570) * Allow getDomain to return disjointed intervals * Indentation issues --- .../timeline/partition/NoneShardSpec.java | 14 ++------- .../druid/timeline/partition/ShardSpec.java | 4 +-- .../io/druid/timeline/DataSegmentTest.java | 4 +-- .../io/druid/query/filter/DimFilterUtils.java | 18 +++++++++-- .../query/filter/DimFilterUtilsTest.java | 5 ++- .../partition/HashBasedNumberedShardSpec.java | 28 ++++++----------- .../timeline/partition/LinearShardSpec.java | 13 ++------ .../timeline/partition/NumberedShardSpec.java | 13 ++------ .../partition/SingleDimensionShardSpec.java | 31 +++++++++---------- 9 files changed, 55 insertions(+), 75 deletions(-) diff --git a/api/src/main/java/io/druid/timeline/partition/NoneShardSpec.java b/api/src/main/java/io/druid/timeline/partition/NoneShardSpec.java index 1fcadfb42d23..d20c3b5458c2 100644 --- a/api/src/main/java/io/druid/timeline/partition/NoneShardSpec.java +++ b/api/src/main/java/io/druid/timeline/partition/NoneShardSpec.java @@ -22,7 +22,7 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; import io.druid.data.input.InputRow; import java.util.List; @@ -71,19 +71,11 @@ public int getPartitionNum() @Override public ShardSpecLookup getLookup(final List shardSpecs) { - - return new ShardSpecLookup() - { - @Override - public ShardSpec getShardSpec(long timestamp, InputRow row) - { - return shardSpecs.get(0); - } - }; + return (long timestamp, InputRow row) -> shardSpecs.get(0); } @Override - public Map> getDomain() + public Map> getDomain() { return ImmutableMap.of(); } diff --git a/api/src/main/java/io/druid/timeline/partition/ShardSpec.java b/api/src/main/java/io/druid/timeline/partition/ShardSpec.java index 5461544c609c..c691c3959dbb 100644 --- a/api/src/main/java/io/druid/timeline/partition/ShardSpec.java +++ b/api/src/main/java/io/druid/timeline/partition/ShardSpec.java @@ -21,7 +21,7 @@ import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; import io.druid.data.input.InputRow; import java.util.List; @@ -50,5 +50,5 @@ public interface ShardSpec * * @return map of dimensions to its possible range. 
Dimensions with unknown possible range are not mapped */ - Map> getDomain(); + Map> getDomain(); } diff --git a/api/src/test/java/io/druid/timeline/DataSegmentTest.java b/api/src/test/java/io/druid/timeline/DataSegmentTest.java index bda7947dd2b2..f2743d24b0a2 100644 --- a/api/src/test/java/io/druid/timeline/DataSegmentTest.java +++ b/api/src/test/java/io/druid/timeline/DataSegmentTest.java @@ -24,7 +24,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; import com.google.common.collect.Sets; import io.druid.TestObjectMapper; import io.druid.data.input.InputRow; @@ -82,7 +82,7 @@ public ShardSpecLookup getLookup(List shardSpecs) } @Override - public Map> getDomain() + public Map> getDomain() { return ImmutableMap.of(); } diff --git a/processing/src/main/java/io/druid/query/filter/DimFilterUtils.java b/processing/src/main/java/io/druid/query/filter/DimFilterUtils.java index 42ee9eeba77a..25c991662ed2 100644 --- a/processing/src/main/java/io/druid/query/filter/DimFilterUtils.java +++ b/processing/src/main/java/io/druid/query/filter/DimFilterUtils.java @@ -122,12 +122,13 @@ public static Set filterShards(DimFilter dimFilter, Iterable input, Fu boolean include = true; if (dimFilter != null && shard != null) { - Map> domain = shard.getDomain(); - for (Map.Entry> entry : domain.entrySet()) { + Map> domain = shard.getDomain(); + for (Map.Entry> entry : domain.entrySet()) { String dimension = entry.getKey(); Optional> optFilterRangeSet = dimensionRangeCache .computeIfAbsent(dimension, d -> Optional.fromNullable(dimFilter.getDimensionRangeSet(d))); - if (optFilterRangeSet.isPresent() && optFilterRangeSet.get().subRangeSet(entry.getValue()).isEmpty()) { + + if (optFilterRangeSet.isPresent() && hasEmptyIntersection(optFilterRangeSet.get(), entry.getValue())) { include = false; } } @@ -139,4 +140,15 @@ public static Set filterShards(DimFilter dimFilter, Iterable input, Fu } return retSet; } + + private static boolean hasEmptyIntersection(RangeSet r1, RangeSet r2) + { + for (Range range : r2.asRanges()) { + if (!r1.subRangeSet(range).isEmpty()) { + return false; + } + } + + return true; + } } diff --git a/processing/src/test/java/io/druid/query/filter/DimFilterUtilsTest.java b/processing/src/test/java/io/druid/query/filter/DimFilterUtilsTest.java index 421aa47bae3c..480062510eaf 100644 --- a/processing/src/test/java/io/druid/query/filter/DimFilterUtilsTest.java +++ b/processing/src/test/java/io/druid/query/filter/DimFilterUtilsTest.java @@ -29,6 +29,7 @@ import com.google.common.collect.Range; import com.google.common.collect.RangeSet; import com.google.common.collect.Sets; +import com.google.common.collect.TreeRangeSet; import io.druid.timeline.partition.ShardSpec; import org.easymock.EasyMock; import org.junit.Assert; @@ -113,8 +114,10 @@ private static RangeSet rangeSet(List> ranges) private static ShardSpec shardSpec(String dimension, Range range) { ShardSpec shard = EasyMock.createMock(ShardSpec.class); + RangeSet rangeSet = TreeRangeSet.create(); + rangeSet.add(range); EasyMock.expect(shard.getDomain()) - .andReturn(ImmutableMap.of(dimension, range)) + .andReturn(ImmutableMap.of(dimension, rangeSet)) .anyTimes(); return shard; } diff --git a/server/src/main/java/io/druid/timeline/partition/HashBasedNumberedShardSpec.java b/server/src/main/java/io/druid/timeline/partition/HashBasedNumberedShardSpec.java index 
b06ba203ff94..2f1d32e7964a 100644 --- a/server/src/main/java/io/druid/timeline/partition/HashBasedNumberedShardSpec.java +++ b/server/src/main/java/io/druid/timeline/partition/HashBasedNumberedShardSpec.java @@ -25,12 +25,11 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.base.Function; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import io.druid.data.input.InputRow; @@ -90,14 +89,10 @@ List getGroupKey(final long timestamp, final InputRow inputRow) if (partitionDimensions.isEmpty()) { return Rows.toGroupKey(timestamp, inputRow); } else { - return Lists.transform(partitionDimensions, new Function() - { - @Override - public Object apply(final String dim) - { - return inputRow.getDimension(dim); - } - }); + return Lists.transform( + partitionDimensions, + dim -> inputRow.getDimension(dim) + ); } } @@ -114,19 +109,14 @@ public String toString() @Override public ShardSpecLookup getLookup(final List shardSpecs) { - return new ShardSpecLookup() - { - @Override - public ShardSpec getShardSpec(long timestamp, InputRow row) - { - int index = Math.abs(hash(timestamp, row) % getPartitions()); - return shardSpecs.get(index); - } + return (long timestamp, InputRow row) -> { + int index = Math.abs(hash(timestamp, row) % getPartitions()); + return shardSpecs.get(index); }; } @Override - public Map> getDomain() + public Map> getDomain() { return ImmutableMap.of(); } diff --git a/server/src/main/java/io/druid/timeline/partition/LinearShardSpec.java b/server/src/main/java/io/druid/timeline/partition/LinearShardSpec.java index 000f035921d8..b2358eb707db 100644 --- a/server/src/main/java/io/druid/timeline/partition/LinearShardSpec.java +++ b/server/src/main/java/io/druid/timeline/partition/LinearShardSpec.java @@ -23,7 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; import io.druid.data.input.InputRow; import java.util.List; @@ -54,18 +54,11 @@ public int getPartitionNum() @Override public ShardSpecLookup getLookup(final List shardSpecs) { - return new ShardSpecLookup() - { - @Override - public ShardSpec getShardSpec(long timestamp, InputRow row) - { - return shardSpecs.get(0); - } - }; + return (long timestamp, InputRow row) -> shardSpecs.get(0); } @Override - public Map> getDomain() + public Map> getDomain() { return ImmutableMap.of(); } diff --git a/server/src/main/java/io/druid/timeline/partition/NumberedShardSpec.java b/server/src/main/java/io/druid/timeline/partition/NumberedShardSpec.java index 673bed3cf6a5..ea955e31f8a3 100644 --- a/server/src/main/java/io/druid/timeline/partition/NumberedShardSpec.java +++ b/server/src/main/java/io/druid/timeline/partition/NumberedShardSpec.java @@ -24,7 +24,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; import io.druid.data.input.InputRow; import 
java.util.List; @@ -67,18 +67,11 @@ public int getPartitionNum() @Override public ShardSpecLookup getLookup(final List shardSpecs) { - return new ShardSpecLookup() - { - @Override - public ShardSpec getShardSpec(long timestamp, InputRow row) - { - return shardSpecs.get(0); - } - }; + return (long timestamp, InputRow row) -> shardSpecs.get(0); } @Override - public Map> getDomain() + public Map> getDomain() { return ImmutableMap.of(); } diff --git a/server/src/main/java/io/druid/timeline/partition/SingleDimensionShardSpec.java b/server/src/main/java/io/druid/timeline/partition/SingleDimensionShardSpec.java index 8cbefe40f8a1..aa5527429f71 100644 --- a/server/src/main/java/io/druid/timeline/partition/SingleDimensionShardSpec.java +++ b/server/src/main/java/io/druid/timeline/partition/SingleDimensionShardSpec.java @@ -23,6 +23,8 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Range; +import com.google.common.collect.RangeSet; +import com.google.common.collect.TreeRangeSet; import io.druid.data.input.InputRow; import io.druid.java.util.common.ISE; @@ -102,35 +104,30 @@ public int getPartitionNum() @Override public ShardSpecLookup getLookup(final List shardSpecs) { - return new ShardSpecLookup() - { - @Override - public ShardSpec getShardSpec(long timestamp, InputRow row) - { - for (ShardSpec spec : shardSpecs) { - if (spec.isInChunk(timestamp, row)) { - return spec; - } + return (long timestamp, InputRow row) -> { + for (ShardSpec spec : shardSpecs) { + if (spec.isInChunk(timestamp, row)) { + return spec; } - throw new ISE("row[%s] doesn't fit in any shard[%s]", row, shardSpecs); } + throw new ISE("row[%s] doesn't fit in any shard[%s]", row, shardSpecs); }; } @Override - public Map> getDomain() + public Map> getDomain() { - Range range; + RangeSet rangeSet = TreeRangeSet.create(); if (start == null && end == null) { - range = Range.all(); + rangeSet.add(Range.all()); } else if (start == null) { - range = Range.atMost(end); + rangeSet.add(Range.atMost(end)); } else if (end == null) { - range = Range.atLeast(start); + rangeSet.add(Range.atLeast(start)); } else { - range = Range.closed(start, end); + rangeSet.add(Range.closed(start, end)); } - return ImmutableMap.of(dimension, range); + return ImmutableMap.of(dimension, rangeSet); } public void setPartitionNum(int partitionNum) From 371c672828994ad763fe6f5a0caf635c69ab3c83 Mon Sep 17 00:00:00 2001 From: Senthil Kumar L S Date: Fri, 6 Apr 2018 11:26:59 +0530 Subject: [PATCH 24/67] Adding feature thetaSketchConstant to do some set operation in PostAgg (#5551) * Adding feature thetaSketchConstant to do some set operation in PostAggregator * Updated review comments for PR #5551 - Adding thetaSketchConstant * Fixed CI build issue * Updated review comments 2 for PR #5551 - Adding thetaSketchConstant --- .../theta/SketchConstantPostAggregator.java | 133 ++++++++++++++++++ .../datasketches/theta/SketchHolder.java | 14 +- .../datasketches/theta/SketchModule.java | 5 +- .../theta/SketchAggregationTest.java | 20 +++ .../SketchAggregationWithSimpleDataTest.java | 45 ++++++ .../resources/topn_query_sketch_const.json | 104 ++++++++++++++ .../post/ConstantPostAggregator.java | 3 +- .../aggregation/post/PostAggregatorIds.java | 1 + 8 files changed, 321 insertions(+), 4 deletions(-) create mode 100644 extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchConstantPostAggregator.java create mode 100644 extensions-core/datasketches/src/test/resources/topn_query_sketch_const.json diff --git 
a/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchConstantPostAggregator.java b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchConstantPostAggregator.java new file mode 100644 index 000000000000..510cb78b80bd --- /dev/null +++ b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchConstantPostAggregator.java @@ -0,0 +1,133 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.aggregation.datasketches.theta; + +import java.util.Collections; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.codec.digest.DigestUtils; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; + +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.aggregation.post.PostAggregatorIds; +import io.druid.query.cache.CacheKeyBuilder; + +/** + */ +public class SketchConstantPostAggregator implements PostAggregator +{ + + private final String name; + private final String value; + private final SketchHolder sketchValue; + + @JsonCreator + public SketchConstantPostAggregator(@JsonProperty("name") String name, @JsonProperty("value") String value) + { + this.name = name; + Preconditions.checkArgument(value != null && !value.isEmpty(), + "Constant value cannot be null or empty, expecting base64 encoded sketch string"); + this.value = value; + this.sketchValue = SketchHolder.deserialize(value); + } + + @Override + public Set getDependentFields() + { + return Collections.emptySet(); + } + + @Override + public Comparator getComparator() + { + return SketchHolder.COMPARATOR; + } + + @Override + public Object compute(Map combinedAggregators) + { + return sketchValue; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public SketchConstantPostAggregator decorate(Map aggregators) + { + return this; + } + + @JsonProperty("value") + public SketchHolder getSketchValue() + { + return sketchValue; + } + + @Override + public String toString() + { + return "SketchConstantPostAggregator{name='" + name + "', value='" + value + "'}"; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SketchConstantPostAggregator that = (SketchConstantPostAggregator) o; + if (!this.sketchValue.equals(that.sketchValue)) { + return false; + } + if (name != null ? 
!name.equals(that.name) : that.name != null) { + return false; + } + return true; + } + + @Override + public int hashCode() + { + int result = name != null ? name.hashCode() : 0; + result = 37 * result + sketchValue.hashCode(); + return result; + } + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder(PostAggregatorIds.THETA_SKETCH_CONSTANT) + .appendString(DigestUtils.sha1Hex(value)).build(); + } +} diff --git a/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchHolder.java b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchHolder.java index cf5f1a3fb130..c273bd4a76aa 100644 --- a/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchHolder.java +++ b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchHolder.java @@ -36,6 +36,7 @@ import io.druid.java.util.common.StringUtils; import org.apache.commons.codec.binary.Base64; +import java.util.Arrays; import java.util.Comparator; /** @@ -286,6 +287,11 @@ public static SketchHolder sketchSetOperation(Func func, int sketchSize, Object. } } + /** + * Ideally make use of Sketch's equals and hashCode methods but which are not value based implementations. + * And yet need value based equals and hashCode implementations for SketchHolder. + * Hence using Arrays.equals() and Arrays.hashCode(). + */ @Override public boolean equals(Object o) { @@ -295,6 +301,12 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - return this.getSketch().equals(((SketchHolder) o).getSketch()); + return Arrays.equals(this.getSketch().toByteArray(), ((SketchHolder) o).getSketch().toByteArray()); + } + + @Override + public int hashCode() + { + return 31 * Arrays.hashCode(this.getSketch().toByteArray()); } } diff --git a/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchModule.java b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchModule.java index 361dd4a2fbf9..18d699ec6c52 100644 --- a/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchModule.java +++ b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/theta/SketchModule.java @@ -38,6 +38,8 @@ public class SketchModule implements DruidModule public static final String THETA_SKETCH_ESTIMATE_POST_AGG = "thetaSketchEstimate"; public static final String THETA_SKETCH_SET_OP_POST_AGG = "thetaSketchSetOp"; + + public static final String THETA_SKETCH_CONSTANT_POST_AGG = "thetaSketchConstant"; @Override public void configure(Binder binder) @@ -63,7 +65,8 @@ public List getJacksonModules() .registerSubtypes( new NamedType(SketchMergeAggregatorFactory.class, THETA_SKETCH), new NamedType(SketchEstimatePostAggregator.class, THETA_SKETCH_ESTIMATE_POST_AGG), - new NamedType(SketchSetPostAggregator.class, THETA_SKETCH_SET_OP_POST_AGG) + new NamedType(SketchSetPostAggregator.class, THETA_SKETCH_SET_OP_POST_AGG), + new NamedType(SketchConstantPostAggregator.class, THETA_SKETCH_CONSTANT_POST_AGG) ) .addSerializer( SketchHolder.class, new SketchHolderJsonSerializer() diff --git a/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java b/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java index 
260dc12b3507..b5eb3cb0f6c7 100644 --- a/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationTest.java @@ -277,6 +277,14 @@ public void testSketchEstimatePostAggregatorSerde() throws Exception 2 ) ); + + assertPostAggregatorSerde( + new SketchEstimatePostAggregator( + "name", + new SketchConstantPostAggregator("name", "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI="), + null + ) + ); } @Test @@ -293,6 +301,18 @@ public void testSketchSetPostAggregatorSerde() throws Exception ) ) ); + + assertPostAggregatorSerde( + new SketchSetPostAggregator( + "name", + "INTERSECT", + null, + Lists.newArrayList( + new FieldAccessPostAggregator("name1", "fieldName1"), + new SketchConstantPostAggregator("name2", "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=") + ) + ) + ); } @Test diff --git a/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationWithSimpleDataTest.java b/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationWithSimpleDataTest.java index 870fa9794b0c..d38166b4b915 100644 --- a/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationWithSimpleDataTest.java +++ b/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/theta/SketchAggregationWithSimpleDataTest.java @@ -276,6 +276,51 @@ public void testSimpleDataIngestAndSelectQuery() throws Exception Assert.assertEquals(100, result.getValue().getEvents().size()); Assert.assertEquals("AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=", result.getValue().getEvents().get(0).getEvent().get("pty_country")); } + + @Test + public void testTopNQueryWithSketchConstant() throws Exception + { + AggregationTestHelper topNQueryAggregationTestHelper = AggregationTestHelper.createTopNQueryAggregationTestHelper( + sm.getJacksonModules(), + tempFolder + ); + + Sequence seq = topNQueryAggregationTestHelper.runQueryOnSegments( + ImmutableList.of(s1, s2), + readFileFromClasspathAsString("topn_query_sketch_const.json") + ); + + Result result = (Result) Iterables.getOnlyElement(seq.toList()); + + Assert.assertEquals(DateTimes.of("2014-10-20T00:00:00.000Z"), result.getTimestamp()); + + DimensionAndMetricValueExtractor value1 = Iterables.get(result.getValue().getValue(), 0); + Assert.assertEquals(38.0, value1.getDoubleMetric("sketch_count"), 0.01); + Assert.assertEquals(38.0, value1.getDoubleMetric("sketchEstimatePostAgg"), 0.01); + Assert.assertEquals(2.0, value1.getDoubleMetric("sketchEstimatePostAggForSketchConstant"), 0.01); + Assert.assertEquals(39.0, value1.getDoubleMetric("sketchUnionPostAggEstimate"), 0.01); + Assert.assertEquals(1.0, value1.getDoubleMetric("sketchIntersectionPostAggEstimate"), 0.01); + Assert.assertEquals(37.0, value1.getDoubleMetric("sketchAnotBPostAggEstimate"), 0.01); + Assert.assertEquals("product_3", value1.getDimensionValue("product")); + + DimensionAndMetricValueExtractor value2 = Iterables.get(result.getValue().getValue(), 1); + Assert.assertEquals(42.0, value2.getDoubleMetric("sketch_count"), 0.01); + Assert.assertEquals(42.0, value2.getDoubleMetric("sketchEstimatePostAgg"), 0.01); + Assert.assertEquals(2.0, value2.getDoubleMetric("sketchEstimatePostAggForSketchConstant"), 0.01); + Assert.assertEquals(42.0, value2.getDoubleMetric("sketchUnionPostAggEstimate"), 0.01); + 
Assert.assertEquals(2.0, value2.getDoubleMetric("sketchIntersectionPostAggEstimate"), 0.01); + Assert.assertEquals(40.0, value2.getDoubleMetric("sketchAnotBPostAggEstimate"), 0.01); + Assert.assertEquals("product_1", value2.getDimensionValue("product")); + + DimensionAndMetricValueExtractor value3 = Iterables.get(result.getValue().getValue(), 2); + Assert.assertEquals(42.0, value3.getDoubleMetric("sketch_count"), 0.01); + Assert.assertEquals(42.0, value3.getDoubleMetric("sketchEstimatePostAgg"), 0.01); + Assert.assertEquals(2.0, value3.getDoubleMetric("sketchEstimatePostAggForSketchConstant"), 0.01); + Assert.assertEquals(42.0, value3.getDoubleMetric("sketchUnionPostAggEstimate"), 0.01); + Assert.assertEquals(2.0, value3.getDoubleMetric("sketchIntersectionPostAggEstimate"), 0.01); + Assert.assertEquals(40.0, value3.getDoubleMetric("sketchAnotBPostAggEstimate"), 0.01); + Assert.assertEquals("product_2", value3.getDimensionValue("product")); + } public static final String readFileFromClasspathAsString(String fileName) throws IOException { diff --git a/extensions-core/datasketches/src/test/resources/topn_query_sketch_const.json b/extensions-core/datasketches/src/test/resources/topn_query_sketch_const.json new file mode 100644 index 000000000000..3dc47dca86c3 --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/topn_query_sketch_const.json @@ -0,0 +1,104 @@ +{ + "queryType": "topN", + "dataSource": "test_datasource", + "granularity":"ALL", + "metric": { + "type": "inverted", + "metric": "sketch_count" + }, + "dimension": "product", + "threshold": 3, + "aggregations": [ + { + "type": "thetaSketch", + "name": "sketch_count", + "fieldName": "pty_country", + "size": 16384 + } + ], + "postAggregations": [ + { + "type": "thetaSketchEstimate", + "name": "sketchEstimatePostAgg", + "field": { + "type": "fieldAccess", + "fieldName": "sketch_count" + } + }, + { + "type": "thetaSketchEstimate", + "name": "sketchEstimatePostAggForSketchConstant", + "field": { + "type": "thetaSketchConstant", + "name": "theta_sketch_count", + "value": "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=" + } + }, + { + "type": "thetaSketchEstimate", + "name": "sketchIntersectionPostAggEstimate", + "field": { + "type": "thetaSketchSetOp", + "name": "sketchIntersectionPostAgg", + "func": "INTERSECT", + "size": 16384, + "fields": [ + { + "type": "fieldAccess", + "fieldName": "sketch_count" + }, + { + "type": "thetaSketchConstant", + "name": "theta_sketch_count", + "value": "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=" + } + ] + } + }, + { + "type": "thetaSketchEstimate", + "name": "sketchAnotBPostAggEstimate", + "field": { + "type": "thetaSketchSetOp", + "name": "sketchAnotBUnionPostAgg", + "func": "NOT", + "size": 16384, + "fields": [ + { + "type": "fieldAccess", + "fieldName": "sketch_count" + }, + { + "type": "thetaSketchConstant", + "name": "theta_sketch_count", + "value": "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=" + } + ] + } + }, + { + "type": "thetaSketchEstimate", + "name": "sketchUnionPostAggEstimate", + "field": { + "type": "thetaSketchSetOp", + "name": "sketchUnionPostAgg", + "func": "UNION", + "size": 16384, + "fields": [ + { + "type": "fieldAccess", + "fieldName": "sketch_count" + }, + { + "type": "thetaSketchConstant", + "name": "theta_sketch_count", + "value": "AgMDAAAazJMCAAAAAACAPzz9j7pWTMdROWGf15uY1nI=" + } + ] + } + } + ], + "intervals": [ + "2014-10-19T00:00:00.000Z/2014-10-22T00:00:00.000Z" + ] +} diff --git a/processing/src/main/java/io/druid/query/aggregation/post/ConstantPostAggregator.java 
b/processing/src/main/java/io/druid/query/aggregation/post/ConstantPostAggregator.java index b406b83a0fe4..80e665b9d833 100644 --- a/processing/src/main/java/io/druid/query/aggregation/post/ConstantPostAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/post/ConstantPostAggregator.java @@ -46,8 +46,7 @@ public ConstantPostAggregator( ) { this.name = name; - this.constantValue = constantValue; - Preconditions.checkNotNull(this.constantValue); + this.constantValue = Preconditions.checkNotNull(constantValue, "Constant value cannot be null"); } @Override diff --git a/processing/src/main/java/io/druid/query/aggregation/post/PostAggregatorIds.java b/processing/src/main/java/io/druid/query/aggregation/post/PostAggregatorIds.java index bfb7d4df108e..3ef660c2cbe4 100644 --- a/processing/src/main/java/io/druid/query/aggregation/post/PostAggregatorIds.java +++ b/processing/src/main/java/io/druid/query/aggregation/post/PostAggregatorIds.java @@ -44,4 +44,5 @@ public class PostAggregatorIds public static final byte FINALIZING_FIELD_ACCESS = 20; public static final byte ZTEST = 21; public static final byte PVALUE_FROM_ZTEST = 22; + public static final byte THETA_SKETCH_CONSTANT = 23; } From ddd23a11e642aa09cb3dbc95a61f2e6a0d8f25f9 Mon Sep 17 00:00:00 2001 From: Dylan Wylie Date: Fri, 6 Apr 2018 07:52:58 +0100 Subject: [PATCH 25/67] Fix taskDuration docs for KafkaIndexingService (#5572) * With incremental handoff the changed line is no longer true. --- docs/content/development/extensions-core/kafka-ingestion.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/development/extensions-core/kafka-ingestion.md b/docs/content/development/extensions-core/kafka-ingestion.md index e0d7f73c689d..2c09b64e15f0 100644 --- a/docs/content/development/extensions-core/kafka-ingestion.md +++ b/docs/content/development/extensions-core/kafka-ingestion.md @@ -164,7 +164,7 @@ For Roaring bitmaps: |`consumerProperties`|Map|A map of properties to be passed to the Kafka consumer. This must contain a property `bootstrap.servers` with a list of Kafka brokers in the form: `:,:,...`.|yes| |`replicas`|Integer|The number of replica sets, where 1 means a single set of tasks (no replication). Replica tasks will always be assigned to different workers to provide resiliency against node failure.|no (default == 1)| |`taskCount`|Integer|The maximum number of *reading* tasks in a *replica set*. This means that the maximum number of reading tasks will be `taskCount * replicas` and the total number of tasks (*reading* + *publishing*) will be higher than this. See 'Capacity Planning' below for more details. The number of reading tasks will be less than `taskCount` if `taskCount > {numKafkaPartitions}`.|no (default == 1)| -|`taskDuration`|ISO8601 Period|The length of time before tasks stop reading and begin publishing their segment. Note that segments are only pushed to deep storage and loadable by historical nodes when the indexing task completes.|no (default == PT1H)| +|`taskDuration`|ISO8601 Period|The length of time before tasks stop reading and begin publishing their segment.|no (default == PT1H)| |`startDelay`|ISO8601 Period|The period to wait before the supervisor starts managing tasks.|no (default == PT5S)| |`period`|ISO8601 Period|How often the supervisor will execute its management logic. 
Note that the supervisor will also run in response to certain events (such as tasks succeeding, failing, and reaching their taskDuration) so this value specifies the maximum time between iterations.|no (default == PT30S)| |`useEarliestOffset`|Boolean|If a supervisor is managing a dataSource for the first time, it will obtain a set of starting offsets from Kafka. This flag determines whether it retrieves the earliest or latest offsets in Kafka. Under normal circumstances, subsequent tasks will start from where the previous segments ended so this flag will only be used on first run.|no (default == false)| From 723857699cd4b81ccc865bd8b9cc374791ce8234 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 5 Apr 2018 23:53:43 -0700 Subject: [PATCH 26/67] Add doc for automatic pendingSegments (#5565) * Add missing doc for automatic pendingSegments * address comments --- docs/content/configuration/coordinator.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/content/configuration/coordinator.md b/docs/content/configuration/coordinator.md index d571dadcd3b0..9e4f38cf44e9 100644 --- a/docs/content/configuration/coordinator.md +++ b/docs/content/configuration/coordinator.md @@ -29,6 +29,7 @@ The coordinator node uses several of the global configs in [Configuration](../co |`druid.coordinator.merge.on`|Boolean flag for whether or not the coordinator should try and merge small segments into a more optimal segment size.|false| |`druid.coordinator.conversion.on`|Boolean flag for converting old segment indexing versions to the latest segment indexing version.|false| |`druid.coordinator.load.timeout`|The timeout duration for when the coordinator assigns a segment to a historical node.|PT15M| +|`druid.coordinator.kill.pendingSegments.on`|Boolean flag for whether or not the coordinator cleans up old entries in the `pendingSegments` table of metadata store. If set to true, coordinator will check the created time of the most recently completed task. If it doesn't exist, it finds the created time of the earliest running/pending/waiting tasks. Once the created time is found, then for all dataSources not in the `killPendingSegmentsSkipList` (see [Dynamic configuration](#dynamic-configuration)), coordinator will ask the overlord to clean up the entries 1 day or more older than the found created time in the `pendingSegments` table. This will be done periodically based on `druid.coordinator.period` specified.|false| |`druid.coordinator.kill.on`|Boolean flag for whether or not the coordinator should submit kill task for unused segments, that is, hard delete them from metadata store and deep storage. If set to true, then for all whitelisted dataSources (or optionally all), coordinator will submit tasks periodically based on `period` specified. These kill tasks will delete all segments except for the last `durationToRetain` period. Whitelist or All can be set via dynamic configuration `killAllDataSources` and `killDataSourceWhitelist` described later.|false| |`druid.coordinator.kill.period`|How often to send kill tasks to the indexing service. Value must be greater than `druid.coordinator.period.indexingPeriod`. Only applies if kill is turned on.|P1D (1 Day)| |`druid.coordinator.kill.durationToRetain`| Do not kill segments in last `durationToRetain`, must be greater or equal to 0. Only applies and MUST be specified if kill is turned on.
Note that default value is invalid.|PT-1S (-1 seconds)| @@ -103,8 +104,9 @@ Issuing a GET request at the same URL will return the spec that is currently in |`replicantLifetime`|The maximum number of coordinator runs for a segment to be replicated before we start alerting.|15| |`replicationThrottleLimit`|The maximum number of segments that can be replicated at one time.|10| |`emitBalancingStats`|Boolean flag for whether or not we should emit balancing stats. This is an expensive operation.|false| -|`killDataSourceWhitelist`|List of dataSources for which kill tasks are sent if property `druid.coordinator.kill.on` is true.|none| +|`killDataSourceWhitelist`|List of dataSources for which kill tasks are sent if property `druid.coordinator.kill.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`killAllDataSources`|Send kill tasks for ALL dataSources if property `druid.coordinator.kill.on` is true. If this is set to true then `killDataSourceWhitelist` must not be specified or be empty list.|false| +|`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none| |`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up segments loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of nodes. Value 1000 could be a start point for a rather big cluster. 
Default value is 0 (loading queue is unbounded) |0| To view the audit history of coordinator dynamic config issue a GET request to the URL - From 298ed1755ddb8dba82abbebd5b2d8575a3a9877d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 5 Apr 2018 23:54:59 -0700 Subject: [PATCH 27/67] Fix indexTask to respect forceExtendableShardSpecs (#5509) * Fix indexTask to respect forceExtendableShardSpecs * add comments --- .../druid/indexing/common/task/IndexTask.java | 24 ++++++++++++------- .../indexing/common/task/IndexTaskTest.java | 2 +- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java index 9a4daa084a2e..aecdc11ef010 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/IndexTask.java @@ -524,16 +524,15 @@ private static String findVersion(Map versions, Interval inter private static boolean isGuaranteedRollup(IndexIOConfig ioConfig, IndexTuningConfig tuningConfig) { Preconditions.checkState( - !(tuningConfig.isForceGuaranteedRollup() && - (tuningConfig.isForceExtendableShardSpecs() || ioConfig.isAppendToExisting())), - "Perfect rollup cannot be guaranteed with extendable shardSpecs" + !tuningConfig.isForceGuaranteedRollup() || !ioConfig.isAppendToExisting(), + "Perfect rollup cannot be guaranteed when appending to existing dataSources" ); return tuningConfig.isForceGuaranteedRollup(); } private static boolean isExtendableShardSpecs(IndexIOConfig ioConfig, IndexTuningConfig tuningConfig) { - return !isGuaranteedRollup(ioConfig, tuningConfig); + return tuningConfig.isForceExtendableShardSpecs() || ioConfig.isAppendToExisting(); } /** @@ -1249,7 +1248,19 @@ public static class IndexTuningConfig implements TuningConfig, AppenderatorConfi private final IndexSpec indexSpec; private final File basePersistDirectory; private final int maxPendingPersists; + + /** + * This flag is to force to always use an extendableShardSpec (like {@link NumberedShardSpec}) even if + * {@link #forceGuaranteedRollup} is set. + */ private final boolean forceExtendableShardSpecs; + + /** + * This flag is to force _perfect rollup mode_. {@link IndexTask} will scan the whole input data twice to 1) figure + * out proper shard specs for each segment and 2) generate segments. Note that perfect rollup mode basically assumes + * that no more data will be appended in the future. As a result, in perfect rollup mode, {@link NoneShardSpec} and + * {@link HashBasedNumberedShardSpec} are used for a single shard and two or more shards, respectively. + */ private final boolean forceGuaranteedRollup; private final boolean reportParseExceptions; private final long pushTimeout; @@ -1345,11 +1356,6 @@ private IndexTuningConfig( this.pushTimeout = pushTimeout == null ?
DEFAULT_PUSH_TIMEOUT : pushTimeout; this.basePersistDirectory = basePersistDirectory; - Preconditions.checkArgument( - !(this.forceExtendableShardSpecs && this.forceGuaranteedRollup), - "Perfect rollup cannot be guaranteed with extendable shardSpecs" - ); - this.segmentWriteOutMediumFactory = segmentWriteOutMediumFactory; if (this.reportParseExceptions) { diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java index 906e2657ab67..b68a5fe39e2f 100644 --- a/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/common/task/IndexTaskTest.java @@ -222,7 +222,7 @@ public void testForceExtendableShardSpecs() throws Exception tmpDir, null, null, - createTuningConfig(2, null, true, false), + createTuningConfig(2, null, true, true), false ), null, From c228eed500cbb6c3fda940e1f91ef2461560e081 Mon Sep 17 00:00:00 2001 From: Alexander T <37665177+aleksi75@users.noreply.github.com> Date: Fri, 6 Apr 2018 09:21:18 +0200 Subject: [PATCH 28/67] Update sql.md (#5519) Example code is wrong. 'Statement' has to be created from the Connection Object From b86ed99d9a8c48c15b47de41dfa42167d19b8975 Mon Sep 17 00:00:00 2001 From: Charles Allen Date: Fri, 6 Apr 2018 05:37:16 -0700 Subject: [PATCH 29/67] Deprecate spark2 profile in pom.xml (#5581) Deprecated due to https://github.com/druid-io/druid/pull/5382 --- pom.xml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pom.xml b/pom.xml index 1830c8b7c7f8..dc401857aa22 100644 --- a/pom.xml +++ b/pom.xml @@ -1127,13 +1127,6 @@ - - spark2 - - 2.6.5 - 1.11.143 - - strict From 5ab17668c04791ac342c07e4e896c1cad26f05d1 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 6 Apr 2018 08:06:45 -0700 Subject: [PATCH 30/67] CompressionUtils: Add support for decompressing xz, bz2, zip. (#5586) Also switch various firehoses to the new method. Fixes #5585. --- .../StaticAzureBlobStoreFirehoseFactory.java | 2 +- .../StaticCloudFilesFirehoseFactory.java | 2 +- .../StaticGoogleBlobStoreFirehoseFactory.java | 2 +- .../lookup/namespace/UriCacheGenerator.java | 28 +++------- .../firehose/s3/StaticS3FirehoseFactory.java | 18 +++--- java-util/pom.xml | 8 +++ .../java/util/common/CompressionUtils.java | 46 +++++++++++++++- .../util/common/CompressionUtilsTest.java | 55 +++++++++++++++++-- pom.xml | 10 ++++ .../firehose/HttpFirehoseFactory.java | 2 +- .../firehose/LocalFirehoseFactory.java | 4 +- 11 files changed, 136 insertions(+), 41 deletions(-) diff --git a/extensions-contrib/azure-extensions/src/main/java/io/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactory.java b/extensions-contrib/azure-extensions/src/main/java/io/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactory.java index e3ad8b432aa7..345cffd512cf 100644 --- a/extensions-contrib/azure-extensions/src/main/java/io/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactory.java +++ b/extensions-contrib/azure-extensions/src/main/java/io/druid/firehose/azure/StaticAzureBlobStoreFirehoseFactory.java @@ -91,7 +91,7 @@ protected InputStream openObjectStream(AzureBlob object, long start) throws IOEx @Override protected InputStream wrapObjectStream(AzureBlob object, InputStream stream) throws IOException { - return object.getPath().endsWith(".gz") ? 
CompressionUtils.gzipInputStream(stream) : stream; + return CompressionUtils.decompress(stream, object.getPath()); } private static AzureByteSource makeByteSource(AzureStorage azureStorage, AzureBlob object) diff --git a/extensions-contrib/cloudfiles-extensions/src/main/java/io/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactory.java b/extensions-contrib/cloudfiles-extensions/src/main/java/io/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactory.java index 5f39e7e5a446..343635c4680e 100644 --- a/extensions-contrib/cloudfiles-extensions/src/main/java/io/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactory.java +++ b/extensions-contrib/cloudfiles-extensions/src/main/java/io/druid/firehose/cloudfiles/StaticCloudFilesFirehoseFactory.java @@ -101,7 +101,7 @@ private CloudFilesByteSource createCloudFilesByteSource(CloudFilesBlob object) @Override protected InputStream wrapObjectStream(CloudFilesBlob object, InputStream stream) throws IOException { - return object.getPath().endsWith(".gz") ? CompressionUtils.gzipInputStream(stream) : stream; + return CompressionUtils.decompress(stream, object.getPath()); } @Override diff --git a/extensions-contrib/google-extensions/src/main/java/io/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactory.java b/extensions-contrib/google-extensions/src/main/java/io/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactory.java index 0d5d99959609..38fb83870883 100644 --- a/extensions-contrib/google-extensions/src/main/java/io/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactory.java +++ b/extensions-contrib/google-extensions/src/main/java/io/druid/firehose/google/StaticGoogleBlobStoreFirehoseFactory.java @@ -93,7 +93,7 @@ private GoogleByteSource createGoogleByteSource(GoogleBlob object) @Override protected InputStream wrapObjectStream(GoogleBlob object, InputStream stream) throws IOException { - return object.getPath().endsWith(".gz") ? CompressionUtils.gzipInputStream(stream) : stream; + return CompressionUtils.decompress(stream, object.getPath()); } @Override diff --git a/extensions-core/lookups-cached-global/src/main/java/io/druid/server/lookup/namespace/UriCacheGenerator.java b/extensions-core/lookups-cached-global/src/main/java/io/druid/server/lookup/namespace/UriCacheGenerator.java index 27fd2eacbdfd..c2ca336c2eca 100644 --- a/extensions-core/lookups-cached-global/src/main/java/io/druid/server/lookup/namespace/UriCacheGenerator.java +++ b/extensions-core/lookups-cached-global/src/main/java/io/druid/server/lookup/namespace/UriCacheGenerator.java @@ -134,28 +134,14 @@ public CacheScheduler.VersionedCache generateCache( catch (NumberFormatException ex) { log.debug(ex, "Failed to get last modified timestamp. 
Assuming no timestamp"); } - final ByteSource source; - if (CompressionUtils.isGz(uriPath)) { - // Simple gzip stream - log.debug("Loading gz"); - source = new ByteSource() + final ByteSource source = new ByteSource() + { + @Override + public InputStream openStream() throws IOException { - @Override - public InputStream openStream() throws IOException - { - return CompressionUtils.gzipInputStream(puller.getInputStream(uri)); - } - }; - } else { - source = new ByteSource() - { - @Override - public InputStream openStream() throws IOException - { - return puller.getInputStream(uri); - } - }; - } + return CompressionUtils.decompress(puller.getInputStream(uri), uri.getPath()); + } + }; final CacheScheduler.VersionedCache versionedCache = scheduler.createVersionedCache(entryId, version); try { diff --git a/extensions-core/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java b/extensions-core/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java index 8827fc9ae31d..8c41d6f1f129 100644 --- a/extensions-core/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java +++ b/extensions-core/s3-extensions/src/main/java/io/druid/firehose/s3/StaticS3FirehoseFactory.java @@ -114,14 +114,14 @@ protected Collection initObjects() throws IOException // Getting data is deferred until openObjectStream() is called for each object. if (!uris.isEmpty()) { return uris.stream() - .map( - uri -> { - final String s3Bucket = uri.getAuthority(); - final String key = S3Utils.extractS3Key(uri); - return S3Utils.getSingleObjectSummary(s3Client, s3Bucket, key); - } - ) - .collect(Collectors.toList()); + .map( + uri -> { + final String s3Bucket = uri.getAuthority(); + final String key = S3Utils.extractS3Key(uri); + return S3Utils.getSingleObjectSummary(s3Client, s3Bucket, key); + } + ) + .collect(Collectors.toList()); } else { final List objects = new ArrayList<>(); for (URI uri : prefixes) { @@ -212,7 +212,7 @@ protected InputStream openObjectStream(S3ObjectSummary object, long start) throw @Override protected InputStream wrapObjectStream(S3ObjectSummary object, InputStream stream) throws IOException { - return object.getKey().endsWith(".gz") ? 
CompressionUtils.gzipInputStream(stream) : stream; + return CompressionUtils.decompress(stream, object.getKey()); } @Override diff --git a/java-util/pom.xml b/java-util/pom.xml index 150c332a3ca7..7f0b462d9c57 100644 --- a/java-util/pom.xml +++ b/java-util/pom.xml @@ -81,6 +81,14 @@ org.mozilla rhino + + org.apache.commons + commons-compress + + + org.tukaani + xz + com.jayway.jsonpath json-path diff --git a/java-util/src/main/java/io/druid/java/util/common/CompressionUtils.java b/java-util/src/main/java/io/druid/java/util/common/CompressionUtils.java index 876f26f2f589..c076ea6e8def 100644 --- a/java-util/src/main/java/io/druid/java/util/common/CompressionUtils.java +++ b/java-util/src/main/java/io/druid/java/util/common/CompressionUtils.java @@ -28,14 +28,18 @@ import com.google.common.io.Files; import io.druid.java.util.common.io.NativeIO; import io.druid.java.util.common.logger.Logger; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.xz.XZCompressorInputStream; import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.Enumeration; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; @@ -48,7 +52,9 @@ public class CompressionUtils { private static final Logger log = new Logger(CompressionUtils.class); private static final int DEFAULT_RETRY_COUNT = 3; + private static final String BZ2_SUFFIX = ".bz2"; private static final String GZ_SUFFIX = ".gz"; + private static final String XZ_SUFFIX = ".xz"; private static final String ZIP_SUFFIX = ".zip"; /** @@ -313,7 +319,7 @@ public static FileUtils.FileCopyResult gunzip(InputStream in, File outFile) thro * * @return A GZIPInputStream that can handle concatenated gzip streams in the input */ - public static GZIPInputStream gzipInputStream(final InputStream in) throws IOException + private static GZIPInputStream gzipInputStream(final InputStream in) throws IOException { return new GZIPInputStream( new FilterInputStream(in) @@ -516,4 +522,42 @@ public static String getGzBaseName(String fname) } throw new IAE("[%s] is not a valid gz file name", fname); } + + /** + * Decompress an input stream from a file, based on the filename. + */ + public static InputStream decompress(final InputStream in, final String fileName) throws IOException + { + if (fileName.endsWith(GZ_SUFFIX)) { + return gzipInputStream(in); + } else if (fileName.endsWith(BZ2_SUFFIX)) { + return new BZip2CompressorInputStream(in, true); + } else if (fileName.endsWith(XZ_SUFFIX)) { + return new XZCompressorInputStream(in, true); + } else if (fileName.endsWith(ZIP_SUFFIX)) { + // This reads the first file in the archive. + final ZipInputStream zipIn = new ZipInputStream(in, StandardCharsets.UTF_8); + try { + final ZipEntry nextEntry = zipIn.getNextEntry(); + if (nextEntry == null) { + zipIn.close(); + + // No files in the archive - return an empty stream. 
+ return new ByteArrayInputStream(new byte[0]); + } + return zipIn; + } + catch (IOException e) { + try { + zipIn.close(); + } + catch (IOException e2) { + e.addSuppressed(e2); + } + throw e; + } + } else { + return in; + } + } } diff --git a/java-util/src/test/java/io/druid/java/util/common/CompressionUtilsTest.java b/java-util/src/test/java/io/druid/java/util/common/CompressionUtilsTest.java index c00e75b92249..d8f878b9ed70 100644 --- a/java-util/src/test/java/io/druid/java/util/common/CompressionUtilsTest.java +++ b/java-util/src/test/java/io/druid/java/util/common/CompressionUtilsTest.java @@ -25,6 +25,8 @@ import com.google.common.io.ByteSource; import com.google.common.io.ByteStreams; import com.google.common.io.Files; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream; +import org.apache.commons.compress.compressors.xz.XZCompressorOutputStream; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; @@ -53,6 +55,8 @@ import java.util.regex.Pattern; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; public class CompressionUtilsTest { @@ -221,7 +225,6 @@ public void testGoodZipStream() throws IOException } } - @Test public void testGoodGzipByteSource() throws IOException { @@ -230,7 +233,7 @@ public void testGoodGzipByteSource() throws IOException Assert.assertFalse(gzFile.exists()); CompressionUtils.gzip(Files.asByteSource(testFile), Files.asByteSink(gzFile), Predicates.alwaysTrue()); Assert.assertTrue(gzFile.exists()); - try (final InputStream inputStream = CompressionUtils.gzipInputStream(new FileInputStream(gzFile))) { + try (final InputStream inputStream = CompressionUtils.decompress(new FileInputStream(gzFile), gzFile.getName())) { assertGoodDataStream(inputStream); } if (!testFile.delete()) { @@ -244,6 +247,50 @@ public void testGoodGzipByteSource() throws IOException } } + @Test + public void testDecompressBzip2() throws IOException + { + final File tmpDir = temporaryFolder.newFolder("testDecompressBzip2"); + final File bzFile = new File(tmpDir, testFile.getName() + ".bz2"); + Assert.assertFalse(bzFile.exists()); + try (final OutputStream out = new BZip2CompressorOutputStream(new FileOutputStream(bzFile))) { + ByteStreams.copy(new FileInputStream(testFile), out); + } + try (final InputStream inputStream = CompressionUtils.decompress(new FileInputStream(bzFile), bzFile.getName())) { + assertGoodDataStream(inputStream); + } + } + + @Test + public void testDecompressXz() throws IOException + { + final File tmpDir = temporaryFolder.newFolder("testDecompressXz"); + final File xzFile = new File(tmpDir, testFile.getName() + ".xz"); + Assert.assertFalse(xzFile.exists()); + try (final OutputStream out = new XZCompressorOutputStream(new FileOutputStream(xzFile))) { + ByteStreams.copy(new FileInputStream(testFile), out); + } + try (final InputStream inputStream = CompressionUtils.decompress(new FileInputStream(xzFile), xzFile.getName())) { + assertGoodDataStream(inputStream); + } + } + + @Test + public void testDecompressZip() throws IOException + { + final File tmpDir = temporaryFolder.newFolder("testDecompressZip"); + final File zipFile = new File(tmpDir, testFile.getName() + ".zip"); + Assert.assertFalse(zipFile.exists()); + try (final ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile))) { + out.putNextEntry(new ZipEntry("cool.file")); + ByteStreams.copy(new FileInputStream(testFile), out); + out.closeEntry(); + } + try 
(final InputStream inputStream = CompressionUtils.decompress(new FileInputStream(zipFile), zipFile.getName())) { + assertGoodDataStream(inputStream); + } + } + @Test public void testGoodGZStream() throws IOException { @@ -490,7 +537,7 @@ public void flush() throws IOException }, Predicates.alwaysTrue() ); Assert.assertTrue(gzFile.exists()); - try (final InputStream inputStream = CompressionUtils.gzipInputStream(new FileInputStream(gzFile))) { + try (final InputStream inputStream = CompressionUtils.decompress(new FileInputStream(gzFile), "file.gz")) { assertGoodDataStream(inputStream); } if (!testFile.delete()) { @@ -536,7 +583,7 @@ public void testStreamErrorGunzip() throws Exception Assert.assertFalse(gzFile.exists()); CompressionUtils.gzip(Files.asByteSource(testFile), Files.asByteSink(gzFile), Predicates.alwaysTrue()); Assert.assertTrue(gzFile.exists()); - try (final InputStream inputStream = CompressionUtils.gzipInputStream(new FileInputStream(gzFile))) { + try (final InputStream inputStream = CompressionUtils.decompress(new FileInputStream(gzFile), "file.gz")) { assertGoodDataStream(inputStream); } if (testFile.exists() && !testFile.delete()) { diff --git a/pom.xml b/pom.xml index dc401857aa22..8fc12df9adc4 100644 --- a/pom.xml +++ b/pom.xml @@ -325,6 +325,16 @@ rhino 1.7R5 + + org.apache.commons + commons-compress + 1.16 + + + org.tukaani + xz + 1.8 + com.fasterxml.jackson.core jackson-annotations diff --git a/server/src/main/java/io/druid/segment/realtime/firehose/HttpFirehoseFactory.java b/server/src/main/java/io/druid/segment/realtime/firehose/HttpFirehoseFactory.java index aaab6f9dae55..949cb1db7e47 100644 --- a/server/src/main/java/io/druid/segment/realtime/firehose/HttpFirehoseFactory.java +++ b/server/src/main/java/io/druid/segment/realtime/firehose/HttpFirehoseFactory.java @@ -105,7 +105,7 @@ protected InputStream openObjectStream(URI object, long start) throws IOExceptio @Override protected InputStream wrapObjectStream(URI object, InputStream stream) throws IOException { - return object.getPath().endsWith(".gz") ? CompressionUtils.gzipInputStream(stream) : stream; + return CompressionUtils.decompress(stream, object.getPath()); } @Override diff --git a/server/src/main/java/io/druid/segment/realtime/firehose/LocalFirehoseFactory.java b/server/src/main/java/io/druid/segment/realtime/firehose/LocalFirehoseFactory.java index 6db1e8c30941..e9e7b40bb730 100644 --- a/server/src/main/java/io/druid/segment/realtime/firehose/LocalFirehoseFactory.java +++ b/server/src/main/java/io/druid/segment/realtime/firehose/LocalFirehoseFactory.java @@ -22,10 +22,10 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; -import io.druid.java.util.emitter.EmittingLogger; import io.druid.data.input.impl.AbstractTextFilesFirehoseFactory; import io.druid.data.input.impl.StringInputRowParser; import io.druid.java.util.common.CompressionUtils; +import io.druid.java.util.emitter.EmittingLogger; import org.apache.commons.io.FileUtils; import org.apache.commons.io.filefilter.TrueFileFilter; import org.apache.commons.io.filefilter.WildcardFileFilter; @@ -97,6 +97,6 @@ protected InputStream openObjectStream(File object) throws IOException @Override protected InputStream wrapObjectStream(File object, InputStream stream) throws IOException { - return object.getPath().endsWith(".gz") ? 
CompressionUtils.gzipInputStream(stream) : stream; + return CompressionUtils.decompress(stream, object.getPath()); } } From ea4f8544fb33dac91d88af08d5f5892a74e12b8d Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 6 Apr 2018 14:18:55 -0700 Subject: [PATCH 31/67] revert lambda conversion to fix occasional jvm error (#5591) --- .../server/coordinator/DruidCoordinator.java | 91 ++++++++++--------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java index 0280045c3a9d..d8d49b1a2ba8 100644 --- a/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java +++ b/server/src/main/java/io/druid/server/coordinator/DruidCoordinator.java @@ -691,58 +691,63 @@ public CoordinatorHistoricalManagerRunnable(final int startingLeaderCounter) super( ImmutableList.of( new DruidCoordinatorSegmentInfoLoader(DruidCoordinator.this), - params -> { - // Display info about all historical servers - Iterable servers = FunctionalIterable - .create(serverInventoryView.getInventory()) - .filter(DruidServer::segmentReplicatable) - .transform(DruidServer::toImmutableDruidServer); - - if (log.isDebugEnabled()) { - log.debug("Servers"); - for (ImmutableDruidServer druidServer : servers) { - log.debug(" %s", druidServer); - log.debug(" -- DataSources"); - for (ImmutableDruidDataSource druidDataSource : druidServer.getDataSources()) { - log.debug(" %s", druidDataSource); + new DruidCoordinatorHelper() + { + @Override + public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) + { + // Display info about all historical servers + Iterable servers = FunctionalIterable + .create(serverInventoryView.getInventory()) + .filter(DruidServer::segmentReplicatable) + .transform(DruidServer::toImmutableDruidServer); + + if (log.isDebugEnabled()) { + log.debug("Servers"); + for (ImmutableDruidServer druidServer : servers) { + log.debug(" %s", druidServer); + log.debug(" -- DataSources"); + for (ImmutableDruidDataSource druidDataSource : druidServer.getDataSources()) { + log.debug(" %s", druidDataSource); + } } } - } - // Find all historical servers, group them by subType and sort by ascending usage - final DruidCluster cluster = new DruidCluster(); - for (ImmutableDruidServer server : servers) { - if (!loadManagementPeons.containsKey(server.getName())) { - LoadQueuePeon loadQueuePeon = taskMaster.giveMePeon(server); - loadQueuePeon.start(); - log.info("Created LoadQueuePeon for server[%s].", server.getName()); + // Find all historical servers, group them by subType and sort by ascending usage + final DruidCluster cluster = new DruidCluster(); + for (ImmutableDruidServer server : servers) { + if (!loadManagementPeons.containsKey(server.getName())) { + LoadQueuePeon loadQueuePeon = taskMaster.giveMePeon(server); + loadQueuePeon.start(); + log.info("Created LoadQueuePeon for server[%s].", server.getName()); - loadManagementPeons.put(server.getName(), loadQueuePeon); + loadManagementPeons.put(server.getName(), loadQueuePeon); + } + + cluster.add(new ServerHolder(server, loadManagementPeons.get(server.getName()))); } - cluster.add(new ServerHolder(server, loadManagementPeons.get(server.getName()))); - } + segmentReplicantLookup = SegmentReplicantLookup.make(cluster); - segmentReplicantLookup = SegmentReplicantLookup.make(cluster); + // Stop peons for servers that aren't there anymore. 
+ final Set disappeared = Sets.newHashSet(loadManagementPeons.keySet()); + for (ImmutableDruidServer server : servers) { + disappeared.remove(server.getName()); + } + for (String name : disappeared) { + log.info("Removing listener for server[%s] which is no longer there.", name); + LoadQueuePeon peon = loadManagementPeons.remove(name); + peon.stop(); + } - // Stop peons for servers that aren't there anymore. - final Set disappeared = Sets.newHashSet(loadManagementPeons.keySet()); - for (ImmutableDruidServer server : servers) { - disappeared.remove(server.getName()); + return params.buildFromExisting() + .withDruidCluster(cluster) + .withDatabaseRuleManager(metadataRuleManager) + .withLoadManagementPeons(loadManagementPeons) + .withSegmentReplicantLookup(segmentReplicantLookup) + .withBalancerReferenceTimestamp(DateTimes.nowUtc()) + .build(); } - for (String name : disappeared) { - log.info("Removing listener for server[%s] which is no longer there.", name); - LoadQueuePeon peon = loadManagementPeons.remove(name); - peon.stop(); - } - - return params.buildFromExisting() - .withDruidCluster(cluster) - .withDatabaseRuleManager(metadataRuleManager) - .withLoadManagementPeons(loadManagementPeons) - .withSegmentReplicantLookup(segmentReplicantLookup) - .withBalancerReferenceTimestamp(DateTimes.nowUtc()) - .build(); }, new DruidCoordinatorRuleRunner(DruidCoordinator.this), new DruidCoordinatorCleanupUnneeded(DruidCoordinator.this), From ad6f234e1eab3382b8af9ad0ec3a25191d55bd71 Mon Sep 17 00:00:00 2001 From: Alexander T <37665177+aleksi75@users.noreply.github.com> Date: Sat, 7 Apr 2018 01:13:17 +0200 Subject: [PATCH 32/67] Update lookups-cached-global.md (#5525) Update lookup creation example to work with version 0.12.0 --- .../extensions-core/lookups-cached-global.md | 71 ++++++++++--------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/docs/content/development/extensions-core/lookups-cached-global.md b/docs/content/development/extensions-core/lookups-cached-global.md index a5e7d5eb04ff..7199e76ded23 100644 --- a/docs/content/development/extensions-core/lookups-cached-global.md +++ b/docs/content/development/extensions-core/lookups-cached-global.md @@ -86,23 +86,26 @@ In a simple case where only one [tier](../../querying/lookups.html#dynamic-confi { "realtime_customer2": { "country_code": { - "type": "cachedNamespace", - "extractionNamespace": { - "type": "jdbc", - "connectorConfig": { - "createTables": true, - "connectURI": "jdbc:mysql:\/\/localhost:3306\/druid", - "user": "druid", - "password": "diurd" - }, - "table": "lookupValues", - "keyColumn": "value_id", - "valueColumn": "value_text", - "filter": "value_type='country'", - "tsColumn": "timeColumn" - }, - "firstCacheTimeout": 120000, - "injective":true + "version": "v0", + "lookupExtractorFactory": { + "type": "cachedNamespace", + "extractionNamespace": { + "type": "jdbc", + "connectorConfig": { + "createTables": true, + "connectURI": "jdbc:mysql:\/\/localhost:3306\/druid", + "user": "druid", + "password": "diurd" + }, + "table": "lookupValues", + "keyColumn": "value_id", + "valueColumn": "value_text", + "filter": "value_type='country'", + "tsColumn": "timeColumn" + }, + "firstCacheTimeout": 120000, + "injective": true + } } } } @@ -112,22 +115,26 @@ Where the coordinator endpoint `/druid/coordinator/v1/lookups/realtime_customer2 ```json { - "type": "cachedNamespace", - "extractionNamespace": { - "type": "jdbc", - "connectorConfig": { - "createTables": true, - "connectURI": "jdbc:mysql:\/\/localhost:3306\/druid", - "user": 
"druid", - "password": "diurd" + "version": "v0", + "lookupExtractorFactory": { + "type": "cachedNamespace", + "extractionNamespace": { + "type": "jdbc", + "connectorConfig": { + "createTables": true, + "connectURI": "jdbc:mysql://localhost:3306/druid", + "user": "druid", + "password": "diurd" + }, + "table": "lookupValues", + "keyColumn": "value_id", + "valueColumn": "value_text", + "filter": "value_type='country'", + "tsColumn": "timeColumn" }, - "table": "lookupTable", - "keyColumn": "country_id", - "valueColumn": "country_name", - "tsColumn": "timeColumn" - }, - "firstCacheTimeout": 120000, - "injective":true + "firstCacheTimeout": 120000, + "injective": true + } } ``` From 685f4063d4e63fba5f9ae11bf496c4c2575a8ebf Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Sat, 7 Apr 2018 06:45:55 -0700 Subject: [PATCH 33/67] DoublesSketchModule: Fix serde for DoublesSketchMergeAggregatorFactory. (#5587) Fixes #5580. --- .../quantiles/DoublesSketchModule.java | 2 + .../DoublesSketchAggregatorTest.java | 97 +++++++++++++------ 2 files changed, 72 insertions(+), 27 deletions(-) diff --git a/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchModule.java b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchModule.java index 0e3081965b03..f26bf348a33a 100644 --- a/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchModule.java +++ b/extensions-core/datasketches/src/main/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchModule.java @@ -35,6 +35,7 @@ public class DoublesSketchModule implements DruidModule { public static final String DOUBLES_SKETCH = "quantilesDoublesSketch"; + public static final String DOUBLES_SKETCH_MERGE = "quantilesDoublesSketchMerge"; public static final String DOUBLES_SKETCH_HISTOGRAM_POST_AGG = "quantilesDoublesSketchToHistogram"; public static final String DOUBLES_SKETCH_QUANTILE_POST_AGG = "quantilesDoublesSketchToQuantile"; @@ -55,6 +56,7 @@ public List getJacksonModules() return Arrays. 
asList( new SimpleModule("DoublesQuantilesSketchModule").registerSubtypes( new NamedType(DoublesSketchAggregatorFactory.class, DOUBLES_SKETCH), + new NamedType(DoublesSketchMergeAggregatorFactory.class, DOUBLES_SKETCH_MERGE), new NamedType(DoublesSketchToHistogramPostAggregator.class, DOUBLES_SKETCH_HISTOGRAM_POST_AGG), new NamedType(DoublesSketchToQuantilePostAggregator.class, DOUBLES_SKETCH_QUANTILE_POST_AGG), new NamedType(DoublesSketchToQuantilesPostAggregator.class, DOUBLES_SKETCH_QUANTILES_POST_AGG), diff --git a/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorTest.java b/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorTest.java index 5da8e787ac1d..0c8f5f2e257b 100644 --- a/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorTest.java +++ b/extensions-core/datasketches/src/test/java/io/druid/query/aggregation/datasketches/quantiles/DoublesSketchAggregatorTest.java @@ -27,6 +27,7 @@ import io.druid.java.util.common.granularity.Granularities; import io.druid.java.util.common.guava.Sequence; import io.druid.query.aggregation.AggregationTestHelper; +import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.groupby.GroupByQueryConfig; import io.druid.query.groupby.GroupByQueryRunnerTest; import org.junit.Assert; @@ -58,7 +59,8 @@ public DoublesSketchAggregatorTest(final GroupByQueryConfig config) module.getJacksonModules(), config, tempFolder); timeSeriesHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper( module.getJacksonModules(), - tempFolder); + tempFolder + ); } @Parameterized.Parameters(name = "{0}") @@ -66,7 +68,7 @@ public static Collection constructorFeeder() { final List constructors = Lists.newArrayList(); for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { - constructors.add(new Object[] {config}); + constructors.add(new Object[]{config}); } return constructors; } @@ -76,11 +78,29 @@ public static Collection constructorFeeder() public void serializeDeserializeFactoryWithFieldName() throws Exception { ObjectMapper objectMapper = new DefaultObjectMapper(); + new DoublesSketchModule().getJacksonModules().forEach(objectMapper::registerModule); DoublesSketchAggregatorFactory factory = new DoublesSketchAggregatorFactory("name", "filedName", 128); - DoublesSketchAggregatorFactory other = objectMapper.readValue( + AggregatorFactory other = objectMapper.readValue( objectMapper.writeValueAsString(factory), - DoublesSketchAggregatorFactory.class); + AggregatorFactory.class + ); + + Assert.assertEquals(factory, other); + } + + // this is to test Json properties and equals for the combining factory + @Test + public void serializeDeserializeCombiningFactoryWithFieldName() throws Exception + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + new DoublesSketchModule().getJacksonModules().forEach(objectMapper::registerModule); + DoublesSketchAggregatorFactory factory = new DoublesSketchMergeAggregatorFactory("name", 128); + + AggregatorFactory other = objectMapper.readValue( + objectMapper.writeValueAsString(factory), + AggregatorFactory.class + ); Assert.assertEquals(factory, other); } @@ -90,7 +110,8 @@ public void ingestingSketches() throws Exception { Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("quantiles/doubles_sketch_data.tsv").getFile()), - 
String.join("\n", + String.join( + "\n", "{", " \"type\": \"string\",", " \"parseSpec\": {", @@ -103,16 +124,20 @@ public void ingestingSketches() throws Exception " },", " \"columns\": [\"timestamp\", \"product\", \"sketch\"]", " }", - "}"), - String.join("\n", + "}" + ), + String.join( + "\n", "[", " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 128},", " {\"type\": \"quantilesDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 128}", - "]"), + "]" + ), 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join("\n", + String.join( + "\n", "{", " \"queryType\": \"groupBy\",", " \"dataSource\": \"test_datasource\",", @@ -127,7 +152,9 @@ public void ingestingSketches() throws Exception " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", " ],", " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}")); + "}" + ) + ); List results = seq.toList(); Assert.assertEquals(1, results.size()); Row row = results.get(0); @@ -155,8 +182,8 @@ public void ingestingSketches() throws Exception Assert.assertTrue(histogramObject instanceof double[]); double[] histogram = (double[]) histogramObject; for (final double bin : histogram) { - Assert.assertEquals(100, bin, 100 * 0.2); // 400 items uniformly - // distributed into 4 bins + // 400 items uniformly distributed into 4 bins + Assert.assertEquals(100, bin, 100 * 0.2); } } @@ -165,7 +192,8 @@ public void buildingSketchesAtIngestionTime() throws Exception { Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()), - String.join("\n", + String.join( + "\n", "{", " \"type\": \"string\",", " \"parseSpec\": {", @@ -178,12 +206,14 @@ public void buildingSketchesAtIngestionTime() throws Exception " },", " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", " }", - "}"), + "}" + ), "[{\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128}]", 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join("\n", + String.join( + "\n", "{", " \"queryType\": \"groupBy\",", " \"dataSource\": \"test_datasource\",", @@ -198,7 +228,9 @@ public void buildingSketchesAtIngestionTime() throws Exception " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", " ],", " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}")); + "}" + ) + ); List results = seq.toList(); Assert.assertEquals(1, results.size()); Row row = results.get(0); @@ -223,7 +255,7 @@ public void buildingSketchesAtIngestionTime() throws Exception Assert.assertEquals(4, histogram.length); for (final double bin : histogram) { Assert.assertEquals(100, bin, 100 * 0.2); // 400 items uniformly - // distributed into 4 bins + // distributed into 4 bins } } @@ -232,7 +264,8 @@ public void buildingSketchesAtQueryTime() throws Exception { Sequence seq = helper.createIndexAndRunQueryOnSegment( new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()), - String.join("\n", + String.join( + "\n", "{", " \"type\": \"string\",", " \"parseSpec\": {", @@ -245,12 +278,14 @@ public void 
buildingSketchesAtQueryTime() throws Exception " },", " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", " }", - "}"), + "}" + ), "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", 0, // minTimestamp Granularities.NONE, 10, // maxRowCount - String.join("\n", + String.join( + "\n", "{", " \"queryType\": \"groupBy\",", " \"dataSource\": \"test_datasource\",", @@ -265,7 +300,9 @@ public void buildingSketchesAtQueryTime() throws Exception " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", " ],", " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}")); + "}" + ) + ); List results = seq.toList(); Assert.assertEquals(1, results.size()); Row row = results.get(0); @@ -294,7 +331,7 @@ public void buildingSketchesAtQueryTime() throws Exception double[] histogram = (double[]) histogramObject; for (final double bin : histogram) { Assert.assertEquals(100, bin, 100 * 0.2); // 400 items uniformly - // distributed into 4 bins + // distributed into 4 bins } } @@ -317,7 +354,8 @@ public void QueryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Except " },", " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", " }", - "}"), + "}" + ), "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", 0, // minTimestamp Granularities.NONE, @@ -338,7 +376,9 @@ public void QueryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Except " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", " ],", " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}")); + "}" + ) + ); List results = seq.toList(); Assert.assertEquals(1, results.size()); Row row = results.get(0); @@ -367,7 +407,7 @@ public void QueryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Except double[] histogram = (double[]) histogramObject; for (final double bin : histogram) { Assert.assertEquals(100, bin, 100 * 0.2); // 400 items uniformly - // distributed into 4 bins + // distributed into 4 bins } } @@ -390,7 +430,8 @@ public void TimeSeriesQueryInputAsFloat() throws Exception " },", " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", " }", - "}"), + "}" + ), "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", 0, // minTimestamp Granularities.NONE, @@ -410,7 +451,9 @@ public void TimeSeriesQueryInputAsFloat() throws Exception " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram1\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", " ],", " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", - "}")); + "}" + ) + ); List results = seq.toList(); Assert.assertEquals(1, results.size()); } From 3a5d51630881d7519754bba1fa312fa96472741d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 9 Apr 2018 17:37:24 -0700 Subject: [PATCH 34/67] Add missing type for MapVirtualColumn (#5598) --- .../io/druid/segment/DruidVirtualColumnsModule.java | 8 +++++++- .../java/io/druid/segment/MapVirtualColumnTest.java | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/extensions-contrib/virtual-columns/src/main/java/io/druid/segment/DruidVirtualColumnsModule.java 
b/extensions-contrib/virtual-columns/src/main/java/io/druid/segment/DruidVirtualColumnsModule.java index 184225d02453..9db5be737a22 100644 --- a/extensions-contrib/virtual-columns/src/main/java/io/druid/segment/DruidVirtualColumnsModule.java +++ b/extensions-contrib/virtual-columns/src/main/java/io/druid/segment/DruidVirtualColumnsModule.java @@ -20,6 +20,7 @@ package io.druid.segment; import com.fasterxml.jackson.databind.Module; +import com.fasterxml.jackson.databind.jsontype.NamedType; import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; @@ -34,7 +35,12 @@ public class DruidVirtualColumnsModule implements DruidModule @Override public List getJacksonModules() { - return ImmutableList.of(new SimpleModule().registerSubtypes(MapVirtualColumn.class)); + return ImmutableList.of( + new SimpleModule(getClass().getSimpleName()) + .registerSubtypes( + new NamedType(MapVirtualColumn.class, "map") + ) + ); } @Override diff --git a/extensions-contrib/virtual-columns/src/test/java/io/druid/segment/MapVirtualColumnTest.java b/extensions-contrib/virtual-columns/src/test/java/io/druid/segment/MapVirtualColumnTest.java index 63155edd2ea8..5f25336f2ab4 100644 --- a/extensions-contrib/virtual-columns/src/test/java/io/druid/segment/MapVirtualColumnTest.java +++ b/extensions-contrib/virtual-columns/src/test/java/io/druid/segment/MapVirtualColumnTest.java @@ -19,6 +19,7 @@ package io.druid.segment; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Supplier; import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableMap; @@ -140,6 +141,18 @@ private Druids.SelectQueryBuilder testBuilder() .pagingSpec(new PagingSpec(null, 3)); } + @Test + public void testSerde() throws IOException + { + final ObjectMapper mapper = new DefaultObjectMapper(); + new DruidVirtualColumnsModule().getJacksonModules().forEach(mapper::registerModule); + + final MapVirtualColumn column = new MapVirtualColumn("keys", "values", "params"); + final String json = mapper.writeValueAsString(column); + final VirtualColumn fromJson = mapper.readValue(json, VirtualColumn.class); + Assert.assertEquals(column, fromJson); + } + @Test public void testBasic() { From ee37ff25022307959abd88d09c9d7e98a158161f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Mon, 9 Apr 2018 17:39:19 -0700 Subject: [PATCH 35/67] CompressionUtils: Make gzipInputStream public once again. (#5590) But add a reference to "decompress" and mention that it's preferred when reading from streams that come from files. 
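(For context on the change below: the two entry points the commit message contrasts can be used roughly as in this sketch. It is an illustrative usage example with assumed file paths, not code from the patch; both `CompressionUtils.decompress(InputStream, String)` and `CompressionUtils.gzipInputStream(InputStream)` appear in the diffs in this series.)

```java
import io.druid.java.util.common.CompressionUtils;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

public class CompressionUsageSketch
{
  public static void main(String[] args) throws IOException
  {
    // File name available: decompress() picks the codec (.gz, .bz2, .xz, .zip) from the extension.
    try (InputStream in = CompressionUtils.decompress(new FileInputStream("/tmp/example.bz2"), "example.bz2")) {
      in.read(); // consume the decompressed bytes
    }

    // Raw stream known to be gzip (no file name, e.g. an HTTP response body):
    // the now-public gzipInputStream() wraps it directly and handles concatenated gzip members.
    try (InputStream in = CompressionUtils.gzipInputStream(new FileInputStream("/tmp/example.gz"))) {
      in.read();
    }
  }
}
```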
--- .../main/java/io/druid/java/util/common/CompressionUtils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/java-util/src/main/java/io/druid/java/util/common/CompressionUtils.java b/java-util/src/main/java/io/druid/java/util/common/CompressionUtils.java index c076ea6e8def..9ee57459f564 100644 --- a/java-util/src/main/java/io/druid/java/util/common/CompressionUtils.java +++ b/java-util/src/main/java/io/druid/java/util/common/CompressionUtils.java @@ -318,8 +318,10 @@ public static FileUtils.FileCopyResult gunzip(InputStream in, File outFile) thro * @param in The raw input stream * * @return A GZIPInputStream that can handle concatenated gzip streams in the input + * + * @see #decompress(InputStream, String) which should be used instead for streams coming from files */ - private static GZIPInputStream gzipInputStream(final InputStream in) throws IOException + public static GZIPInputStream gzipInputStream(final InputStream in) throws IOException { return new GZIPInputStream( new FilterInputStream(in) From 80fa5094e8e1cde8be67ac7e2ad8f9f89b0e059a Mon Sep 17 00:00:00 2001 From: Nishant Bangarwa Date: Tue, 10 Apr 2018 09:15:35 +0530 Subject: [PATCH 36/67] Fix Kerberos Authentication failing requests without cookies and excludedPaths config. (#5596) * Fix Kerberos Authentication failing requests without cookies. KerberosAuthenticator was failing the first request from clients: after authentication we were setting the cookie properly but not setting the authenticated flag in the request. This PR fixed that. Additional Fixes - * Removing of Unused SpnegoFilterConfig - replaced by KerberosAuthenticator * Unused internalClientKeytab and principal from KerberosAuthenticator * Fix docs accordingly and add docs for configuring an escalated client. * Fix excluded path config behavior * spelling correction * Revert "spelling correction" This reverts commit fb754b43d86d7183e661d6b2027be52efff8a83b. * Revert "Fix excluded path config behavior" This reverts commit 390104776991a368f838d9326bc48cb0ddd1f870. 
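(A simplified sketch of the fix described in this commit message, not the actual `KerberosAuthenticator` code. It shows the shape of the change: after a request's cookie or SPNEGO token validates, the filter must also attach an `AuthenticationResult` under `AuthConfig.DRUID_AUTHENTICATION_RESULT`, as the diff below does. The class and helper method here are illustrative only.)

```java
import io.druid.server.security.AuthConfig;
import io.druid.server.security.AuthenticationResult;

import javax.servlet.FilterChain;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;

public class CookieAuthFilterSketch
{
  // Hypothetical helper: once the request's auth cookie/token has been validated,
  // mark the request as authenticated so downstream authorization checks see it,
  // then continue the filter chain.
  static void markAuthenticatedAndContinue(
      HttpServletRequest request,
      HttpServletResponse response,
      FilterChain chain,
      String identity,
      String authorizerName
  ) throws IOException, ServletException
  {
    request.setAttribute(
        AuthConfig.DRUID_AUTHENTICATION_RESULT,
        new AuthenticationResult(identity, authorizerName, null)
    );
    chain.doFilter(request, response);
  }
}
```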
--- .../extensions-core/druid-kerberos.md | 13 +- .../kerberos/KerberosAuthenticator.java | 11 +- .../security/kerberos/SpnegoFilterConfig.java | 134 ------------------ .../kerberos/SpnegoFilterConfigTest.java | 75 ---------- 4 files changed, 16 insertions(+), 217 deletions(-) delete mode 100644 extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/SpnegoFilterConfig.java delete mode 100644 extensions-core/druid-kerberos/src/test/java/io/druid/security/kerberos/SpnegoFilterConfigTest.java diff --git a/docs/content/development/extensions-core/druid-kerberos.md b/docs/content/development/extensions-core/druid-kerberos.md index 1ded0e0f5a54..dc0d38a7b5af 100644 --- a/docs/content/development/extensions-core/druid-kerberos.md +++ b/docs/content/development/extensions-core/druid-kerberos.md @@ -31,8 +31,6 @@ The configuration examples in the rest of this document will use "kerberos" as t ### Properties |Property|Possible Values|Description|Default|required| |--------|---------------|-----------|-------|--------| -|`druid.auth.authenticator.kerberos.internalClientPrincipal`|`druid@EXAMPLE.COM`| Principal user name, used for internal node communication|empty|Yes| -|`druid.auth.authenticator.kerberos.internalClientKeytab`|`/etc/security/keytabs/druid.keytab`|Path to keytab file used for internal node communication|empty|Yes| |`druid.auth.authenticator.kerberos.serverPrincipal`|`HTTP/_HOST@EXAMPLE.COM`| SPNego service principal used by druid nodes|empty|Yes| |`druid.auth.authenticator.kerberos.serverKeytab`|`/etc/security/keytabs/spnego.service.keytab`|SPNego service keytab used by druid nodes|empty|Yes| |`druid.auth.authenticator.kerberos.authToLocal`|`RULE:[1:$1@$0](druid@EXAMPLE.COM)s/.*/druid DEFAULT`|It allows you to set a general rule for mapping principal names to local user names. It will be used if there is not an explicit mapping for the principal name that is being translated.|DEFAULT|No| @@ -54,6 +52,17 @@ In Active Directory environment, SPNEGO token in the Authorization header includ which includes all security groups for the user. In some cases when the user belongs to many security groups the header to grow beyond what druid can handle by default. In such cases, max request header size that druid can handle can be increased by setting `druid.server.http.maxRequestHeaderSize` (default 8Kb) and `druid.router.http.maxRequestBufferSize` (default 8Kb). +## Configuring Kerberos Escalated Client + +Druid internal nodes communicate with each other using an escalated http Client. A Kerberos enabled escalated HTTP Client can be configured by following properties - + + +|Property|Example Values|Description|Default|required| +|--------|---------------|-----------|-------|--------| +|`druid.escalator.type`|`kerberos`| Type of Escalator client used for internal node communication.|n/a|Yes| +|`druid.escalator.internalClientPrincipal`|`druid@EXAMPLE.COM`| Principal user name, used for internal node communication|n/a|Yes| +|`druid.escalator.internalClientKeytab`|`/etc/security/keytabs/druid.keytab`|Path to keytab file used for internal node communication|n/a|Yes| +|`druid.escalator.authorizerName`|`MyBasicAuthorizer`|Authorizer that requests should be directed to.|n/a|Yes| ## Accessing Druid HTTP end points when kerberos security is enabled 1. 
To access druid HTTP endpoints via curl user will need to first login using `kinit` command as follows - diff --git a/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/KerberosAuthenticator.java b/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/KerberosAuthenticator.java index 215de9caa4fb..4d601b01c561 100644 --- a/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/KerberosAuthenticator.java +++ b/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/KerberosAuthenticator.java @@ -99,8 +99,6 @@ public class KerberosAuthenticator implements Authenticator private final DruidNode node; private final String serverPrincipal; private final String serverKeytab; - private final String internalClientPrincipal; - private final String internalClientKeytab; private final String authToLocal; private final List excludedPaths; private final String cookieSignatureSecret; @@ -111,8 +109,6 @@ public class KerberosAuthenticator implements Authenticator public KerberosAuthenticator( @JsonProperty("serverPrincipal") String serverPrincipal, @JsonProperty("serverKeytab") String serverKeytab, - @JsonProperty("internalClientPrincipal") String internalClientPrincipal, - @JsonProperty("internalClientKeytab") String internalClientKeytab, @JsonProperty("authToLocal") String authToLocal, @JsonProperty("excludedPaths") List excludedPaths, @JsonProperty("cookieSignatureSecret") String cookieSignatureSecret, @@ -123,8 +119,6 @@ public KerberosAuthenticator( this.node = node; this.serverPrincipal = serverPrincipal; this.serverKeytab = serverKeytab; - this.internalClientPrincipal = internalClientPrincipal; - this.internalClientKeytab = internalClientKeytab; this.authToLocal = authToLocal == null ? "DEFAULT" : authToLocal; this.excludedPaths = excludedPaths == null ? DEFAULT_EXCLUDED_PATHS : excludedPaths; this.cookieSignatureSecret = cookieSignatureSecret; @@ -344,6 +338,11 @@ public Principal getUserPrincipal() isHttps ); } + // Since this request is validated also set DRUID_AUTHENTICATION_RESULT + request.setAttribute( + AuthConfig.DRUID_AUTHENTICATION_RESULT, + new AuthenticationResult(token.getName(), authorizerName, null) + ); doFilter(filterChain, httpRequest, httpResponse); } } else { diff --git a/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/SpnegoFilterConfig.java b/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/SpnegoFilterConfig.java deleted file mode 100644 index 4dd4f9b01f97..000000000000 --- a/extensions-core/druid-kerberos/src/main/java/io/druid/security/kerberos/SpnegoFilterConfig.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to Metamarkets Group Inc. (Metamarkets) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Metamarkets licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package io.druid.security.kerberos; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.util.Collections; -import java.util.List; - -public class SpnegoFilterConfig -{ - - public static final List DEFAULT_EXCLUDED_PATHS = Collections.emptyList(); - - @JsonProperty - private final String principal; - - @JsonProperty - private final String keytab; - - @JsonProperty - private final String authToLocal; - - @JsonProperty - private final List excludedPaths; - - @JsonProperty - private final String cookieSignatureSecret; - - @JsonCreator - public SpnegoFilterConfig( - @JsonProperty("principal") String principal, - @JsonProperty("keytab") String keytab, - @JsonProperty("authToLocal") String authToLocal, - @JsonProperty("excludedPaths") List excludedPaths, - @JsonProperty("cookieSignatureSecret") String cookieSignatureSecret - ) - { - this.principal = principal; - this.keytab = keytab; - this.authToLocal = authToLocal == null ? "DEFAULT" : authToLocal; - this.excludedPaths = excludedPaths == null ? DEFAULT_EXCLUDED_PATHS : excludedPaths; - this.cookieSignatureSecret = cookieSignatureSecret; - } - - @JsonProperty - public String getPrincipal() - { - return principal; - } - - @JsonProperty - public String getKeytab() - { - return keytab; - } - - @JsonProperty - public String getAuthToLocal() - { - return authToLocal; - } - - @JsonProperty - public List getExcludedPaths() - { - return excludedPaths; - } - - @JsonProperty - public String getCookieSignatureSecret() - { - return cookieSignatureSecret; - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - SpnegoFilterConfig that = (SpnegoFilterConfig) o; - - if (principal != null ? !principal.equals(that.principal) : that.principal != null) { - return false; - } - if (keytab != null ? !keytab.equals(that.keytab) : that.keytab != null) { - return false; - } - if (authToLocal != null ? !authToLocal.equals(that.authToLocal) : that.authToLocal != null) { - return false; - } - if (excludedPaths != null ? !excludedPaths.equals(that.excludedPaths) : that.excludedPaths != null) { - return false; - } - return cookieSignatureSecret != null - ? cookieSignatureSecret.equals(that.cookieSignatureSecret) - : that.cookieSignatureSecret == null; - - } - - @Override - public int hashCode() - { - int result = principal != null ? principal.hashCode() : 0; - result = 31 * result + (keytab != null ? keytab.hashCode() : 0); - result = 31 * result + (authToLocal != null ? authToLocal.hashCode() : 0); - result = 31 * result + (excludedPaths != null ? excludedPaths.hashCode() : 0); - result = 31 * result + (cookieSignatureSecret != null ? cookieSignatureSecret.hashCode() : 0); - return result; - } -} diff --git a/extensions-core/druid-kerberos/src/test/java/io/druid/security/kerberos/SpnegoFilterConfigTest.java b/extensions-core/druid-kerberos/src/test/java/io/druid/security/kerberos/SpnegoFilterConfigTest.java deleted file mode 100644 index e1ee98606324..000000000000 --- a/extensions-core/druid-kerberos/src/test/java/io/druid/security/kerberos/SpnegoFilterConfigTest.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to Metamarkets Group Inc. (Metamarkets) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
Metamarkets licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package io.druid.security.kerberos; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.inject.Binder; -import com.google.inject.Guice; -import com.google.inject.Injector; -import com.google.inject.Module; -import com.google.inject.Provides; -import io.druid.guice.ConfigModule; -import io.druid.guice.DruidGuiceExtensions; -import io.druid.guice.JsonConfigProvider; -import io.druid.guice.LazySingleton; -import io.druid.guice.PropertiesModule; -import io.druid.jackson.DefaultObjectMapper; -import org.junit.Assert; -import org.junit.Test; - -import java.util.Arrays; -import java.util.Properties; - -public class SpnegoFilterConfigTest -{ - @Test - public void testserde() - { - Injector injector = Guice.createInjector( - new Module() - { - @Override - public void configure(Binder binder) - { - binder.install(new PropertiesModule(Arrays.asList("test.runtime.properties"))); - binder.install(new ConfigModule()); - binder.install(new DruidGuiceExtensions()); - JsonConfigProvider.bind(binder, "druid.hadoop.security.spnego", SpnegoFilterConfig.class); - } - - @Provides - @LazySingleton - public ObjectMapper jsonMapper() - { - return new DefaultObjectMapper(); - } - } - ); - - Properties props = injector.getInstance(Properties.class); - SpnegoFilterConfig config = injector.getInstance(SpnegoFilterConfig.class); - - Assert.assertEquals(props.getProperty("druid.hadoop.security.spnego.principal"), config.getPrincipal()); - Assert.assertEquals(props.getProperty("druid.hadoop.security.spnego.keytab"), config.getKeytab()); - Assert.assertEquals(props.getProperty("druid.hadoop.security.spnego.authToLocal"), config.getAuthToLocal()); - - - } -} From ff27c5477403aacb3bbf4c1d6167cf7bcafa1f58 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 10 Apr 2018 10:35:09 -0700 Subject: [PATCH 37/67] SQL: Remove useless boolean CASTs in filters. (#5619) --- .../sql/calcite/expression/Expressions.java | 9 ++++-- .../druid/sql/calcite/CalciteQueryTest.java | 28 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/sql/src/main/java/io/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/io/druid/sql/calcite/expression/Expressions.java index 3cff45e36546..8917260d7417 100644 --- a/sql/src/main/java/io/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/io/druid/sql/calcite/expression/Expressions.java @@ -218,9 +218,12 @@ public static DimFilter toFilter( final RexNode expression ) { - if (expression.getKind() == SqlKind.AND - || expression.getKind() == SqlKind.OR - || expression.getKind() == SqlKind.NOT) { + if (expression.getKind() == SqlKind.CAST && expression.getType().getSqlTypeName() == SqlTypeName.BOOLEAN) { + // Calcite sometimes leaves errant, useless cast-to-booleans inside filters. Strip them and continue. 
+ return toFilter(plannerContext, rowSignature, Iterables.getOnlyElement(((RexCall) expression).getOperands())); + } else if (expression.getKind() == SqlKind.AND + || expression.getKind() == SqlKind.OR + || expression.getKind() == SqlKind.NOT) { final List filters = Lists.newArrayList(); for (final RexNode rexNode : ((RexCall) expression).getOperands()) { final DimFilter nextFilter = toFilter( diff --git a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java index 12c9372a0919..5130a218045d 100644 --- a/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/io/druid/sql/calcite/CalciteQueryTest.java @@ -2965,6 +2965,34 @@ public void testCountStarWithTimeFilter() throws Exception ); } + @Test + public void testRemoveUselessCaseWhen() throws Exception + { + testQuery( + "SELECT COUNT(*) FROM druid.foo\n" + + "WHERE\n" + + " CASE\n" + + " WHEN __time >= TIME_PARSE('2000-01-01 00:00:00', 'yyyy-MM-dd HH:mm:ss') AND __time < TIMESTAMP '2001-01-01 00:00:00'\n" + + " THEN true\n" + + " ELSE false\n" + + " END\n" + + "OR\n" + + " __time >= TIMESTAMP '2010-01-01 00:00:00' AND __time < TIMESTAMP '2011-01-01 00:00:00'", + ImmutableList.of( + Druids.newTimeseriesQueryBuilder() + .dataSource(CalciteTests.DATASOURCE1) + .intervals(QSS(Intervals.of("2000/2001"), Intervals.of("2010/2011"))) + .granularity(Granularities.ALL) + .aggregators(AGGS(new CountAggregatorFactory("a0"))) + .context(TIMESERIES_CONTEXT_DEFAULT) + .build() + ), + ImmutableList.of( + new Object[]{3L} + ) + ); + } + @Test public void testCountStarWithTimeMillisecondFilters() throws Exception { From b32aad9ab4f797fc558c88db54f0f8da5675cca4 Mon Sep 17 00:00:00 2001 From: Nishant Bangarwa Date: Wed, 11 Apr 2018 22:57:33 +0530 Subject: [PATCH 38/67] Fix some broken links in druid docs (#5622) * Fix some broken links in druid docs * review comment --- docs/content/configuration/broker.md | 2 +- docs/content/development/extensions-core/avro.md | 2 +- .../development/extensions-core/kafka-extraction-namespace.md | 2 +- docs/content/development/extensions-core/mysql.md | 4 ++-- docs/content/development/extensions-core/protobuf.md | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/content/configuration/broker.md b/docs/content/configuration/broker.md index 0d57e843e348..cd074353201f 100644 --- a/docs/content/configuration/broker.md +++ b/docs/content/configuration/broker.md @@ -47,7 +47,7 @@ Druid uses Jetty to serve HTTP requests. |`druid.broker.http.compressionCodec`|Compression codec the Broker uses to communicate with historical and real-time processes. May be "gzip" or "identity".|gzip| |`druid.broker.http.readTimeout`|The timeout for data reads from historical and real-time processes.|PT15M| |`druid.broker.http.unusedConnectionTimeout`|The timeout for idle connections in connection pool. This timeout should be less than `druid.broker.http.readTimeout`. Set this timeout = ~90% of `druid.broker.http.readTimeout`|`PT4M`| -|`druid.server.http.maxQueryTimeout`|Maximum allowed value (in milliseconds) for `timeout` parameter. See [query-context](query-context.html) to know more about `timeout`. Query is rejected if the query context `timeout` is greater than this value. |Long.MAX_VALUE| +|`druid.server.http.maxQueryTimeout`|Maximum allowed value (in milliseconds) for `timeout` parameter. See [query-context](../querying/query-context.html) to know more about `timeout`. 
Query is rejected if the query context `timeout` is greater than this value. |Long.MAX_VALUE| |`druid.server.http.maxRequestHeaderSize`|Maximum size of a request header in bytes. Larger headers consume more memory and can make a server more vulnerable to denial of service attacks. |8 * 1024| diff --git a/docs/content/development/extensions-core/avro.md b/docs/content/development/extensions-core/avro.md index a8a4b820e221..57d6355b6e32 100644 --- a/docs/content/development/extensions-core/avro.md +++ b/docs/content/development/extensions-core/avro.md @@ -16,7 +16,7 @@ This is for streaming/realtime ingestion. | avroBytesDecoder | JSON Object | Specifies how to decode bytes to Avro record. | yes | | parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. Should be an "avro" parseSpec. | yes | -An Avro parseSpec can contain a [flattenSpec](../../ingestion/flatten-spec.html) using either the "root" or "path" +An Avro parseSpec can contain a [flattenSpec](../../ingestion/flatten-json.html) using either the "root" or "path" field types, which can be used to read nested Avro records. The "jq" field type is not currently supported for Avro. For example, using Avro stream parser with schema repo Avro bytes decoder: diff --git a/docs/content/development/extensions-core/kafka-extraction-namespace.md b/docs/content/development/extensions-core/kafka-extraction-namespace.md index fbf8915f4248..e1343296d62b 100644 --- a/docs/content/development/extensions-core/kafka-extraction-namespace.md +++ b/docs/content/development/extensions-core/kafka-extraction-namespace.md @@ -25,7 +25,7 @@ If you need updates to populate as promptly as possible, it is possible to plug |`kafkaTopic`|The kafka topic to read the data from|Yes|| |`kafkaProperties`|Kafka consumer properties. At least"zookeeper.connect" must be specified. Only the zookeeper connector is supported|Yes|| |`connectTimeout`|How long to wait for an initial connection|No|`0` (do not wait)| -|`isOneToOne`|The map is a one-to-one (see[Lookup DimensionSpecs](../querying/dimensionspecs.html))|No|`false`| +|`isOneToOne`|The map is a one-to-one (see [Lookup DimensionSpecs](../../querying/dimensionspecs.html))|No|`false`| The extension `kafka-extraction-namespace` enables reading from a kafka feed which has name/key pairs to allow renaming of dimension values. An example use case would be to rename an ID to a human readable format. 
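(An illustrative aside to the lookup docs above: one hypothetical way to publish key/value rename pairs to the Kafka topic that a `kafka-extraction-namespace` lookup reads. The broker address, topic name, and the ID-to-name pair are made-up examples, not values from this patch.)

```java
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

public class LookupFeedSketch
{
  public static void main(String[] args)
  {
    final Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092"); // assumed broker address
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

    // Each record is a key/value pair: the raw dimension value and the human readable name it maps to.
    try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
      producer.send(new ProducerRecord<>("dimension-rename-topic", "item_1234", "Blue Widget"));
    }
  }
}
```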
diff --git a/docs/content/development/extensions-core/mysql.md b/docs/content/development/extensions-core/mysql.md index 3276811eaed2..a51b84ffa9da 100644 --- a/docs/content/development/extensions-core/mysql.md +++ b/docs/content/development/extensions-core/mysql.md @@ -65,11 +65,11 @@ Make sure to [include](../../operations/including-extensions.html) `mysql-metada |`druid.metadata.mysql.ssl.useSSL`|Enable SSL|`false`|no| |`druid.metadata.mysql.ssl.clientCertificateKeyStoreUrl`|The file path URL to the client certificate key store.|none|no| |`druid.metadata.mysql.ssl.clientCertificateKeyStoreType`|The type of the key store where the client certificate is stored.|none|no| -|`druid.metadata.mysql.ssl.clientCertificateKeyStorePassword`|The [Password Provider](../operations/password-provider.html) or String password for the client key store.|none|no| +|`druid.metadata.mysql.ssl.clientCertificateKeyStorePassword`|The [Password Provider](../../operations/password-provider.html) or String password for the client key store.|none|no| |`druid.metadata.mysql.ssl.verifyServerCertificate`|Enables server certificate verification.|false|no| |`druid.metadata.mysql.ssl.trustCertificateKeyStoreUrl`|The file path to the trusted root certificate key store.|Default trust store provided by MySQL|yes if `verifyServerCertificate` is set to true and a custom trust store is used| |`druid.metadata.mysql.ssl.trustCertificateKeyStoreType`|The type of the key store where trusted root certificates are stored.|JKS|yes if `verifyServerCertificate` is set to true and keystore type is not JKS| -|`druid.metadata.mysql.ssl.trustCertificateKeyStorePassword`|The [Password Provider](../operations/password-provider.html) or String password for the trust store.|none|yes if `verifyServerCertificate` is set to true and password is not null| +|`druid.metadata.mysql.ssl.trustCertificateKeyStorePassword`|The [Password Provider](../../operations/password-provider.html) or String password for the trust store.|none|yes if `verifyServerCertificate` is set to true and password is not null| |`druid.metadata.mysql.ssl.enabledSSLCipherSuites`|Overrides the existing cipher suites with these cipher suites.|none|no| |`druid.metadata.mysql.ssl.enabledTLSProtocols`|Overrides the TLS protocols with these protocols.|none|no| diff --git a/docs/content/development/extensions-core/protobuf.md b/docs/content/development/extensions-core/protobuf.md index c45e47c6ecd5..1a066c4d4b9f 100644 --- a/docs/content/development/extensions-core/protobuf.md +++ b/docs/content/development/extensions-core/protobuf.md @@ -18,7 +18,7 @@ This extension enables Druid to ingest and understand the Protobuf data format. ## Example: Load Protobuf messages from Kafka -This example demonstrates how to load Protobuf messages from Kafka. Please read the [Load from Kafka tutorial](../../tutorial/tutorial-kafka.html) first. This example will use the same "metrics" dataset. +This example demonstrates how to load Protobuf messages from Kafka. Please read the [Load from Kafka tutorial](../../tutorials/tutorial-kafka.html) first. This example will use the same "metrics" dataset. Files used in this example are found at `./examples/quickstart/protobuf` in your Druid directory. 
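(An illustrative aside to the Protobuf docs above: a sketch of building and serializing one metrics record with the standard protobuf-java builder API; the resulting bytes are what a Kafka producer would send as the message value for Druid to parse. `Metrics.MetricRecord` and its fields are assumed to come from a `protoc`-generated class on the classpath and are not part of Druid or this patch.)

```java
public class ProtobufPayloadSketch
{
  // Metrics.MetricRecord is assumed to be generated by protoc from a metrics .proto file;
  // the class, field names, and values here are illustrative only.
  public static byte[] buildPayload()
  {
    Metrics.MetricRecord record = Metrics.MetricRecord.newBuilder()
        .setTimestamp("2018-04-11T00:00:00Z")
        .setMetric("request/time")
        .setValue(123.0)
        .build();
    // The serialized bytes would be published to the Kafka topic the ingestion spec consumes.
    return record.toByteArray();
  }
}
```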
From afa75e04b779a651abffd2f4f272f2afb07fdeea Mon Sep 17 00:00:00 2001 From: Caroline1000 Date: Wed, 11 Apr 2018 12:57:22 -0700 Subject: [PATCH 39/67] change header in overlord console; minor querydoc change (#5625) * change header in overlord console; minor querydoc change * remove change to overlord console * address Gian comments --- docs/content/querying/groupbyquery.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/content/querying/groupbyquery.md b/docs/content/querying/groupbyquery.md index d67671fdd188..52dfddfd24d9 100644 --- a/docs/content/querying/groupbyquery.md +++ b/docs/content/querying/groupbyquery.md @@ -235,10 +235,8 @@ strategy perform the outer query on the broker in a single-threaded fashion. #### Configurations -This section describes the configurations for groupBy queries. You can set system-wide configurations by adding them to runtime properties or query-specific configurations by adding them to query contexts. All runtime properties are prefixed by `druid.query.groupBy`. - -#### Commonly tuned configurations - +This section describes the configurations for groupBy queries. You can set the runtime properties in the `runtime.properties` file on broker, historical, and MiddleManager nodes. You can set the query context parameters through the [query context](query-context.html). + ##### Configurations for groupBy v2 Supported runtime properties: From e6efd75a3d1ef7d74ec9bb5739d9184c19969055 Mon Sep 17 00:00:00 2001 From: Nishant Bangarwa Date: Thu, 12 Apr 2018 05:40:07 +0530 Subject: [PATCH 40/67] Add config to allow setting up custom unsecured paths for druid nodes. (#5614) * Add config to allow setting up custom unsecured paths for druid nodes. * return all resources for Unsecured paths * review comment - Add test * fix tests * fix test --- docs/content/configuration/auth.md | 1 + .../overlord/http/OverlordResourceTest.java | 2 +- .../indexing/overlord/http/OverlordTest.java | 1 + .../supervisor/SupervisorResourceTest.java | 6 ++++ integration-tests/docker/coordinator.conf | 1 + .../ITBasicAuthConfigurationTest.java | 10 ++++++ .../io/druid/server/security/AuthConfig.java | 35 +++++++++++-------- .../server/security/AuthorizationUtils.java | 13 +++++++ .../security/SecuritySanityCheckFilter.java | 4 ++- .../security/UnsecuredResourceFilter.java | 1 + .../firehose/EventReceiverFirehoseTest.java | 4 +++ .../io/druid/server/QueryResourceTest.java | 17 +++++---- .../server/http/DatasourcesResourceTest.java | 7 +++- .../server/http/IntervalsResourceTest.java | 4 +++ .../security/ResourceFilterTestHelper.java | 3 +- .../SecuritySanityCheckFilterTest.java | 2 ++ .../java/io/druid/cli/CliMiddleManager.java | 4 ++- .../main/java/io/druid/cli/CliOverlord.java | 15 +++++++- .../CoordinatorJettyServerInitializer.java | 5 ++- .../MiddleManagerJettyServerInitializer.java | 10 ++++++ .../cli/QueryJettyServerInitializer.java | 7 +++- .../cli/RouterJettyServerInitializer.java | 7 +++- .../sql/calcite/http/SqlResourceTest.java | 1 + 23 files changed, 131 insertions(+), 29 deletions(-) diff --git a/docs/content/configuration/auth.md b/docs/content/configuration/auth.md index ae17ebf14d0d..296d9ba2b1e8 100644 --- a/docs/content/configuration/auth.md +++ b/docs/content/configuration/auth.md @@ -9,6 +9,7 @@ layout: doc_page |`druid.auth.authenticationChain`|JSON List of Strings|List of Authenticator type names|["allowAll"]|no| |`druid.escalator.type`|String|Type of the Escalator that should be used for internal Druid communications. 
This Escalator must use an authentication scheme that is supported by an Authenticator in `druid.auth.authenticationChain`.|"noop"|no| |`druid.auth.authorizers`|JSON List of Strings|List of Authorizer type names |["allowAll"]|no| +|`druid.auth.unsecuredPaths`| List of Strings|List of paths for which security checks will not be performed. All requests to these paths will be allowed.|[]|no| ## Enabling Authentication/Authorization diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/http/OverlordResourceTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/http/OverlordResourceTest.java index 9c6917d9dac7..d3cd412e5af4 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/http/OverlordResourceTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/http/OverlordResourceTest.java @@ -126,7 +126,7 @@ public Access authorize(AuthenticationResult authenticationResult, Resource reso public void expectAuthorizationTokenCheck() { AuthenticationResult authenticationResult = new AuthenticationResult("druid", "druid", null); - + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) .andReturn(authenticationResult) diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/http/OverlordTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/http/OverlordTest.java index f94cfd089fc6..8889aa284560 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/http/OverlordTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/http/OverlordTest.java @@ -128,6 +128,7 @@ private void tearDownServerAndCurator() public void setUp() throws Exception { req = EasyMock.createMock(HttpServletRequest.class); + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) diff --git a/indexing-service/src/test/java/io/druid/indexing/overlord/supervisor/SupervisorResourceTest.java b/indexing-service/src/test/java/io/druid/indexing/overlord/supervisor/SupervisorResourceTest.java index b1947e3f202e..747c7dd0b64c 100644 --- a/indexing-service/src/test/java/io/druid/indexing/overlord/supervisor/SupervisorResourceTest.java +++ b/indexing-service/src/test/java/io/druid/indexing/overlord/supervisor/SupervisorResourceTest.java @@ -113,6 +113,7 @@ public List getDataSources() EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)); EasyMock.expect(supervisorManager.createOrUpdateAndStartSupervisor(spec)).andReturn(true); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) @@ -162,6 +163,7 @@ public List getDataSources() EasyMock.expect(supervisorManager.getSupervisorIds()).andReturn(supervisorIds).atLeastOnce(); 
EasyMock.expect(supervisorManager.getSupervisorSpec("id1")).andReturn(Optional.of(spec1)); EasyMock.expect(supervisorManager.getSupervisorSpec("id2")).andReturn(Optional.of(spec2)); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) @@ -345,6 +347,7 @@ public void testSpecGetAllHistory() SupervisorSpec spec2 = new TestSupervisorSpec("id2", null, Arrays.asList("datasource2")); EasyMock.expect(supervisorManager.getSupervisorSpec("id1")).andReturn(Optional.of(spec1)).atLeastOnce(); EasyMock.expect(supervisorManager.getSupervisorSpec("id2")).andReturn(Optional.of(spec2)).atLeastOnce(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) @@ -457,6 +460,7 @@ public void testSpecGetAllHistoryWithAuthFailureFiltering() SupervisorSpec spec2 = new TestSupervisorSpec("id2", null, Arrays.asList("datasource2")); EasyMock.expect(supervisorManager.getSupervisorSpec("id1")).andReturn(Optional.of(spec1)).atLeastOnce(); EasyMock.expect(supervisorManager.getSupervisorSpec("id2")).andReturn(Optional.of(spec2)).atLeastOnce(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("wronguser", "druid", null) @@ -547,6 +551,7 @@ public void testSpecGetHistory() EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)).times(3); EasyMock.expect(supervisorManager.getSupervisorHistory()).andReturn(history).times(3); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) @@ -644,6 +649,7 @@ public void testSpecGetHistoryWithAuthFailure() throws Exception EasyMock.expect(taskMaster.getSupervisorManager()).andReturn(Optional.of(supervisorManager)).times(4); EasyMock.expect(supervisorManager.getSupervisorHistory()).andReturn(history).times(4); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).atLeastOnce(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("notdruid", "druid", null) diff --git a/integration-tests/docker/coordinator.conf b/integration-tests/docker/coordinator.conf index 41ed94889acc..c6979432e752 100644 --- a/integration-tests/docker/coordinator.conf +++ b/integration-tests/docker/coordinator.conf @@ -28,6 +28,7 @@ command=java -Ddruid.escalator.authorizerName=basic 
-Ddruid.auth.authorizers="[\"basic\"]" -Ddruid.auth.authorizer.basic.type=basic + -Ddruid.auth.unsecuredPaths="[\"/druid/coordinator/v1/loadqueue\"]" -cp /shared/docker/lib/* io.druid.cli.Main server coordinator redirect_stderr=true diff --git a/integration-tests/src/test/java/io/druid/tests/security/ITBasicAuthConfigurationTest.java b/integration-tests/src/test/java/io/druid/tests/security/ITBasicAuthConfigurationTest.java index c4d3b41e908f..17c87efa0fd2 100644 --- a/integration-tests/src/test/java/io/druid/tests/security/ITBasicAuthConfigurationTest.java +++ b/integration-tests/src/test/java/io/druid/tests/security/ITBasicAuthConfigurationTest.java @@ -100,6 +100,11 @@ public void testAuthConfiguration() throws Exception httpClient ); + final HttpClient unsecuredClient = httpClient; + + // check that we are allowed to access unsecured path without credentials. + checkUnsecuredCoordinatorLoadQueuePath(unsecuredClient); + // check that admin works checkNodeAccess(adminClient); @@ -221,6 +226,11 @@ public void testAuthConfiguration() throws Exception testAvaticaAuthFailure(routerUrl); } + private void checkUnsecuredCoordinatorLoadQueuePath(HttpClient client) + { + makeRequest(client, HttpMethod.GET, config.getCoordinatorUrl() + "/druid/coordinator/v1/loadqueue", null); + } + private void testAvaticaQuery(String url) { LOG.info("URL: " + url); diff --git a/server/src/main/java/io/druid/server/security/AuthConfig.java b/server/src/main/java/io/druid/server/security/AuthConfig.java index af4768a4d5e0..95f67c9ee0fc 100644 --- a/server/src/main/java/io/druid/server/security/AuthConfig.java +++ b/server/src/main/java/io/druid/server/security/AuthConfig.java @@ -22,7 +22,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.Collections; import java.util.List; +import java.util.Objects; public class AuthConfig { @@ -36,21 +38,25 @@ public class AuthConfig */ public static final String DRUID_AUTHORIZATION_CHECKED = "Druid-Authorization-Checked"; + public static final String DRUID_ALLOW_UNSECURED_PATH = "Druid-Allow-Unsecured-Path"; + public static final String ALLOW_ALL_NAME = "allowAll"; public AuthConfig() { - this(null, null); + this(null, null, null); } @JsonCreator public AuthConfig( @JsonProperty("authenticatorChain") List authenticationChain, - @JsonProperty("authorizers") List authorizers + @JsonProperty("authorizers") List authorizers, + @JsonProperty("unsecuredPaths") List unsecuredPaths ) { this.authenticatorChain = authenticationChain; this.authorizers = authorizers; + this.unsecuredPaths = unsecuredPaths == null ? Collections.emptyList() : unsecuredPaths; } @JsonProperty @@ -59,6 +65,9 @@ public AuthConfig( @JsonProperty private List authorizers; + @JsonProperty + private final List unsecuredPaths; + public List getAuthenticatorChain() { return authenticatorChain; @@ -69,12 +78,18 @@ public List getAuthorizers() return authorizers; } + public List getUnsecuredPaths() + { + return unsecuredPaths; + } + @Override public String toString() { return "AuthConfig{" + "authenticatorChain='" + authenticatorChain + '\'' + ", authorizers='" + authorizers + '\'' + + ", unsecuredPaths='" + unsecuredPaths + '\'' + '}'; } @@ -87,23 +102,15 @@ public boolean equals(Object o) if (o == null || getClass() != o.getClass()) { return false; } - AuthConfig that = (AuthConfig) o; - - if (getAuthenticatorChain() != null - ? 
!getAuthenticatorChain().equals(that.getAuthenticatorChain()) - : that.getAuthenticatorChain() != null) { - return false; - } - return getAuthorizers() != null ? getAuthorizers().equals(that.getAuthorizers()) : that.getAuthorizers() == null; - + return Objects.equals(authenticatorChain, that.authenticatorChain) && + Objects.equals(authorizers, that.authorizers) && + Objects.equals(unsecuredPaths, that.unsecuredPaths); } @Override public int hashCode() { - int result = getAuthenticatorChain() != null ? getAuthenticatorChain().hashCode() : 0; - result = 31 * result + (getAuthorizers() != null ? getAuthorizers().hashCode() : 0); - return result; + return Objects.hash(authenticatorChain, authorizers, unsecuredPaths); } } diff --git a/server/src/main/java/io/druid/server/security/AuthorizationUtils.java b/server/src/main/java/io/druid/server/security/AuthorizationUtils.java index b14d6a788511..9d478326fe6a 100644 --- a/server/src/main/java/io/druid/server/security/AuthorizationUtils.java +++ b/server/src/main/java/io/druid/server/security/AuthorizationUtils.java @@ -155,6 +155,10 @@ public static Access authorizeAllResourceActions( final AuthorizerMapper authorizerMapper ) { + if (request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH) != null) { + return Access.OK; + } + if (request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED) != null) { throw new ISE("Request already had authorization check."); } @@ -201,6 +205,10 @@ public static Iterable filterAuthorizedResources( final AuthorizerMapper authorizerMapper ) { + if (request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH) != null) { + return resources; + } + if (request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED) != null) { throw new ISE("Request already had authorization check."); } @@ -309,6 +317,11 @@ public static Map> filterAuthorizedRes final AuthorizerMapper authorizerMapper ) { + + if (request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH) != null) { + return unfilteredResources; + } + if (request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED) != null) { throw new ISE("Request already had authorization check."); } diff --git a/server/src/main/java/io/druid/server/security/SecuritySanityCheckFilter.java b/server/src/main/java/io/druid/server/security/SecuritySanityCheckFilter.java index 5e1b79cf6afc..0dee53add770 100644 --- a/server/src/main/java/io/druid/server/security/SecuritySanityCheckFilter.java +++ b/server/src/main/java/io/druid/server/security/SecuritySanityCheckFilter.java @@ -76,8 +76,10 @@ public void doFilter( // make sure the original request isn't trying to fake the auth token checks Boolean authInfoChecked = (Boolean) request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED); + Boolean allowUnsecured = (Boolean) request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH); + AuthenticationResult result = (AuthenticationResult) request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT); - if (authInfoChecked != null || result != null) { + if (authInfoChecked != null || result != null || allowUnsecured != null) { sendJsonError(httpResponse, Response.SC_FORBIDDEN, unauthorizedMessage, out); out.close(); return; diff --git a/server/src/main/java/io/druid/server/security/UnsecuredResourceFilter.java b/server/src/main/java/io/druid/server/security/UnsecuredResourceFilter.java index 81374411fbf2..b0e7b84fa72b 100644 --- a/server/src/main/java/io/druid/server/security/UnsecuredResourceFilter.java +++ b/server/src/main/java/io/druid/server/security/UnsecuredResourceFilter.java @@ -53,6 +53,7 
@@ public void doFilter( // This request will not go to an Authorizer, so we need to set this for PreResponseAuthorizationCheckFilter servletRequest.setAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED, true); + servletRequest.setAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH, true); filterChain.doFilter(servletRequest, servletResponse); } diff --git a/server/src/test/java/io/druid/segment/realtime/firehose/EventReceiverFirehoseTest.java b/server/src/test/java/io/druid/segment/realtime/firehose/EventReceiverFirehoseTest.java index c0bc1c1d6dbc..ffe066f5d022 100644 --- a/server/src/test/java/io/druid/segment/realtime/firehose/EventReceiverFirehoseTest.java +++ b/server/src/test/java/io/druid/segment/realtime/firehose/EventReceiverFirehoseTest.java @@ -144,6 +144,7 @@ public void testMultipleThreads() throws InterruptedException, IOException, Time EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)) .andReturn(null) .anyTimes(); + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) .andReturn(AllowAllAuthenticator.ALLOW_ALL_RESULT) .anyTimes(); @@ -246,6 +247,7 @@ public void testShutdownWithPrevTime() throws Exception EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)) .andReturn(null) .anyTimes(); + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) .andReturn(AllowAllAuthenticator.ALLOW_ALL_RESULT) .anyTimes(); @@ -265,6 +267,7 @@ public void testShutdown() throws Exception EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)) .andReturn(null) .anyTimes(); + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) .andReturn(AllowAllAuthenticator.ALLOW_ALL_RESULT) .anyTimes(); @@ -403,6 +406,7 @@ private void setUpRequestExpectations(String producerId, String producerSequence EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)) .andReturn(null) .anyTimes(); + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) .andReturn(AllowAllAuthenticator.ALLOW_ALL_RESULT) .anyTimes(); diff --git a/server/src/test/java/io/druid/server/QueryResourceTest.java b/server/src/test/java/io/druid/server/QueryResourceTest.java index b099f002d267..3d0960d6544c 100644 --- a/server/src/test/java/io/druid/server/QueryResourceTest.java +++ b/server/src/test/java/io/druid/server/QueryResourceTest.java @@ -170,6 +170,7 @@ public void testGoodQuery() throws IOException EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)) .andReturn(null) .anyTimes(); + EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) .andReturn(authenticationResult) @@ -206,6 +207,7 @@ public void testSecuredQuery() throws Exception EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)) .andReturn(null) .anyTimes(); + EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); 
EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) .andReturn(authenticationResult) @@ -247,13 +249,13 @@ public Access authorize(AuthenticationResult authenticationResult, Resource reso new DefaultGenericQueryMetricsFactory(jsonMapper), new NoopServiceEmitter(), testRequestLogger, - new AuthConfig(null, null), + new AuthConfig(), authMapper ), jsonMapper, jsonMapper, queryManager, - new AuthConfig(null, null), + new AuthConfig(), authMapper, new DefaultGenericQueryMetricsFactory(jsonMapper) ); @@ -301,6 +303,7 @@ public void testSecuredCancelQuery() throws Exception EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)) .andReturn(null) .anyTimes(); + EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) .andReturn(authenticationResult) @@ -354,13 +357,13 @@ public Access authorize(AuthenticationResult authenticationResult, Resource reso new DefaultGenericQueryMetricsFactory(jsonMapper), new NoopServiceEmitter(), testRequestLogger, - new AuthConfig(null, null), + new AuthConfig(), authMapper ), jsonMapper, jsonMapper, queryManager, - new AuthConfig(null, null), + new AuthConfig(), authMapper, new DefaultGenericQueryMetricsFactory(jsonMapper) ); @@ -426,6 +429,8 @@ public void testDenySecuredCancelQuery() throws Exception .andReturn(null) .anyTimes(); + EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); + EasyMock.expect(testServletRequest.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) .andReturn(authenticationResult) .anyTimes(); @@ -475,13 +480,13 @@ public Access authorize(AuthenticationResult authenticationResult, Resource reso new DefaultGenericQueryMetricsFactory(jsonMapper), new NoopServiceEmitter(), testRequestLogger, - new AuthConfig(null, null), + new AuthConfig(), authMapper ), jsonMapper, jsonMapper, queryManager, - new AuthConfig(null, null), + new AuthConfig(), authMapper, new DefaultGenericQueryMetricsFactory(jsonMapper) ); diff --git a/server/src/test/java/io/druid/server/http/DatasourcesResourceTest.java b/server/src/test/java/io/druid/server/http/DatasourcesResourceTest.java index b2f692f44de8..1c3e31bd2709 100644 --- a/server/src/test/java/io/druid/server/http/DatasourcesResourceTest.java +++ b/server/src/test/java/io/druid/server/http/DatasourcesResourceTest.java @@ -128,6 +128,7 @@ public void testGetFullQueryableDataSources() EasyMock.expect(inventoryView.getInventory()).andReturn( ImmutableList.of(server) ).once(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) @@ -142,6 +143,7 @@ public void testGetFullQueryableDataSources() EasyMock.expect(inventoryView.getInventory()).andReturn( ImmutableList.of(server) ).once(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) @@ -184,6 +186,7 @@ public void 
testSecuredGetFullQueryableDataSources() ImmutableList.of(listDataSources.get(0), listDataSources.get(1)) ).once(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( authenticationResult @@ -200,6 +203,7 @@ public void testSecuredGetFullQueryableDataSources() ImmutableList.of(listDataSources.get(0), listDataSources.get(1)) ).once(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( authenticationResult @@ -236,7 +240,7 @@ public Access authorize(AuthenticationResult authenticationResult1, Resource res inventoryView, null, null, - new AuthConfig(null, null), + new AuthConfig(), authMapper ); Response response = datasourcesResource.getQueryableDataSources("full", null, request); @@ -277,6 +281,7 @@ public void testGetSimpleQueryableDataSources() EasyMock.expect(inventoryView.getInventory()).andReturn( ImmutableList.of(server) ).atLeastOnce(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) diff --git a/server/src/test/java/io/druid/server/http/IntervalsResourceTest.java b/server/src/test/java/io/druid/server/http/IntervalsResourceTest.java index 067bf869d1b6..a83b2f54feb7 100644 --- a/server/src/test/java/io/druid/server/http/IntervalsResourceTest.java +++ b/server/src/test/java/io/druid/server/http/IntervalsResourceTest.java @@ -108,6 +108,7 @@ public void testGetIntervals() EasyMock.expect(inventoryView.getInventory()).andReturn( ImmutableList.of(server) ).atLeastOnce(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) @@ -145,6 +146,7 @@ public void testSimpleGetSpecificIntervals() EasyMock.expect(inventoryView.getInventory()).andReturn( ImmutableList.of(server) ).atLeastOnce(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) @@ -176,6 +178,7 @@ public void testFullGetSpecificIntervals() EasyMock.expect(inventoryView.getInventory()).andReturn( ImmutableList.of(server) ).atLeastOnce(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) @@ -209,6 +212,7 @@ 
public void testGetSpecificIntervals() EasyMock.expect(inventoryView.getInventory()).andReturn( ImmutableList.of(server) ).atLeastOnce(); + EasyMock.expect(request.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); EasyMock.expect(request.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn( new AuthenticationResult("druid", "druid", null) diff --git a/server/src/test/java/io/druid/server/http/security/ResourceFilterTestHelper.java b/server/src/test/java/io/druid/server/http/security/ResourceFilterTestHelper.java index 921013d24ad2..d94b4a83ea0a 100644 --- a/server/src/test/java/io/druid/server/http/security/ResourceFilterTestHelper.java +++ b/server/src/test/java/io/druid/server/http/security/ResourceFilterTestHelper.java @@ -110,6 +110,7 @@ public MultivaluedMap getMatrixParameters() ) ).anyTimes(); EasyMock.expect(request.getMethod()).andReturn(requestMethod).anyTimes(); + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).anyTimes(); AuthenticationResult authenticationResult = new AuthenticationResult("druid", "druid", null); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)) @@ -182,7 +183,7 @@ public void configure(Binder binder) for (Key key : mockableKeys) { binder.bind((Key) key).toInstance(EasyMock.createNiceMock(key.getTypeLiteral().getRawType())); } - binder.bind(AuthConfig.class).toInstance(new AuthConfig(null, null)); + binder.bind(AuthConfig.class).toInstance(new AuthConfig()); } } ); diff --git a/server/src/test/java/io/druid/server/http/security/SecuritySanityCheckFilterTest.java b/server/src/test/java/io/druid/server/http/security/SecuritySanityCheckFilterTest.java index 891afc912343..92bc07b4fb13 100644 --- a/server/src/test/java/io/druid/server/http/security/SecuritySanityCheckFilterTest.java +++ b/server/src/test/java/io/druid/server/http/security/SecuritySanityCheckFilterTest.java @@ -41,6 +41,7 @@ public void testValidRequest() throws Exception FilterChain filterChain = EasyMock.createStrictMock(FilterChain.class); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(null).once(); + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).once(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn(null).once(); filterChain.doFilter(req, resp); EasyMock.expectLastCall().once(); @@ -61,6 +62,7 @@ public void testInvalidRequest() throws Exception AuthenticationResult authenticationResult = new AuthenticationResult("does-not-belong", "does-not-belong", null); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)).andReturn(true).once(); + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHENTICATION_RESULT)).andReturn(authenticationResult).once(); EasyMock.expect(resp.getOutputStream()).andReturn(outputStream).once(); resp.setStatus(403); diff --git a/services/src/main/java/io/druid/cli/CliMiddleManager.java b/services/src/main/java/io/druid/cli/CliMiddleManager.java index 58ef96bb20eb..ed4d44a27621 100644 --- a/services/src/main/java/io/druid/cli/CliMiddleManager.java +++ b/services/src/main/java/io/druid/cli/CliMiddleManager.java @@ -97,7 +97,9 @@ public void 
configure(Binder binder) binder.bind(WorkerCuratorCoordinator.class).in(ManageLifecycle.class); LifecycleModule.register(binder, WorkerTaskMonitor.class); - binder.bind(JettyServerInitializer.class).toInstance(new MiddleManagerJettyServerInitializer()); + binder.bind(JettyServerInitializer.class) + .to(MiddleManagerJettyServerInitializer.class) + .in(LazySingleton.class); Jerseys.addResource(binder, WorkerResource.class); Jerseys.addResource(binder, TaskManagementResource.class); diff --git a/services/src/main/java/io/druid/cli/CliOverlord.java b/services/src/main/java/io/druid/cli/CliOverlord.java index 51ca382590d4..33f842c2e4e6 100644 --- a/services/src/main/java/io/druid/cli/CliOverlord.java +++ b/services/src/main/java/io/druid/cli/CliOverlord.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; +import com.google.inject.Inject; import com.google.inject.Injector; import com.google.inject.Key; import com.google.inject.Module; @@ -91,6 +92,7 @@ import io.druid.server.http.RedirectInfo; import io.druid.server.initialization.jetty.JettyServerInitUtils; import io.druid.server.initialization.jetty.JettyServerInitializer; +import io.druid.server.security.AuthConfig; import io.druid.server.security.AuthenticationUtils; import io.druid.server.security.Authenticator; import io.druid.server.security.AuthenticatorMapper; @@ -194,7 +196,9 @@ public void configure(Binder binder) if (standalone) { binder.bind(RedirectFilter.class).in(LazySingleton.class); binder.bind(RedirectInfo.class).to(OverlordRedirectInfo.class).in(LazySingleton.class); - binder.bind(JettyServerInitializer.class).toInstance(new OverlordJettyServerInitializer()); + binder.bind(JettyServerInitializer.class) + .to(OverlordJettyServerInitializer.class) + .in(LazySingleton.class); } Jerseys.addResource(binder, OverlordResource.class); @@ -302,6 +306,14 @@ private void configureOverlordHelpers(Binder binder) */ private static class OverlordJettyServerInitializer implements JettyServerInitializer { + private final AuthConfig authConfig; + + @Inject + OverlordJettyServerInitializer(AuthConfig authConfig) + { + this.authConfig = authConfig; + } + @Override public void initialize(Server server, Injector injector) { @@ -330,6 +342,7 @@ public void initialize(Server server, Injector injector) // perform no-op authorization for these resources AuthenticationUtils.addNoopAuthorizationFilters(root, UNSECURED_PATHS); + AuthenticationUtils.addNoopAuthorizationFilters(root, authConfig.getUnsecuredPaths()); authenticators = authenticatorMapper.getAuthenticatorChain(); AuthenticationUtils.addAuthenticationFilterChain(root, authenticators); diff --git a/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java b/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java index 7a0562f3dfd4..c9bc725e5e95 100644 --- a/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java +++ b/services/src/main/java/io/druid/cli/CoordinatorJettyServerInitializer.java @@ -71,12 +71,14 @@ class CoordinatorJettyServerInitializer implements JettyServerInitializer private final DruidCoordinatorConfig config; private final boolean beOverlord; + private final AuthConfig authConfig; @Inject - CoordinatorJettyServerInitializer(DruidCoordinatorConfig config, Properties properties) + CoordinatorJettyServerInitializer(DruidCoordinatorConfig config, Properties properties, AuthConfig authConfig) { this.config = config; 
this.beOverlord = CliCoordinator.isOverlord(properties); + this.authConfig = authConfig; } @Override @@ -117,6 +119,7 @@ public void initialize(Server server, Injector injector) // perform no-op authorization for these resources AuthenticationUtils.addNoopAuthorizationFilters(root, UNSECURED_PATHS); + AuthenticationUtils.addNoopAuthorizationFilters(root, authConfig.getUnsecuredPaths()); if (beOverlord) { AuthenticationUtils.addNoopAuthorizationFilters(root, CliOverlord.UNSECURED_PATHS); diff --git a/services/src/main/java/io/druid/cli/MiddleManagerJettyServerInitializer.java b/services/src/main/java/io/druid/cli/MiddleManagerJettyServerInitializer.java index 2eaa4d154fe1..9408acc588bf 100644 --- a/services/src/main/java/io/druid/cli/MiddleManagerJettyServerInitializer.java +++ b/services/src/main/java/io/druid/cli/MiddleManagerJettyServerInitializer.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; +import com.google.inject.Inject; import com.google.inject.Injector; import com.google.inject.Key; import com.google.inject.servlet.GuiceFilter; @@ -52,6 +53,14 @@ class MiddleManagerJettyServerInitializer implements JettyServerInitializer "/status/health" ); + private final AuthConfig authConfig; + + @Inject + public MiddleManagerJettyServerInitializer(AuthConfig authConfig) + { + this.authConfig = authConfig; + } + @Override public void initialize(Server server, Injector injector) { @@ -67,6 +76,7 @@ public void initialize(Server server, Injector injector) // perform no-op authorization for these resources AuthenticationUtils.addNoopAuthorizationFilters(root, UNSECURED_PATHS); + AuthenticationUtils.addNoopAuthorizationFilters(root, authConfig.getUnsecuredPaths()); authenticators = authenticatorMapper.getAuthenticatorChain(); AuthenticationUtils.addAuthenticationFilterChain(root, authenticators); diff --git a/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java b/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java index e9ddfb6f1c63..b079041b2ad9 100644 --- a/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java +++ b/services/src/main/java/io/druid/cli/QueryJettyServerInitializer.java @@ -33,6 +33,7 @@ import io.druid.server.initialization.jetty.JettyServerInitUtils; import io.druid.server.initialization.jetty.JettyServerInitializer; import io.druid.server.initialization.jetty.LimitRequestsFilter; +import io.druid.server.security.AuthConfig; import io.druid.server.security.AuthenticationUtils; import io.druid.server.security.Authenticator; import io.druid.server.security.AuthenticatorMapper; @@ -61,11 +62,14 @@ public class QueryJettyServerInitializer implements JettyServerInitializer private final ServerConfig serverConfig; + private final AuthConfig authConfig; + @Inject - public QueryJettyServerInitializer(Set extensionHandlers, ServerConfig serverConfig) + public QueryJettyServerInitializer(Set extensionHandlers, ServerConfig serverConfig, AuthConfig authConfig) { this.extensionHandlers = ImmutableList.copyOf(extensionHandlers); this.serverConfig = serverConfig; + this.authConfig = authConfig; } @Override @@ -96,6 +100,7 @@ public void initialize(Server server, Injector injector) // perform no-op authorization for these resources AuthenticationUtils.addNoopAuthorizationFilters(root, UNSECURED_PATHS); + AuthenticationUtils.addNoopAuthorizationFilters(root, authConfig.getUnsecuredPaths()); authenticators = authenticatorMapper.getAuthenticatorChain(); 
AuthenticationUtils.addAuthenticationFilterChain(root, authenticators); diff --git a/services/src/main/java/io/druid/cli/RouterJettyServerInitializer.java b/services/src/main/java/io/druid/cli/RouterJettyServerInitializer.java index e67ae5889fc8..ec0ca471dfef 100644 --- a/services/src/main/java/io/druid/cli/RouterJettyServerInitializer.java +++ b/services/src/main/java/io/druid/cli/RouterJettyServerInitializer.java @@ -34,6 +34,7 @@ import io.druid.server.initialization.jetty.JettyServerInitializer; import io.druid.server.router.ManagementProxyConfig; import io.druid.server.router.Router; +import io.druid.server.security.AuthConfig; import io.druid.server.security.AuthenticationUtils; import io.druid.server.security.Authenticator; import io.druid.server.security.AuthenticatorMapper; @@ -63,6 +64,7 @@ public class RouterJettyServerInitializer implements JettyServerInitializer private final ManagementProxyConfig managementProxyConfig; private final AsyncQueryForwardingServlet asyncQueryForwardingServlet; private final AsyncManagementForwardingServlet asyncManagementForwardingServlet; + private final AuthConfig authConfig; @Inject public RouterJettyServerInitializer( @@ -70,7 +72,8 @@ public RouterJettyServerInitializer( @Global DruidHttpClientConfig globalHttpClientConfig, ManagementProxyConfig managementProxyConfig, AsyncQueryForwardingServlet asyncQueryForwardingServlet, - AsyncManagementForwardingServlet asyncManagementForwardingServlet + AsyncManagementForwardingServlet asyncManagementForwardingServlet, + AuthConfig authConfig ) { this.routerHttpClientConfig = routerHttpClientConfig; @@ -78,6 +81,7 @@ public RouterJettyServerInitializer( this.managementProxyConfig = managementProxyConfig; this.asyncQueryForwardingServlet = asyncQueryForwardingServlet; this.asyncManagementForwardingServlet = asyncManagementForwardingServlet; + this.authConfig = authConfig; } @Override @@ -105,6 +109,7 @@ public void initialize(Server server, Injector injector) // perform no-op authorization for these resources AuthenticationUtils.addNoopAuthorizationFilters(root, UNSECURED_PATHS); + AuthenticationUtils.addNoopAuthorizationFilters(root, authConfig.getUnsecuredPaths()); final List authenticators = authenticatorMapper.getAuthenticatorChain(); AuthenticationUtils.addAuthenticationFilterChain(root, authenticators); diff --git a/sql/src/test/java/io/druid/sql/calcite/http/SqlResourceTest.java b/sql/src/test/java/io/druid/sql/calcite/http/SqlResourceTest.java index 809e758f0168..c80a4d0f5405 100644 --- a/sql/src/test/java/io/druid/sql/calcite/http/SqlResourceTest.java +++ b/sql/src/test/java/io/druid/sql/calcite/http/SqlResourceTest.java @@ -86,6 +86,7 @@ public void setUp() throws Exception final DruidOperatorTable operatorTable = CalciteTests.createOperatorTable(); final ExprMacroTable macroTable = CalciteTests.createExprMacroTable(); req = EasyMock.createStrictMock(HttpServletRequest.class); + EasyMock.expect(req.getAttribute(AuthConfig.DRUID_ALLOW_UNSECURED_PATH)).andReturn(null).anyTimes(); EasyMock.expect(req.getAttribute(AuthConfig.DRUID_AUTHORIZATION_CHECKED)) .andReturn(null) .anyTimes(); From 72d6dcda4fb674261f14e21f143d39aaedb68dc9 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 11 Apr 2018 20:39:39 -0400 Subject: [PATCH 41/67] ParallelCombiner: Fix buffer leak on exception in "combine". (#5630) Once a buffer is acquired, we need to make sure to release it if an exception is thrown before the closeable iterator is created. 
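In other words: register the combine buffer holder for cleanup the moment it is acquired, hand that cleanup over to the returned iterator on success, and run it immediately (suppressing any secondary failure) if construction throws first. A minimal sketch of that control flow follows, using plain JDK Closeables rather than Druid's ResourceHolder and Closer; the acquireBuffer and buildResultOwning helpers are illustrative stand-ins for the real buffer acquisition and combine-tree construction, not the actual ParallelCombiner API.

import java.io.Closeable;

// Sketch only: mirrors the release-on-exception control flow of the fix below.
final class ReleaseOnExceptionSketch
{
  // Hypothetical stand-in for acquiring the combine buffer from the pool.
  static Closeable acquireBuffer()
  {
    return () -> System.out.println("buffer released");
  }

  // Hypothetical stand-in for building the result iterator; the returned object
  // takes ownership of the buffer, so closing it releases the buffer.
  static Closeable buildResultOwning(Closeable buffer)
  {
    return buffer::close;
  }

  static Closeable combine()
  {
    final Closeable buffer = acquireBuffer();
    try {
      // May throw before ownership of the buffer transfers to the result.
      return buildResultOwning(buffer);
    }
    catch (Throwable t) {
      try {
        buffer.close();        // release immediately; don't leak on a setup failure
      }
      catch (Throwable t2) {
        t.addSuppressed(t2);   // keep the original failure as the primary one
      }
      throw t;
    }
  }

  public static void main(String[] args) throws Exception
  {
    // Success path: closing the result releases the buffer exactly once.
    try (Closeable result = combine()) {
      System.out.println("using " + result);
    }
  }
}

The actual change applies the same idea with Druid's Closer: the combineBufferHolder is registered right after it is obtained, so a failure anywhere in the try block, including inside buildCombineTree, still releases the buffer, while the success path wraps the iterator and the Closer together via CloseableIterators.wrap.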
--- .../epinephelinae/ParallelCombiner.java | 83 +++++++++++-------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/processing/src/main/java/io/druid/query/groupby/epinephelinae/ParallelCombiner.java b/processing/src/main/java/io/druid/query/groupby/epinephelinae/ParallelCombiner.java index bc786490c470..e8db98d31da1 100644 --- a/processing/src/main/java/io/druid/query/groupby/epinephelinae/ParallelCombiner.java +++ b/processing/src/main/java/io/druid/query/groupby/epinephelinae/ParallelCombiner.java @@ -136,44 +136,55 @@ public CloseableIterator> combine( ) { // CombineBuffer is initialized when this method is called and closed after the result iterator is done + final Closer closer = Closer.create(); final ResourceHolder combineBufferHolder = combineBufferSupplier.get(); - final ByteBuffer combineBuffer = combineBufferHolder.get(); - final int minimumRequiredBufferCapacity = StreamingMergeSortedGrouper.requiredBufferCapacity( - combineKeySerdeFactory.factorizeWithDictionary(mergedDictionary), - combiningFactories - ); - // We want to maximize the parallelism while the size of buffer slice is greater than the minimum buffer size - // required by StreamingMergeSortedGrouper. Here, we find the leafCombineDegree of the cominbing tree and the - // required number of buffers maximizing the parallelism. - final Pair degreeAndNumBuffers = findLeafCombineDegreeAndNumBuffers( - combineBuffer, - minimumRequiredBufferCapacity, - concurrencyHint, - sortedIterators.size() - ); + closer.register(combineBufferHolder); - final int leafCombineDegree = degreeAndNumBuffers.lhs; - final int numBuffers = degreeAndNumBuffers.rhs; - final int sliceSize = combineBuffer.capacity() / numBuffers; + try { + final ByteBuffer combineBuffer = combineBufferHolder.get(); + final int minimumRequiredBufferCapacity = StreamingMergeSortedGrouper.requiredBufferCapacity( + combineKeySerdeFactory.factorizeWithDictionary(mergedDictionary), + combiningFactories + ); + // We want to maximize the parallelism while the size of buffer slice is greater than the minimum buffer size + // required by StreamingMergeSortedGrouper. Here, we find the leafCombineDegree of the cominbing tree and the + // required number of buffers maximizing the parallelism. 
+ final Pair degreeAndNumBuffers = findLeafCombineDegreeAndNumBuffers( + combineBuffer, + minimumRequiredBufferCapacity, + concurrencyHint, + sortedIterators.size() + ); - final Supplier bufferSupplier = createCombineBufferSupplier(combineBuffer, numBuffers, sliceSize); + final int leafCombineDegree = degreeAndNumBuffers.lhs; + final int numBuffers = degreeAndNumBuffers.rhs; + final int sliceSize = combineBuffer.capacity() / numBuffers; - final Pair>>, List> combineIteratorAndFutures = buildCombineTree( - sortedIterators, - bufferSupplier, - combiningFactories, - leafCombineDegree, - mergedDictionary - ); + final Supplier bufferSupplier = createCombineBufferSupplier(combineBuffer, numBuffers, sliceSize); - final CloseableIterator> combineIterator = Iterables.getOnlyElement(combineIteratorAndFutures.lhs); - final List combineFutures = combineIteratorAndFutures.rhs; + final Pair>>, List> combineIteratorAndFutures = buildCombineTree( + sortedIterators, + bufferSupplier, + combiningFactories, + leafCombineDegree, + mergedDictionary + ); - final Closer closer = Closer.create(); - closer.register(combineBufferHolder); - closer.register(() -> checkCombineFutures(combineFutures)); + final CloseableIterator> combineIterator = Iterables.getOnlyElement(combineIteratorAndFutures.lhs); + final List combineFutures = combineIteratorAndFutures.rhs; + closer.register(() -> checkCombineFutures(combineFutures)); - return CloseableIterators.wrap(combineIterator, closer); + return CloseableIterators.wrap(combineIterator, closer); + } + catch (Throwable t) { + try { + closer.close(); + } + catch (Throwable t2) { + t.addSuppressed(t2); + } + throw t; + } } private static void checkCombineFutures(List combineFutures) @@ -289,11 +300,11 @@ private int computeRequiredBufferNum(int numChildNodes, int combineDegree) * Recursively build a combining tree in a bottom-up manner. Each node of the tree is a task that combines input * iterators asynchronously. * - * @param childIterators all iterators of the child level - * @param bufferSupplier combining buffer supplier - * @param combiningFactories array of combining aggregator factories - * @param combineDegree combining degree for the current level - * @param dictionary merged dictionary + * @param childIterators all iterators of the child level + * @param bufferSupplier combining buffer supplier + * @param combiningFactories array of combining aggregator factories + * @param combineDegree combining degree for the current level + * @param dictionary merged dictionary * * @return a pair of a list of iterators of the current level in the combining tree and a list of futures of all * executed combining tasks From d709d1a59fe803cb1672cdf870aab2f79e700040 Mon Sep 17 00:00:00 2001 From: Caroline1000 Date: Wed, 11 Apr 2018 17:44:31 -0700 Subject: [PATCH 42/67] correct overlord console header. Because it's not the coordinator console, it's the overlord console. (#5627) --- indexing-service/src/main/resources/indexer_static/console.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/indexing-service/src/main/resources/indexer_static/console.html b/indexing-service/src/main/resources/indexer_static/console.html index d397b1bbbea8..bfc3dd546272 100644 --- a/indexing-service/src/main/resources/indexer_static/console.html +++ b/indexing-service/src/main/resources/indexer_static/console.html @@ -36,7 +36,7 @@
-Coordinator Console
+Overlord Console