From de3923ab4a5c15e728a69ef2d241edb1795ff438 Mon Sep 17 00:00:00 2001 From: LiuXuxin Date: Wed, 31 Aug 2022 20:40:17 +0800 Subject: [PATCH 01/31] temp --- .../file/metadata/AlignedChunkMetadata.java | 5 + .../tsfile/file/metadata/ChunkMetadata.java | 35 +++++++ .../tsfile/file/metadata/IChunkMetadata.java | 2 + .../writer/MemoryControlTsFileIOWriter.java | 96 +++++++++++++++++++ .../tsfile/write/writer/TsFileIOWriter.java | 34 +++++-- 5 files changed, 162 insertions(+), 10 deletions(-) create mode 100644 tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java index a69e61b9b8050..606fe764295f5 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java @@ -23,6 +23,7 @@ import org.apache.iotdb.tsfile.read.common.TimeRange; import org.apache.iotdb.tsfile.read.controller.IChunkLoader; +import java.io.IOException; import java.io.OutputStream; import java.util.List; @@ -183,6 +184,10 @@ public int serializeTo(OutputStream outputStream, boolean serializeStatistic) { throw new UnsupportedOperationException("VectorChunkMetadata doesn't support serial method"); } + public int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath) throws IOException { + return 0; + } + @Override public byte getMask() { return 0; diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java index 831f8cd120fde..8c1fab686a446 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java @@ -22,6 +22,7 @@ import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics; import org.apache.iotdb.tsfile.read.common.TimeRange; import org.apache.iotdb.tsfile.read.controller.IChunkLoader; +import org.apache.iotdb.tsfile.utils.Binary; import org.apache.iotdb.tsfile.utils.FilePathUtils; import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.RamUsageEstimator; @@ -161,6 +162,40 @@ public int serializeTo(OutputStream outputStream, boolean serializeStatistic) th return byteLen; } + /** + * Serialize the chunk metadata with full path, data type and statistic + * + * @param outputStream OutputStream + * @param seriesFullPath the full path of the chunk metadata + * @return length + * @throws IOException + */ + public int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath) + throws IOException { + int byteLen = 0; + byteLen += ReadWriteIOUtils.write(new Binary(seriesFullPath), outputStream); + byteLen += ReadWriteIOUtils.write(tsDataType, outputStream); + byteLen += this.serializeTo(outputStream, true); + return byteLen; + } + + /** + * Deserialize with full info, the result is store in param chunkMetadata + * + * @param buffer ByteBuffer + * @param chunkMetadata ChunkMetadata to store the result + * @return the full path of the measurement + * @throws IOException + */ + public static String deserializeWithFullInfo(ByteBuffer buffer, ChunkMetadata chunkMetadata) + throws IOException { + String fullPath = ReadWriteIOUtils.readBinary(buffer).toString(); + chunkMetadata.tsDataType = 
TSDataType.deserialize(ReadWriteIOUtils.readByte(buffer)); + chunkMetadata.offsetOfChunkHeader = ReadWriteIOUtils.readLong(buffer); + chunkMetadata.statistics = Statistics.deserialize(buffer, chunkMetadata.tsDataType); + return fullPath; + } + /** * deserialize from ByteBuffer. * diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/IChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/IChunkMetadata.java index 1cc819fd52ddf..9d8c7a8b804db 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/IChunkMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/IChunkMetadata.java @@ -73,5 +73,7 @@ public interface IChunkMetadata { int serializeTo(OutputStream outputStream, boolean serializeStatistic) throws IOException; + int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath) throws IOException; + byte getMask(); } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java new file mode 100644 index 0000000000000..fd00260ff9abb --- /dev/null +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.tsfile.write.writer; + +import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; +import org.apache.iotdb.tsfile.read.common.Path; +import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class MemoryControlTsFileIOWriter extends TsFileIOWriter { + private static final Logger LOG = LoggerFactory.getLogger(MemoryControlTsFileIOWriter.class); + private long maxMetadataSize; + private long currentChunkMetadataSize = 0L; + private File chunkMetadataTempFile; + private LocalTsFileOutput tempOutput; + private List sortedSegmentPosition = new ArrayList<>(); + + public static final String CHUNK_METADATA_TEMP_FILE_PREFIX = ".cmt"; + private static final byte VECTOR_TYPE = 1; + private static final byte NORMAL_TYPE = 2; + + public MemoryControlTsFileIOWriter(File file, long maxMetadataSize) throws IOException { + super(file); + this.maxMetadataSize = maxMetadataSize; + this.chunkMetadataTempFile = new File(file.getAbsoluteFile() + CHUNK_METADATA_TEMP_FILE_PREFIX); + } + + @Override + public void endCurrentChunk() { + currentChunkMetadataSize += currentChunkMetadata.calculateRamSize(); + super.endCurrentChunk(); + if (currentChunkMetadataSize > maxMetadataSize) { + // TODO: Sort and flush the chunk metadata to outside + try { + sortAndFlushChunkMetadata(); + } catch (IOException e) { + LOG.error("Meets exception when flushing metadata to temp files", e); + } + } + } + + private void sortAndFlushChunkMetadata() throws IOException { + // group by series + Map> chunkMetadataListMap = groupChunkMetadataListBySeries(); + if (tempOutput == null) { + tempOutput = new LocalTsFileOutput(new FileOutputStream(chunkMetadataTempFile)); + } + sortedSegmentPosition.add(tempOutput.getPosition()); + for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { + List iChunkMetadataList = entry.getValue(); + if (iChunkMetadataList.size() > 0 + && iChunkMetadataList.get(0).getDataType() == TSDataType.VECTOR) { + // this is a vector chunk + ReadWriteIOUtils.write(VECTOR_TYPE, tempOutput); + ReadWriteIOUtils.write(chunkMetadataList.size(), tempOutput); + } else { + ReadWriteIOUtils.write(NORMAL_TYPE, tempOutput); + } + for (IChunkMetadata chunkMetadata : iChunkMetadataList) { + chunkMetadata.serializeTo(tempOutput, true); + } + } + } + + @Override + public void endFile() { + // super.endFile(); + } +} diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 2f865f297f081..59a8ec236d629 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -76,7 +76,7 @@ public class TsFileIOWriter implements AutoCloseable { protected File file; // current flushed Chunk - private ChunkMetadata currentChunkMetadata; + protected ChunkMetadata currentChunkMetadata; // current flushed ChunkGroup protected List chunkMetadataList = new ArrayList<>(); // all flushed ChunkGroups @@ -240,6 +240,28 @@ public void endCurrentChunk() { currentChunkMetadata = null; } + protected Map> groupChunkMetadataListBySeries() { + // group ChunkMetadata by series + Map> chunkMetadataListMap = new 
TreeMap<>(); + + for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { + List chunkMetadatas = chunkGroupMetadata.getChunkMetadataList(); + for (IChunkMetadata chunkMetadata : chunkMetadatas) { + Path series = new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()); + chunkMetadataListMap.computeIfAbsent(series, k -> new ArrayList<>()).add(chunkMetadata); + } + } + + if (chunkMetadataList != null && chunkMetadataList.size() > 0) { + ChunkMetadata chunkMetadata = chunkMetadataList.get(0); + Path series = new Path(currentChunkGroupDeviceId, chunkMetadata.getMeasurementUid()); + chunkMetadataListMap + .computeIfAbsent(series, k -> new ArrayList<>()) + .addAll(chunkMetadataList); + } + return chunkMetadataListMap; + } + /** * write {@linkplain TsFileMetadata TSFileMetaData} to output stream and close it. * @@ -253,15 +275,7 @@ public void endFile() throws IOException { ReadWriteIOUtils.write(MetaMarker.SEPARATOR, out.wrapAsStream()); // group ChunkMetadata by series - Map> chunkMetadataListMap = new TreeMap<>(); - - for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { - List chunkMetadatas = chunkGroupMetadata.getChunkMetadataList(); - for (IChunkMetadata chunkMetadata : chunkMetadatas) { - Path series = new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()); - chunkMetadataListMap.computeIfAbsent(series, k -> new ArrayList<>()).add(chunkMetadata); - } - } + Map> chunkMetadataListMap = groupChunkMetadataListBySeries(); MetadataIndexNode metadataIndex = flushMetadataIndex(chunkMetadataListMap); TsFileMetadata tsFileMetaData = new TsFileMetadata(); From dca1b4c8833532a386b7c6a882bac3869063454e Mon Sep 17 00:00:00 2001 From: LiuXuxin Date: Thu, 1 Sep 2022 18:41:44 +0800 Subject: [PATCH 02/31] finish the metadata flush and read process in memory control writer and corresponding test --- .../file/metadata/AlignedChunkMetadata.java | 36 +- .../writer/MemoryControlTsFileIOWriter.java | 109 +++++- .../MemoryControlTsFileIOWriterTest.java | 311 ++++++++++++++++++ 3 files changed, 444 insertions(+), 12 deletions(-) create mode 100644 tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java index 606fe764295f5..2b7e238a669dd 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java @@ -22,17 +22,20 @@ import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics; import org.apache.iotdb.tsfile.read.common.TimeRange; import org.apache.iotdb.tsfile.read.controller.IChunkLoader; +import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; import java.io.IOException; import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; public class AlignedChunkMetadata implements IChunkMetadata { // ChunkMetadata for time column - private final IChunkMetadata timeChunkMetadata; + private IChunkMetadata timeChunkMetadata; // ChunkMetadata for all subSensors in the vector - private final List valueChunkMetadataList; + private List valueChunkMetadataList; /** ChunkLoader of metadata, used to create IChunkReader */ private IChunkLoader chunkLoader; @@ -43,6 +46,8 @@ public AlignedChunkMetadata( this.valueChunkMetadataList = valueChunkMetadataList; } + 
public AlignedChunkMetadata() {} + @Override public Statistics getStatistics() { return valueChunkMetadataList.size() == 1 && valueChunkMetadataList.get(0) != null @@ -184,8 +189,31 @@ public int serializeTo(OutputStream outputStream, boolean serializeStatistic) { throw new UnsupportedOperationException("VectorChunkMetadata doesn't support serial method"); } - public int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath) throws IOException { - return 0; + public int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath) + throws IOException { + int byteLen = 0; + byteLen += ReadWriteIOUtils.write(valueChunkMetadataList.size() + 1, outputStream); + byteLen += timeChunkMetadata.serializeWithFullInfo(outputStream, seriesFullPath); + for (IChunkMetadata chunkMetadata : valueChunkMetadataList) { + byteLen += chunkMetadata.serializeWithFullInfo(outputStream, ""); + } + return byteLen; + } + + public static String deserializeWithFullInfo( + ByteBuffer buffer, AlignedChunkMetadata alignedChunkMetadata) throws IOException { + int chunkMetadataNum = ReadWriteIOUtils.readInt(buffer); + alignedChunkMetadata.timeChunkMetadata = new ChunkMetadata(); + alignedChunkMetadata.valueChunkMetadataList = new ArrayList<>(); + String deviceId = + ChunkMetadata.deserializeWithFullInfo( + buffer, (ChunkMetadata) alignedChunkMetadata.timeChunkMetadata); + for (int i = 1; i < chunkMetadataNum; ++i) { + ChunkMetadata metadata = new ChunkMetadata(); + ChunkMetadata.deserializeWithFullInfo(buffer, metadata); + alignedChunkMetadata.valueChunkMetadataList.add(metadata); + } + return deviceId; } @Override diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index fd00260ff9abb..5a631212954da 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -19,9 +19,14 @@ package org.apache.iotdb.tsfile.write.writer; +import org.apache.iotdb.tsfile.file.metadata.AlignedChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.read.common.Path; +import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput; +import org.apache.iotdb.tsfile.utils.Pair; +import org.apache.iotdb.tsfile.utils.PublicBAOS; import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; import org.slf4j.Logger; @@ -30,6 +35,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -39,17 +45,23 @@ public class MemoryControlTsFileIOWriter extends TsFileIOWriter { private long maxMetadataSize; private long currentChunkMetadataSize = 0L; private File chunkMetadataTempFile; - private LocalTsFileOutput tempOutput; + protected LocalTsFileOutput tempOutput; + protected LocalTsFileInput tempInput; + private final boolean needSort; private List sortedSegmentPosition = new ArrayList<>(); + private ByteBuffer typeBuffer = ByteBuffer.allocate(1); + private ByteBuffer sizeBuffer = ByteBuffer.allocate(4); public static final String CHUNK_METADATA_TEMP_FILE_PREFIX = ".cmt"; private static final byte VECTOR_TYPE = 1; private static final byte NORMAL_TYPE = 2; 
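For reference, the temp-file format these two markers define: every record spilled to the ".cmt" file is a one-byte type (VECTOR_TYPE or NORMAL_TYPE) followed, per chunk metadata, by a four-byte length and the bytes produced by serializeWithFullInfo. A minimal round trip for the non-aligned case, assuming an in-memory ChunkMetadata named chunkMetadata and an illustrative series name (both hypothetical, but using only methods added in these patches):

    PublicBAOS buffer = new PublicBAOS();
    // writes the full path as a Binary, the data type, then the plain serializeTo body
    int size = chunkMetadata.serializeWithFullInfo(buffer, "root.sg.d0.s0");
    ByteBuffer serialized = ByteBuffer.wrap(buffer.getBuf(), 0, size);
    ChunkMetadata restored = new ChunkMetadata();
    // returns the full path and fills data type, chunk header offset and statistics
    String fullPath = ChunkMetadata.deserializeWithFullInfo(serialized, restored);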
- public MemoryControlTsFileIOWriter(File file, long maxMetadataSize) throws IOException { + public MemoryControlTsFileIOWriter(File file, long maxMetadataSize, boolean needSort) + throws IOException { super(file); this.maxMetadataSize = maxMetadataSize; this.chunkMetadataTempFile = new File(file.getAbsoluteFile() + CHUNK_METADATA_TEMP_FILE_PREFIX); + this.needSort = needSort; } @Override @@ -66,31 +78,112 @@ public void endCurrentChunk() { } } - private void sortAndFlushChunkMetadata() throws IOException { + protected void sortAndFlushChunkMetadata() throws IOException { // group by series Map> chunkMetadataListMap = groupChunkMetadataListBySeries(); if (tempOutput == null) { tempOutput = new LocalTsFileOutput(new FileOutputStream(chunkMetadataTempFile)); } sortedSegmentPosition.add(tempOutput.getPosition()); + // the file structure in temp file will be + // ChunkType | chunkSize | chunkBuffer for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { + Path seriesPath = entry.getKey(); List iChunkMetadataList = entry.getValue(); if (iChunkMetadataList.size() > 0 && iChunkMetadataList.get(0).getDataType() == TSDataType.VECTOR) { // this is a vector chunk - ReadWriteIOUtils.write(VECTOR_TYPE, tempOutput); - ReadWriteIOUtils.write(chunkMetadataList.size(), tempOutput); + writeAlignedChunkMetadata(iChunkMetadataList, seriesPath); } else { - ReadWriteIOUtils.write(NORMAL_TYPE, tempOutput); + writeNormalChunkMetadata(iChunkMetadataList, seriesPath); } - for (IChunkMetadata chunkMetadata : iChunkMetadataList) { - chunkMetadata.serializeTo(tempOutput, true); + } + } + + private void writeAlignedChunkMetadata(List iChunkMetadataList, Path seriesPath) + throws IOException { + ReadWriteIOUtils.write(VECTOR_TYPE, tempOutput); + IChunkMetadata currentTimeChunk = iChunkMetadataList.get(0); + List currentValueChunk = new ArrayList<>(); + List alignedChunkMetadata = new ArrayList<>(); + for (int i = 1; i < iChunkMetadataList.size(); ++i) { + if (iChunkMetadataList.get(i).getDataType() == TSDataType.VECTOR) { + alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk)); + currentTimeChunk = iChunkMetadataList.get(i); + currentValueChunk = new ArrayList<>(); + } else { + currentValueChunk.add(iChunkMetadataList.get(i)); } } + if (currentValueChunk.size() > 0) { + alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk)); + } + for (IChunkMetadata chunkMetadata : alignedChunkMetadata) { + PublicBAOS buffer = new PublicBAOS(); + int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getDevice()); + ReadWriteIOUtils.write(size, tempOutput); + buffer.writeTo(tempOutput); + } + } + + private void writeNormalChunkMetadata(List iChunkMetadataList, Path seriesPath) + throws IOException { + ReadWriteIOUtils.write(NORMAL_TYPE, tempOutput); + for (IChunkMetadata chunkMetadata : iChunkMetadataList) { + PublicBAOS buffer = new PublicBAOS(); + int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getFullPath()); + ReadWriteIOUtils.write(size, tempOutput); + buffer.writeTo(tempOutput); + } + } + + protected Pair readNextChunkMetadata() throws IOException { + if (tempInput == null) { + tempInput = new LocalTsFileInput(chunkMetadataTempFile.toPath()); + } + byte type = readNextChunkMetadataType(); + int size = readNextChunkMetadataSize(); + ByteBuffer chunkBuffer = ByteBuffer.allocate(size); + ReadWriteIOUtils.readAsPossible(tempInput, chunkBuffer); + chunkBuffer.flip(); + if (type == NORMAL_TYPE) { + ChunkMetadata chunkMetadata = new 
ChunkMetadata(); + String seriesPath = ChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata); + return new Pair<>(seriesPath, chunkMetadata); + } else { + AlignedChunkMetadata chunkMetadata = new AlignedChunkMetadata(); + String devicePath = AlignedChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata); + return new Pair<>(devicePath, chunkMetadata); + } + } + + private byte readNextChunkMetadataType() throws IOException { + typeBuffer.clear(); + ReadWriteIOUtils.readAsPossible(tempInput, typeBuffer); + typeBuffer.flip(); + return ReadWriteIOUtils.readByte(typeBuffer); + } + + private int readNextChunkMetadataSize() throws IOException { + sizeBuffer.clear(); + ReadWriteIOUtils.readAsPossible(tempInput, sizeBuffer); + sizeBuffer.flip(); + return ReadWriteIOUtils.readInt(sizeBuffer); } @Override public void endFile() { // super.endFile(); } + + @Override + public void close() throws IOException { + super.close(); + if (tempInput != null) { + tempInput.close(); + } + if (tempOutput != null) { + this.tempOutput.close(); + } + } } diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java new file mode 100644 index 0000000000000..e894901a0aeb0 --- /dev/null +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.tsfile.write.writer; + +import org.apache.iotdb.tsfile.file.metadata.AlignedChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; +import org.apache.iotdb.tsfile.utils.Binary; +import org.apache.iotdb.tsfile.utils.Pair; +import org.apache.iotdb.tsfile.utils.TsPrimitiveType; +import org.apache.iotdb.tsfile.write.chunk.AlignedChunkWriterImpl; +import org.apache.iotdb.tsfile.write.chunk.ChunkWriterImpl; +import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; +import org.apache.iotdb.tsfile.write.schema.MeasurementSchema; + +import org.apache.commons.io.FileUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +public class MemoryControlTsFileIOWriterTest extends MemoryControlTsFileIOWriter { + private static File testFile = new File("target", "1-1-0-0.tsfile"); + private static File emptyFile = new File("target", "temp"); + private static final int TEST_CHUNK_SIZE = 1000; + + @Before + public void setUp() throws IOException {} + + @After + public void tearDown() throws IOException { + this.close(); + FileUtils.delete(testFile); + FileUtils.delete( + new File(testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_PREFIX)); + FileUtils.delete(emptyFile); + } + + public MemoryControlTsFileIOWriterTest() throws IOException { + super(emptyFile, 1024, true); + } + + @Test + public void testSerializeAndDeserializeChunkMetadata() throws IOException { + try (MemoryControlTsFileIOWriter writer = + new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10, true)) { + List originChunkMetadataList = new ArrayList<>(); + for (int i = 0; i < 10; ++i) { + String deviceId = "root.sg.d" + i; + writer.startChunkGroup(deviceId); + for (int j = 0; j < 5; ++j) { + ChunkWriterImpl chunkWriter; + switch (j) { + case 0: + chunkWriter = generateIntData(j); + break; + case 1: + chunkWriter = generateBooleanData(j); + break; + case 2: + chunkWriter = generateFloatData(j); + break; + case 3: + chunkWriter = generateDoubleData(j); + break; + case 4: + default: + chunkWriter = generateTextData(j); + break; + } + chunkWriter.writeToFileWriter(writer); + } + originChunkMetadataList.addAll(writer.chunkMetadataList); + writer.endChunkGroup(); + } + writer.sortAndFlushChunkMetadata(); + writer.tempOutput.flush(); + + for (int i = 0; i < originChunkMetadataList.size(); ++i) { + Pair chunkMetadataPair = writer.readNextChunkMetadata(); + Assert.assertEquals("root.sg.d" + i / 5 + ".s" + i % 5, chunkMetadataPair.left); + Assert.assertEquals( + originChunkMetadataList.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); + Assert.assertEquals( + originChunkMetadataList.get(i).getEndTime(), chunkMetadataPair.right.getEndTime()); + Assert.assertEquals( + originChunkMetadataList.get(i).getDataType(), chunkMetadataPair.right.getDataType()); + Assert.assertEquals( + originChunkMetadataList.get(i).getStatistics(), + chunkMetadataPair.right.getStatistics()); + } + } + } + + @Test + public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException { + try (MemoryControlTsFileIOWriter writer = + new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10, true)) { + List originChunkMetadataList = new ArrayList<>(); + for (int i 
= 0; i < 10; ++i) { + String deviceId = "root.sg.d" + i; + writer.startChunkGroup(deviceId); + AlignedChunkWriterImpl chunkWriter = generateVectorData(i); + chunkWriter.writeToFileWriter(writer); + originChunkMetadataList.addAll(writer.chunkMetadataList); + writer.endChunkGroup(); + } + writer.sortAndFlushChunkMetadata(); + writer.tempOutput.flush(); + + List alignedChunkMetadata = new ArrayList<>(); + IChunkMetadata currentTimeChunkMetadata = originChunkMetadataList.get(0); + List currentValueChunkMetadata = new ArrayList<>(); + for (int i = 1; i < originChunkMetadataList.size(); ++i) { + if (originChunkMetadataList.get(i).getDataType() == TSDataType.VECTOR) { + alignedChunkMetadata.add( + new AlignedChunkMetadata(currentTimeChunkMetadata, currentValueChunkMetadata)); + currentTimeChunkMetadata = originChunkMetadataList.get(i); + currentValueChunkMetadata = new ArrayList<>(); + } else { + currentValueChunkMetadata.add(originChunkMetadataList.get(i)); + } + } + if (currentValueChunkMetadata.size() > 0) { + alignedChunkMetadata.add( + new AlignedChunkMetadata(currentTimeChunkMetadata, currentValueChunkMetadata)); + } + + for (int i = 0; i < alignedChunkMetadata.size(); ++i) { + Pair chunkMetadataPair = writer.readNextChunkMetadata(); + Assert.assertEquals("root.sg.d" + i, chunkMetadataPair.left); + Assert.assertEquals( + alignedChunkMetadata.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); + Assert.assertEquals( + alignedChunkMetadata.get(i).getEndTime(), chunkMetadataPair.right.getEndTime()); + Assert.assertEquals( + alignedChunkMetadata.get(i).getDataType(), chunkMetadataPair.right.getDataType()); + Assert.assertEquals( + alignedChunkMetadata.get(i).getStatistics(), chunkMetadataPair.right.getStatistics()); + } + } + } + + @Test + public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { + try (MemoryControlTsFileIOWriter writer = + new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10, true)) { + List originChunkMetadataList = new ArrayList<>(); + for (int i = 0; i < 10; ++i) { + String deviceId = "root.sg.d" + i; + writer.startChunkGroup(deviceId); + if (i % 2 == 0) { + // write normal series + for (int j = 0; j < 5; ++j) { + ChunkWriterImpl chunkWriter; + switch (j) { + case 0: + chunkWriter = generateIntData(j); + break; + case 1: + chunkWriter = generateBooleanData(j); + break; + case 2: + chunkWriter = generateFloatData(j); + break; + case 3: + chunkWriter = generateDoubleData(j); + break; + case 4: + default: + chunkWriter = generateTextData(j); + break; + } + chunkWriter.writeToFileWriter(writer); + } + originChunkMetadataList.addAll(writer.chunkMetadataList); + } else { + // write vector + AlignedChunkWriterImpl chunkWriter = generateVectorData(i); + chunkWriter.writeToFileWriter(writer); + originChunkMetadataList.add( + new AlignedChunkMetadata( + writer.chunkMetadataList.get(0), + new ArrayList<>( + writer.chunkMetadataList.subList(1, writer.chunkMetadataList.size())))); + } + writer.endChunkGroup(); + } + writer.sortAndFlushChunkMetadata(); + writer.tempOutput.flush(); + + for (int i = 0, deviceCnt = 0; i < originChunkMetadataList.size(); ++i) { + Pair chunkMetadataPair = writer.readNextChunkMetadata(); + if (originChunkMetadataList.get(i) instanceof ChunkMetadata) { + Assert.assertEquals( + "root.sg.d" + deviceCnt + "." 
+ originChunkMetadataList.get(i).getMeasurementUid(), + chunkMetadataPair.left); + } else { + deviceCnt++; + Assert.assertEquals("root.sg.d" + deviceCnt++, chunkMetadataPair.left); + } + Assert.assertEquals( + originChunkMetadataList.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); + Assert.assertEquals( + originChunkMetadataList.get(i).getEndTime(), chunkMetadataPair.right.getEndTime()); + Assert.assertEquals( + originChunkMetadataList.get(i).getDataType(), chunkMetadataPair.right.getDataType()); + Assert.assertEquals( + originChunkMetadataList.get(i).getStatistics(), + chunkMetadataPair.right.getStatistics()); + } + } + } + + private ChunkWriterImpl generateIntData(int idx) { + ChunkWriterImpl chunkWriter = + new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.INT64)); + Random random = new Random(); + for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + chunkWriter.write(i, random.nextLong()); + } + return chunkWriter; + } + + private ChunkWriterImpl generateFloatData(int idx) { + ChunkWriterImpl chunkWriter = + new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.FLOAT)); + Random random = new Random(); + for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + chunkWriter.write(i, random.nextFloat()); + } + return chunkWriter; + } + + private ChunkWriterImpl generateDoubleData(int idx) { + ChunkWriterImpl chunkWriter = + new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.DOUBLE)); + Random random = new Random(); + for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + chunkWriter.write(i, random.nextDouble()); + } + return chunkWriter; + } + + private ChunkWriterImpl generateBooleanData(int idx) { + ChunkWriterImpl chunkWriter = + new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.BOOLEAN)); + Random random = new Random(); + for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + chunkWriter.write(i, random.nextBoolean()); + } + return chunkWriter; + } + + private AlignedChunkWriterImpl generateVectorData(int idx) { + List measurementSchemas = new ArrayList<>(); + measurementSchemas.add(new MeasurementSchema("", TSDataType.INT32)); + measurementSchemas.add(new MeasurementSchema("", TSDataType.INT64)); + measurementSchemas.add(new MeasurementSchema("", TSDataType.FLOAT)); + measurementSchemas.add(new MeasurementSchema("", TSDataType.DOUBLE)); + measurementSchemas.add(new MeasurementSchema("", TSDataType.BOOLEAN)); + measurementSchemas.add(new MeasurementSchema("", TSDataType.TEXT)); + AlignedChunkWriterImpl chunkWriter = new AlignedChunkWriterImpl(measurementSchemas); + Random random = new Random(); + for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + TsPrimitiveType[] points = new TsPrimitiveType[6]; + points[0] = new TsPrimitiveType.TsInt(random.nextInt()); + points[1] = new TsPrimitiveType.TsLong(random.nextLong()); + points[2] = new TsPrimitiveType.TsFloat(random.nextFloat()); + points[3] = new TsPrimitiveType.TsDouble(random.nextDouble()); + points[4] = new TsPrimitiveType.TsBoolean(random.nextBoolean()); + points[5] = new TsPrimitiveType.TsBinary(new Binary(String.valueOf(random.nextDouble()))); + chunkWriter.write(i, points); + } + return chunkWriter; + } + + private ChunkWriterImpl generateTextData(int idx) { + ChunkWriterImpl chunkWriter = + new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.TEXT)); + Random random = new Random(); + for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + chunkWriter.write(i, new Binary(String.valueOf(random.nextDouble()))); + } + return chunkWriter; + } +} From adecc618e52ee63601bd174bfb56fbba6010b6d9 Mon Sep 17 
00:00:00 2001 From: LiuXuxin Date: Mon, 5 Sep 2022 17:54:27 +0800 Subject: [PATCH 03/31] temp for external sort --- .../writer/MemoryControlTsFileIOWriter.java | 296 +++++++++++++----- .../MemoryControlTsFileIOWriterTest.java | 25 +- 2 files changed, 247 insertions(+), 74 deletions(-) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index 5a631212954da..e84970ced18c5 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -29,6 +29,7 @@ import org.apache.iotdb.tsfile.utils.PublicBAOS; import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; +import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,23 +37,25 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Queue; public class MemoryControlTsFileIOWriter extends TsFileIOWriter { private static final Logger LOG = LoggerFactory.getLogger(MemoryControlTsFileIOWriter.class); - private long maxMetadataSize; - private long currentChunkMetadataSize = 0L; - private File chunkMetadataTempFile; + protected long maxMetadataSize; + protected long currentChunkMetadataSize = 0L; + protected File chunkMetadataTempFile; protected LocalTsFileOutput tempOutput; - protected LocalTsFileInput tempInput; - private final boolean needSort; - private List sortedSegmentPosition = new ArrayList<>(); - private ByteBuffer typeBuffer = ByteBuffer.allocate(1); - private ByteBuffer sizeBuffer = ByteBuffer.allocate(4); + protected final boolean needSort; + protected Queue sortedSegmentPosition = new ArrayDeque<>(); public static final String CHUNK_METADATA_TEMP_FILE_PREFIX = ".cmt"; + private static final String SORTING_TEMP_FILE = ".scmt"; private static final byte VECTOR_TYPE = 1; private static final byte NORMAL_TYPE = 2; @@ -69,7 +72,6 @@ public void endCurrentChunk() { currentChunkMetadataSize += currentChunkMetadata.calculateRamSize(); super.endCurrentChunk(); if (currentChunkMetadataSize > maxMetadataSize) { - // TODO: Sort and flush the chunk metadata to outside try { sortAndFlushChunkMetadata(); } catch (IOException e) { @@ -90,100 +92,252 @@ protected void sortAndFlushChunkMetadata() throws IOException { for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { Path seriesPath = entry.getKey(); List iChunkMetadataList = entry.getValue(); - if (iChunkMetadataList.size() > 0 - && iChunkMetadataList.get(0).getDataType() == TSDataType.VECTOR) { - // this is a vector chunk - writeAlignedChunkMetadata(iChunkMetadataList, seriesPath); - } else { - writeNormalChunkMetadata(iChunkMetadataList, seriesPath); - } + writeChunkMetadata(iChunkMetadataList, seriesPath, tempOutput); } } - private void writeAlignedChunkMetadata(List iChunkMetadataList, Path seriesPath) + private void writeChunkMetadata( + List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) throws IOException { - ReadWriteIOUtils.write(VECTOR_TYPE, tempOutput); - IChunkMetadata currentTimeChunk = iChunkMetadataList.get(0); - List currentValueChunk = new ArrayList<>(); - List alignedChunkMetadata = new ArrayList<>(); - for (int i = 1; i < 
iChunkMetadataList.size(); ++i) { - if (iChunkMetadataList.get(i).getDataType() == TSDataType.VECTOR) { + if (iChunkMetadataList.size() == 0) { + return; + } + if (iChunkMetadataList.get(0).getDataType() == TSDataType.VECTOR) { + IChunkMetadata currentTimeChunk = iChunkMetadataList.get(0); + List currentValueChunk = new ArrayList<>(); + List alignedChunkMetadata = new ArrayList<>(); + for (int i = 1; i < iChunkMetadataList.size(); ++i) { + if (iChunkMetadataList.get(i).getDataType() == TSDataType.VECTOR) { + alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk)); + currentTimeChunk = iChunkMetadataList.get(i); + currentValueChunk = new ArrayList<>(); + } else { + currentValueChunk.add(iChunkMetadataList.get(i)); + } + } + if (currentValueChunk.size() > 0) { alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk)); - currentTimeChunk = iChunkMetadataList.get(i); - currentValueChunk = new ArrayList<>(); - } else { - currentValueChunk.add(iChunkMetadataList.get(i)); } + writeAlignedChunkMetadata(alignedChunkMetadata, seriesPath, output); + } else { + writeNormalChunkMetadata(iChunkMetadataList, seriesPath, output); } - if (currentValueChunk.size() > 0) { - alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk)); - } - for (IChunkMetadata chunkMetadata : alignedChunkMetadata) { + } + + private void writeAlignedChunkMetadata( + List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) + throws IOException { + ReadWriteIOUtils.write(VECTOR_TYPE, output); + for (IChunkMetadata chunkMetadata : iChunkMetadataList) { PublicBAOS buffer = new PublicBAOS(); int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getDevice()); - ReadWriteIOUtils.write(size, tempOutput); - buffer.writeTo(tempOutput); + ReadWriteIOUtils.write(size, output); + buffer.writeTo(output); } } - private void writeNormalChunkMetadata(List iChunkMetadataList, Path seriesPath) + private void writeNormalChunkMetadata( + List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) throws IOException { - ReadWriteIOUtils.write(NORMAL_TYPE, tempOutput); + ReadWriteIOUtils.write(NORMAL_TYPE, output); for (IChunkMetadata chunkMetadata : iChunkMetadataList) { PublicBAOS buffer = new PublicBAOS(); int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getFullPath()); - ReadWriteIOUtils.write(size, tempOutput); - buffer.writeTo(tempOutput); + ReadWriteIOUtils.write(size, output); + buffer.writeTo(output); } } - protected Pair readNextChunkMetadata() throws IOException { - if (tempInput == null) { - tempInput = new LocalTsFileInput(chunkMetadataTempFile.toPath()); - } - byte type = readNextChunkMetadataType(); - int size = readNextChunkMetadataSize(); - ByteBuffer chunkBuffer = ByteBuffer.allocate(size); - ReadWriteIOUtils.readAsPossible(tempInput, chunkBuffer); - chunkBuffer.flip(); - if (type == NORMAL_TYPE) { - ChunkMetadata chunkMetadata = new ChunkMetadata(); - String seriesPath = ChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata); - return new Pair<>(seriesPath, chunkMetadata); + @Override + public void endFile() throws IOException { + if (this.sortedSegmentPosition.size() > 0) { + // there is some chunk metadata already been written to the disk + sortAndFlushChunkMetadata(); + tempOutput.close(); } else { - AlignedChunkMetadata chunkMetadata = new AlignedChunkMetadata(); - String devicePath = AlignedChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata); - return new 
Pair<>(devicePath, chunkMetadata); + // sort the chunk metadata in memory, and just close the file + tempOutput.close(); + super.endFile(); + return; } - } - private byte readNextChunkMetadataType() throws IOException { - typeBuffer.clear(); - ReadWriteIOUtils.readAsPossible(tempInput, typeBuffer); - typeBuffer.flip(); - return ReadWriteIOUtils.readByte(typeBuffer); - } + if (needSort) { + externalSort(); + } - private int readNextChunkMetadataSize() throws IOException { - sizeBuffer.clear(); - ReadWriteIOUtils.readAsPossible(tempInput, sizeBuffer); - sizeBuffer.flip(); - return ReadWriteIOUtils.readInt(sizeBuffer); + // super.endFile(); } - @Override - public void endFile() { - // super.endFile(); + protected void externalSort() throws IOException { + ChunkMetadataComparator comparator = new ChunkMetadataComparator(); + int totalSegmentCount = this.sortedSegmentPosition.size(); + File currentInFile = this.chunkMetadataTempFile; + File currentOutFile = new File(this.file.getAbsolutePath() + SORTING_TEMP_FILE); + LocalTsFileInput inputForWindow1 = null; + LocalTsFileInput inputForWindow2 = null; + LocalTsFileOutput output = null; + while (totalSegmentCount > 1) { + try { + inputForWindow1 = new LocalTsFileInput(currentInFile.toPath()); + inputForWindow2 = new LocalTsFileInput(currentInFile.toPath()); + output = new LocalTsFileOutput(new FileOutputStream(currentOutFile)); + totalSegmentCount = 0; + Queue newSortedSegmentPosition = new ArrayDeque<>(); + while (sortedSegmentPosition.size() > 0) { + long startPositionForWindow1 = sortedSegmentPosition.poll(); + if (sortedSegmentPosition.size() == 0) { + // Just leave it alone, and record the position + newSortedSegmentPosition.add(startPositionForWindow1); + continue; + } + long startPositionForWindow2 = sortedSegmentPosition.poll(); + ChunkMetadataExternalSortWindow firstWindow = + new ChunkMetadataExternalSortWindow( + startPositionForWindow1, startPositionForWindow2, inputForWindow1); + ChunkMetadataExternalSortWindow secondWindow = + new ChunkMetadataExternalSortWindow( + startPositionForWindow2, + sortedSegmentPosition.size() > 0 + ? 
sortedSegmentPosition.element() + : this.chunkMetadataTempFile.length(), + inputForWindow2); + firstWindow.getNextSeriesNameAndChunkMetadata(); + secondWindow.getNextSeriesNameAndChunkMetadata(); + newSortedSegmentPosition.add(output.getPosition()); + while (firstWindow.hasNextChunkMetadata() && secondWindow.hasNextChunkMetadata()) { + Pair pairOfFirstWindow = + firstWindow.getCurrentSeriesNameAndChunkMetadata(); + Pair pairOfSecondWindow = + secondWindow.getCurrentSeriesNameAndChunkMetadata(); + Pair pairToWritten = null; + if (comparator.compare(pairOfFirstWindow, pairOfSecondWindow) < 0) { + pairToWritten = pairOfFirstWindow; + if (firstWindow.hasNextChunkMetadata()) { + firstWindow.getNextSeriesNameAndChunkMetadata(); + } + } else { + pairToWritten = pairOfSecondWindow; + if (secondWindow.hasNextChunkMetadata()) { + secondWindow.getNextSeriesNameAndChunkMetadata(); + } + } + // serialize the chunk to the output + if (pairToWritten.right instanceof AlignedChunkMetadata) { + writeAlignedChunkMetadata( + Collections.singletonList(pairToWritten.right), + new Path(pairToWritten.left), + output); + } else { + writeNormalChunkMetadata( + Collections.singletonList(pairToWritten.right), + new Path(pairToWritten.left), + output); + } + } + } + + output.close(); + inputForWindow1.close(); + inputForWindow2.close(); + FileUtils.delete(currentInFile); + currentOutFile.renameTo(currentInFile); + File tempFile = currentOutFile; + currentOutFile = currentInFile; + currentInFile = tempFile; + } finally { + if (inputForWindow1 != null) { + inputForWindow1.close(); + } + if (inputForWindow2 != null) { + inputForWindow2.close(); + } + if (output != null) { + output.close(); + } + } + } } @Override public void close() throws IOException { super.close(); - if (tempInput != null) { - tempInput.close(); - } if (tempOutput != null) { this.tempOutput.close(); } } + + protected static class ChunkMetadataComparator + implements Comparator> { + + @Override + public int compare(Pair o1, Pair o2) { + String seriesNameOfO1 = o1.left; + String seriesNameOfO2 = o2.left; + int lexicographicalOrder = seriesNameOfO1.compareTo(seriesNameOfO2); + if (lexicographicalOrder != 0) { + return lexicographicalOrder; + } else { + return Long.compare(o1.right.getStartTime(), o2.right.getStartTime()); + } + } + } + + protected class ChunkMetadataExternalSortWindow { + + final LocalTsFileInput input; + final long startPosition; + final long endPosition; + final ByteBuffer sizeBuffer = ByteBuffer.allocate(4); + final ByteBuffer typeBuffer = ByteBuffer.allocate(1); + Pair currentPair = null; + + ChunkMetadataExternalSortWindow(long startPosition, long endPosition, LocalTsFileInput input) + throws IOException { + this.startPosition = startPosition; + this.endPosition = endPosition; + this.input = input; + this.input.position(startPosition); + } + + public boolean hasNextChunkMetadata() throws IOException { + return currentPair != null || this.input.position() < endPosition; + } + + public Pair getNextSeriesNameAndChunkMetadata() throws IOException { + byte type = readNextChunkMetadataType(); + int size = readNextChunkMetadataSize(); + ByteBuffer chunkBuffer = ByteBuffer.allocate(size); + ReadWriteIOUtils.readAsPossible(input, chunkBuffer); + chunkBuffer.flip(); + if (type == NORMAL_TYPE) { + ChunkMetadata chunkMetadata = new ChunkMetadata(); + String seriesPath = ChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata); + currentPair = new Pair<>(seriesPath, chunkMetadata); + } else { + AlignedChunkMetadata chunkMetadata = new 
AlignedChunkMetadata(); + String devicePath = + AlignedChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata); + currentPair = new Pair<>(devicePath, chunkMetadata); + } + return currentPair; + } + + public Pair getCurrentSeriesNameAndChunkMetadata() { + return currentPair; + } + + private byte readNextChunkMetadataType() throws IOException { + typeBuffer.clear(); + ReadWriteIOUtils.readAsPossible(input, typeBuffer); + typeBuffer.flip(); + return ReadWriteIOUtils.readByte(typeBuffer); + } + + private int readNextChunkMetadataSize() throws IOException { + sizeBuffer.clear(); + ReadWriteIOUtils.readAsPossible(input, sizeBuffer); + sizeBuffer.flip(); + return ReadWriteIOUtils.readInt(sizeBuffer); + } + } } diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java index e894901a0aeb0..25bd19297465e 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java @@ -23,6 +23,7 @@ import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; +import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput; import org.apache.iotdb.tsfile.utils.Binary; import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.TsPrimitiveType; @@ -100,8 +101,14 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { writer.sortAndFlushChunkMetadata(); writer.tempOutput.flush(); + ChunkMetadataExternalSortWindow window = + writer + .new ChunkMetadataExternalSortWindow( + 0, + writer.chunkMetadataTempFile.length(), + new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); for (int i = 0; i < originChunkMetadataList.size(); ++i) { - Pair chunkMetadataPair = writer.readNextChunkMetadata(); + Pair chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata(); Assert.assertEquals("root.sg.d" + i / 5 + ".s" + i % 5, chunkMetadataPair.left); Assert.assertEquals( originChunkMetadataList.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); @@ -150,8 +157,14 @@ public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException new AlignedChunkMetadata(currentTimeChunkMetadata, currentValueChunkMetadata)); } + ChunkMetadataExternalSortWindow window = + writer + .new ChunkMetadataExternalSortWindow( + 0, + writer.chunkMetadataTempFile.length(), + new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); for (int i = 0; i < alignedChunkMetadata.size(); ++i) { - Pair chunkMetadataPair = writer.readNextChunkMetadata(); + Pair chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata(); Assert.assertEquals("root.sg.d" + i, chunkMetadataPair.left); Assert.assertEquals( alignedChunkMetadata.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); @@ -213,8 +226,14 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { writer.sortAndFlushChunkMetadata(); writer.tempOutput.flush(); + ChunkMetadataExternalSortWindow window = + writer + .new ChunkMetadataExternalSortWindow( + 0, + writer.chunkMetadataTempFile.length(), + new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); for (int i = 0, deviceCnt = 0; i < originChunkMetadataList.size(); ++i) { - Pair chunkMetadataPair = 
writer.readNextChunkMetadata(); + Pair chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata(); if (originChunkMetadataList.get(i) instanceof ChunkMetadata) { Assert.assertEquals( "root.sg.d" + deviceCnt + "." + originChunkMetadataList.get(i).getMeasurementUid(), From eda85f5ad2cb6dbac82e920d8e445d22f60597cc Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Tue, 6 Sep 2022 15:22:26 +0800 Subject: [PATCH 04/31] temp --- .../writer/MemoryControlTsFileIOWriter.java | 224 +++++++----------- .../tsfile/write/writer/TsFileIOWriter.java | 16 +- .../MemoryControlTsFileIOWriterTest.java | 12 +- 3 files changed, 104 insertions(+), 148 deletions(-) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index e84970ced18c5..371f4d2ae7a4d 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -19,9 +19,11 @@ package org.apache.iotdb.tsfile.write.writer; +import org.apache.iotdb.tsfile.file.MetaMarker; import org.apache.iotdb.tsfile.file.metadata.AlignedChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.read.common.Path; import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput; @@ -29,7 +31,6 @@ import org.apache.iotdb.tsfile.utils.PublicBAOS; import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; -import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,8 +40,6 @@ import java.nio.ByteBuffer; import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Queue; @@ -51,33 +50,42 @@ public class MemoryControlTsFileIOWriter extends TsFileIOWriter { protected long currentChunkMetadataSize = 0L; protected File chunkMetadataTempFile; protected LocalTsFileOutput tempOutput; - protected final boolean needSort; - protected Queue sortedSegmentPosition = new ArrayDeque<>(); + protected final boolean autoControl; + // it stores the start address of persisted chunk metadata for per series + protected Queue segmentForPerSeries = new ArrayDeque<>(); + protected String currentSeries = null; public static final String CHUNK_METADATA_TEMP_FILE_PREFIX = ".cmt"; - private static final String SORTING_TEMP_FILE = ".scmt"; private static final byte VECTOR_TYPE = 1; private static final byte NORMAL_TYPE = 2; - public MemoryControlTsFileIOWriter(File file, long maxMetadataSize, boolean needSort) + public MemoryControlTsFileIOWriter(File file, long maxMetadataSize, boolean autoControl) throws IOException { super(file); this.maxMetadataSize = maxMetadataSize; this.chunkMetadataTempFile = new File(file.getAbsoluteFile() + CHUNK_METADATA_TEMP_FILE_PREFIX); - this.needSort = needSort; + this.autoControl = autoControl; } @Override public void endCurrentChunk() { currentChunkMetadataSize += currentChunkMetadata.calculateRamSize(); super.endCurrentChunk(); + if (this.autoControl) { + checkMetadataSizeAndMayFlush(); + } + } + + public boolean checkMetadataSizeAndMayFlush() { if (currentChunkMetadataSize > maxMetadataSize) { try { 
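        // The call below groups all in-memory chunk metadata by series
        // (groupChunkMetadataListBySeries), appends each group to the .cmt temp
        // file as [type byte | size | payload] records, and records the start
        // offset of every series segment in segmentForPerSeries.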
sortAndFlushChunkMetadata(); + return true; } catch (IOException e) { LOG.error("Meets exception when flushing metadata to temp files", e); } } + return false; } protected void sortAndFlushChunkMetadata() throws IOException { @@ -86,10 +94,10 @@ protected void sortAndFlushChunkMetadata() throws IOException { if (tempOutput == null) { tempOutput = new LocalTsFileOutput(new FileOutputStream(chunkMetadataTempFile)); } - sortedSegmentPosition.add(tempOutput.getPosition()); // the file structure in temp file will be // ChunkType | chunkSize | chunkBuffer for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { + segmentForPerSeries.add(tempOutput.getPosition()); Path seriesPath = entry.getKey(); List iChunkMetadataList = entry.getValue(); writeChunkMetadata(iChunkMetadataList, seriesPath, tempOutput); @@ -103,27 +111,33 @@ private void writeChunkMetadata( return; } if (iChunkMetadataList.get(0).getDataType() == TSDataType.VECTOR) { - IChunkMetadata currentTimeChunk = iChunkMetadataList.get(0); - List currentValueChunk = new ArrayList<>(); - List alignedChunkMetadata = new ArrayList<>(); - for (int i = 1; i < iChunkMetadataList.size(); ++i) { - if (iChunkMetadataList.get(i).getDataType() == TSDataType.VECTOR) { - alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk)); - currentTimeChunk = iChunkMetadataList.get(i); - currentValueChunk = new ArrayList<>(); - } else { - currentValueChunk.add(iChunkMetadataList.get(i)); - } - } - if (currentValueChunk.size() > 0) { - alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk)); - } + // pack the TimeChunkMetadata and List into List + List alignedChunkMetadata = packAlignedChunkMetadata(iChunkMetadataList); writeAlignedChunkMetadata(alignedChunkMetadata, seriesPath, output); } else { writeNormalChunkMetadata(iChunkMetadataList, seriesPath, output); } } + private List packAlignedChunkMetadata(List iChunkMetadataList) { + IChunkMetadata currentTimeChunk = iChunkMetadataList.get(0); + List currentValueChunk = new ArrayList<>(); + List alignedChunkMetadata = new ArrayList<>(); + for (int i = 1; i < iChunkMetadataList.size(); ++i) { + if (iChunkMetadataList.get(i).getDataType() == TSDataType.VECTOR) { + alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk)); + currentTimeChunk = iChunkMetadataList.get(i); + currentValueChunk = new ArrayList<>(); + } else { + currentValueChunk.add(iChunkMetadataList.get(i)); + } + } + if (currentValueChunk.size() > 0) { + alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk)); + } + return alignedChunkMetadata; + } + private void writeAlignedChunkMetadata( List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) throws IOException { @@ -150,112 +164,60 @@ private void writeNormalChunkMetadata( @Override public void endFile() throws IOException { - if (this.sortedSegmentPosition.size() > 0) { + if (this.segmentForPerSeries.size() > 0) { // there is some chunk metadata already been written to the disk + // first we should flush the remaining chunk metadata in memory to disk + // then read the persisted chunk metadata from disk sortAndFlushChunkMetadata(); tempOutput.close(); } else { - // sort the chunk metadata in memory, and just close the file + // sort the chunk metadata in memory, construct the index tree + // and just close the file tempOutput.close(); super.endFile(); return; } - if (needSort) { - externalSort(); - } - - // super.endFile(); + // read in the chunk metadata, and 
construct the index tree + readChunkMetadataAndConstructIndexTree(); } - protected void externalSort() throws IOException { - ChunkMetadataComparator comparator = new ChunkMetadataComparator(); - int totalSegmentCount = this.sortedSegmentPosition.size(); - File currentInFile = this.chunkMetadataTempFile; - File currentOutFile = new File(this.file.getAbsolutePath() + SORTING_TEMP_FILE); - LocalTsFileInput inputForWindow1 = null; - LocalTsFileInput inputForWindow2 = null; - LocalTsFileOutput output = null; - while (totalSegmentCount > 1) { - try { - inputForWindow1 = new LocalTsFileInput(currentInFile.toPath()); - inputForWindow2 = new LocalTsFileInput(currentInFile.toPath()); - output = new LocalTsFileOutput(new FileOutputStream(currentOutFile)); - totalSegmentCount = 0; - Queue newSortedSegmentPosition = new ArrayDeque<>(); - while (sortedSegmentPosition.size() > 0) { - long startPositionForWindow1 = sortedSegmentPosition.poll(); - if (sortedSegmentPosition.size() == 0) { - // Just leave it alone, and record the position - newSortedSegmentPosition.add(startPositionForWindow1); - continue; - } - long startPositionForWindow2 = sortedSegmentPosition.poll(); - ChunkMetadataExternalSortWindow firstWindow = - new ChunkMetadataExternalSortWindow( - startPositionForWindow1, startPositionForWindow2, inputForWindow1); - ChunkMetadataExternalSortWindow secondWindow = - new ChunkMetadataExternalSortWindow( - startPositionForWindow2, - sortedSegmentPosition.size() > 0 - ? sortedSegmentPosition.element() - : this.chunkMetadataTempFile.length(), - inputForWindow2); - firstWindow.getNextSeriesNameAndChunkMetadata(); - secondWindow.getNextSeriesNameAndChunkMetadata(); - newSortedSegmentPosition.add(output.getPosition()); - while (firstWindow.hasNextChunkMetadata() && secondWindow.hasNextChunkMetadata()) { - Pair pairOfFirstWindow = - firstWindow.getCurrentSeriesNameAndChunkMetadata(); - Pair pairOfSecondWindow = - secondWindow.getCurrentSeriesNameAndChunkMetadata(); - Pair pairToWritten = null; - if (comparator.compare(pairOfFirstWindow, pairOfSecondWindow) < 0) { - pairToWritten = pairOfFirstWindow; - if (firstWindow.hasNextChunkMetadata()) { - firstWindow.getNextSeriesNameAndChunkMetadata(); - } - } else { - pairToWritten = pairOfSecondWindow; - if (secondWindow.hasNextChunkMetadata()) { - secondWindow.getNextSeriesNameAndChunkMetadata(); - } - } - // serialize the chunk to the output - if (pairToWritten.right instanceof AlignedChunkMetadata) { - writeAlignedChunkMetadata( - Collections.singletonList(pairToWritten.right), - new Path(pairToWritten.left), - output); - } else { - writeNormalChunkMetadata( - Collections.singletonList(pairToWritten.right), - new Path(pairToWritten.left), - output); - } - } - } + private void readChunkMetadataAndConstructIndexTree() throws IOException { + tempOutput.close(); + long metaOffset = out.getPosition(); - output.close(); - inputForWindow1.close(); - inputForWindow2.close(); - FileUtils.delete(currentInFile); - currentOutFile.renameTo(currentInFile); - File tempFile = currentOutFile; - currentOutFile = currentInFile; - currentInFile = tempFile; - } finally { - if (inputForWindow1 != null) { - inputForWindow1.close(); - } - if (inputForWindow2 != null) { - inputForWindow2.close(); - } - if (output != null) { - output.close(); - } - } + // serialize the SEPARATOR of MetaData + ReadWriteIOUtils.write(MetaMarker.SEPARATOR, out.wrapAsStream()); + ChunkMetadataReadIterator iterator = + new ChunkMetadataReadIterator( + 0, + chunkMetadataTempFile.length(), + new 
LocalTsFileInput(chunkMetadataTempFile.toPath())); + while (iterator.hasNextChunkMetadata()) { + // 1. read in all chunk metadata of one series + // 2. construct the timeseries metadata for this series + // 3. construct the index tree node for the series + // 4. serialize the timeseries metadata to file + TimeseriesMetadata timeseriesMetadata = readTimeseriesMetadata(iterator); + } + } + + private TimeseriesMetadata readTimeseriesMetadata(ChunkMetadataReadIterator iterator) + throws IOException { + Pair currentPair = iterator.getCurrentPair(); + if (currentPair == null) { + currentPair = iterator.getNextSeriesNameAndChunkMetadata(); + } + if (!currentPair.left.equals(currentSeries)) { + // come to a new series + currentSeries = currentPair.left; + } + List iChunkMetadataList = new ArrayList<>(); + while (currentPair != null && currentPair.left.equals(currentSeries)) { + iChunkMetadataList.add(currentPair.right); + currentPair = iterator.getNextSeriesNameAndChunkMetadata(); } + return super.constructOneTimeseriesMetadata(new Path(currentSeries), iChunkMetadataList, false); } @Override @@ -266,32 +228,16 @@ public void close() throws IOException { } } - protected static class ChunkMetadataComparator - implements Comparator> { - - @Override - public int compare(Pair o1, Pair o2) { - String seriesNameOfO1 = o1.left; - String seriesNameOfO2 = o2.left; - int lexicographicalOrder = seriesNameOfO1.compareTo(seriesNameOfO2); - if (lexicographicalOrder != 0) { - return lexicographicalOrder; - } else { - return Long.compare(o1.right.getStartTime(), o2.right.getStartTime()); - } - } - } - - protected class ChunkMetadataExternalSortWindow { + protected class ChunkMetadataReadIterator { final LocalTsFileInput input; final long startPosition; final long endPosition; final ByteBuffer sizeBuffer = ByteBuffer.allocate(4); final ByteBuffer typeBuffer = ByteBuffer.allocate(1); - Pair currentPair = null; + private Pair currentPair = null; - ChunkMetadataExternalSortWindow(long startPosition, long endPosition, LocalTsFileInput input) + ChunkMetadataReadIterator(long startPosition, long endPosition, LocalTsFileInput input) throws IOException { this.startPosition = startPosition; this.endPosition = endPosition; @@ -304,6 +250,10 @@ public boolean hasNextChunkMetadata() throws IOException { } public Pair getNextSeriesNameAndChunkMetadata() throws IOException { + if (input.position() >= endPosition) { + currentPair = null; + return null; + } byte type = readNextChunkMetadataType(); int size = readNextChunkMetadataSize(); ByteBuffer chunkBuffer = ByteBuffer.allocate(size); @@ -322,7 +272,7 @@ public Pair getNextSeriesNameAndChunkMetadata() throws I return currentPair; } - public Pair getCurrentSeriesNameAndChunkMetadata() { + public Pair getCurrentPair() { return currentPair; } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 59a8ec236d629..c34ab1df63f87 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -327,7 +327,7 @@ private MetadataIndexNode flushMetadataIndex(Map> chu // create device -> TimeseriesMetaDataList Map for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { // for ordinary path - flushOneChunkMetadata(entry.getKey(), entry.getValue()); + constructOneTimeseriesMetadata(entry.getKey(), entry.getValue(), true); } // construct TsFileMetadata and 
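As an aside, the temp file consumed by ChunkMetadataReadIterator above follows the record layout noted earlier (ChunkType | chunkSize | chunkBuffer), so it can be walked with nothing more than a DataInputStream. A minimal standalone sketch, not part of the patch: the file path is hypothetical, and it assumes ReadWriteIOUtils writes the type as one byte and the size as a big-endian int; decoding chunkBuffer itself is left to the counterpart of serializeWithFullInfo.

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

public class ChunkMetadataTempFileDump {
  public static void main(String[] args) throws IOException {
    // hypothetical path: <tsfile path> + CHUNK_METADATA_TEMP_FILE_PREFIX (".cmt")
    File tempFile = new File("target/1-1-0-0.tsfile.cmt");
    long remaining = tempFile.length();
    try (DataInputStream in =
        new DataInputStream(new BufferedInputStream(new FileInputStream(tempFile)))) {
      while (remaining > 0) {
        byte chunkType = in.readByte(); // VECTOR_TYPE or NORMAL_TYPE marker
        int size = in.readInt(); // length of the serialized chunk metadata that follows
        byte[] chunkBuffer = new byte[size];
        in.readFully(chunkBuffer); // full path, data type and statistics
        remaining -= 1 + 4 + size;
        System.out.println("type=" + chunkType + " size=" + size);
      }
    }
  }
}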
return @@ -339,8 +339,11 @@ private MetadataIndexNode flushMetadataIndex(Map> chu * * @param path Path of chunk * @param chunkMetadataList List of chunkMetadata about path(previous param) + * @param needRecordInMap need to record the timeseries metadata in deviceTimeseriesMetadataMap + * @return the constructed TimeseriesMetadata */ - private void flushOneChunkMetadata(Path path, List chunkMetadataList) + protected TimeseriesMetadata constructOneTimeseriesMetadata( + Path path, List chunkMetadataList, boolean needRecordInMap) throws IOException { // create TimeseriesMetaData PublicBAOS publicBAOS = new PublicBAOS(); @@ -367,9 +370,12 @@ private void flushOneChunkMetadata(Path path, List chunkMetadata dataType, seriesStatistics, publicBAOS); - deviceTimeseriesMetadataMap - .computeIfAbsent(path.getDevice(), k -> new ArrayList<>()) - .add(timeseriesMetadata); + if (needRecordInMap) { + deviceTimeseriesMetadataMap + .computeIfAbsent(path.getDevice(), k -> new ArrayList<>()) + .add(timeseriesMetadata); + } + return timeseriesMetadata; } /** diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java index 25bd19297465e..392a69922252d 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java @@ -101,9 +101,9 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { writer.sortAndFlushChunkMetadata(); writer.tempOutput.flush(); - ChunkMetadataExternalSortWindow window = + ChunkMetadataReadIterator window = writer - .new ChunkMetadataExternalSortWindow( + .new ChunkMetadataReadIterator( 0, writer.chunkMetadataTempFile.length(), new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); @@ -157,9 +157,9 @@ public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException new AlignedChunkMetadata(currentTimeChunkMetadata, currentValueChunkMetadata)); } - ChunkMetadataExternalSortWindow window = + ChunkMetadataReadIterator window = writer - .new ChunkMetadataExternalSortWindow( + .new ChunkMetadataReadIterator( 0, writer.chunkMetadataTempFile.length(), new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); @@ -226,9 +226,9 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { writer.sortAndFlushChunkMetadata(); writer.tempOutput.flush(); - ChunkMetadataExternalSortWindow window = + ChunkMetadataReadIterator window = writer - .new ChunkMetadataExternalSortWindow( + .new ChunkMetadataReadIterator( 0, writer.chunkMetadataTempFile.length(), new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); From 1afdabe328399e5c6d474ed277d51a5d99625cf6 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Tue, 6 Sep 2022 17:09:28 +0800 Subject: [PATCH 05/31] finish memory control tsfile io writer --- .../metadata/MetadataIndexConstructor.java | 4 +- .../file/metadata/MetadataIndexNode.java | 2 +- .../tsfile/file/metadata/TsFileMetadata.java | 9 +- .../writer/MemoryControlTsFileIOWriter.java | 123 +++++++++++++++++- .../tsfile/write/writer/TsFileIOWriter.java | 4 +- 5 files changed, 132 insertions(+), 10 deletions(-) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java index 062ffd6183ae1..beb9ddb3e53a2 100644 --- 
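The hunk above splits the old flushOneChunkMetadata into constructOneTimeseriesMetadata, with needRecordInMap deciding whether the result also lands in deviceTimeseriesMetadataMap. A rough sketch of the two call styles from a subclass's point of view (the series path and chunk metadata list are placeholders, not taken from the patch):

// inside a TsFileIOWriter subclass, given a prepared List<IChunkMetadata> chunkMetadataList
Path seriesPath = new Path("root.sg.d1.s1");

// in-memory endFile flow: record the result under its device for later index building
constructOneTimeseriesMetadata(seriesPath, chunkMetadataList, true);

// memory-controlled flow: only build and return, the caller serializes it directly
TimeseriesMetadata timeseriesMetadata =
    constructOneTimeseriesMetadata(seriesPath, chunkMetadataList, false);
timeseriesMetadata.serializeTo(out.wrapAsStream());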
a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java @@ -123,7 +123,7 @@ public static MetadataIndexNode constructMetadataIndex( * @param out tsfile output * @param type MetadataIndexNode type */ - private static MetadataIndexNode generateRootNode( + public static MetadataIndexNode generateRootNode( Queue metadataIndexNodeQueue, TsFileOutput out, MetadataIndexNodeType type) throws IOException { int queueSize = metadataIndexNodeQueue.size(); @@ -148,7 +148,7 @@ private static MetadataIndexNode generateRootNode( return metadataIndexNodeQueue.poll(); } - private static void addCurrentIndexNodeToQueue( + public static void addCurrentIndexNodeToQueue( MetadataIndexNode currentIndexNode, Queue metadataIndexNodeQueue, TsFileOutput out) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexNode.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexNode.java index 3f6f6336b30ad..1d3972cafe4d5 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexNode.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexNode.java @@ -74,7 +74,7 @@ public void addEntry(MetadataIndexEntry metadataIndexEntry) { this.children.add(metadataIndexEntry); } - boolean isFull() { + public boolean isFull() { return children.size() >= config.getMaxDegreeOfIndexNode(); } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/TsFileMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/TsFileMetadata.java index 95e01e2da11fd..f6f974fc1ad13 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/TsFileMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/TsFileMetadata.java @@ -105,10 +105,15 @@ public int serializeTo(OutputStream outputStream) throws IOException { * @param outputStream -output stream to determine byte length * @return -byte length */ - public int serializeBloomFilter(OutputStream outputStream, Set paths) throws IOException { - int byteLen = 0; + public int buildAndSerializeBloomFilter(OutputStream outputStream, Set paths) + throws IOException { BloomFilter filter = buildBloomFilter(paths); + return serializeBloomFilter(outputStream, filter); + } + public int serializeBloomFilter(OutputStream outputStream, BloomFilter filter) + throws IOException { + int byteLen = 0; byte[] bytes = filter.serialize(); byteLen += ReadWriteForEncodingUtils.writeUnsignedVarInt(bytes.length, outputStream); outputStream.write(bytes); diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index 371f4d2ae7a4d..acd0ba1aa4127 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -19,14 +19,21 @@ package org.apache.iotdb.tsfile.write.writer; +import org.apache.iotdb.tsfile.common.conf.TSFileConfig; +import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor; import org.apache.iotdb.tsfile.file.MetaMarker; import org.apache.iotdb.tsfile.file.metadata.AlignedChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; +import 
org.apache.iotdb.tsfile.file.metadata.MetadataIndexEntry; +import org.apache.iotdb.tsfile.file.metadata.MetadataIndexNode; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; +import org.apache.iotdb.tsfile.file.metadata.TsFileMetadata; +import org.apache.iotdb.tsfile.file.metadata.enums.MetadataIndexNodeType; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.read.common.Path; import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput; +import org.apache.iotdb.tsfile.utils.BloomFilter; import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.PublicBAOS; import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; @@ -43,6 +50,10 @@ import java.util.List; import java.util.Map; import java.util.Queue; +import java.util.TreeMap; + +import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.addCurrentIndexNodeToQueue; +import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.generateRootNode; public class MemoryControlTsFileIOWriter extends TsFileIOWriter { private static final Logger LOG = LoggerFactory.getLogger(MemoryControlTsFileIOWriter.class); @@ -54,6 +65,9 @@ public class MemoryControlTsFileIOWriter extends TsFileIOWriter { // it stores the start address of persisted chunk metadata for per series protected Queue segmentForPerSeries = new ArrayDeque<>(); protected String currentSeries = null; + // record the total num of path in order to make bloom filter + protected int pathCount = 0; + Path lastSerializePath = null; public static final String CHUNK_METADATA_TEMP_FILE_PREFIX = ".cmt"; private static final byte VECTOR_TYPE = 1; @@ -99,8 +113,12 @@ protected void sortAndFlushChunkMetadata() throws IOException { for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { segmentForPerSeries.add(tempOutput.getPosition()); Path seriesPath = entry.getKey(); + if (!seriesPath.equals(lastSerializePath)) { + pathCount++; + } List iChunkMetadataList = entry.getValue(); writeChunkMetadata(iChunkMetadataList, seriesPath, tempOutput); + lastSerializePath = seriesPath; } } @@ -188,20 +206,117 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { // serialize the SEPARATOR of MetaData ReadWriteIOUtils.write(MetaMarker.SEPARATOR, out.wrapAsStream()); + ChunkMetadataReadIterator iterator = new ChunkMetadataReadIterator( 0, chunkMetadataTempFile.length(), new LocalTsFileInput(chunkMetadataTempFile.toPath())); + Map deviceMetadataIndexMap = new TreeMap<>(); + Queue measurementMetadataIndexQueue = null; + String currentDevice = null; + String prevDevice = null; + MetadataIndexNode currentIndexNode = + new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); + TSFileConfig config = TSFileDescriptor.getInstance().getConfig(); + int seriesIdxForCurrDevice = 0; + BloomFilter filter = + BloomFilter.getEmptyBloomFilter( + TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), pathCount); while (iterator.hasNextChunkMetadata()) { - // 1. read in all chunk metadata of one series - // 2. construct the timeseries metadata for this series - // 3. construct the index tree node for the series - // 4. 
serialize the timeseries metadata to file + // read in all chunk metadata of one series + // construct the timeseries metadata for this series TimeseriesMetadata timeseriesMetadata = readTimeseriesMetadata(iterator); + // build bloom filter + filter.add(currentSeries); + // construct the index tree node for the series + currentDevice = new Path(currentSeries).getDevice(); + if (!currentDevice.equals(prevDevice)) { + if (prevDevice != null) { + addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); + deviceMetadataIndexMap.put( + prevDevice, + generateRootNode( + measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); + currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); + } + measurementMetadataIndexQueue = new ArrayDeque<>(); + seriesIdxForCurrDevice = 0; + } + + if (seriesIdxForCurrDevice % config.getMaxDegreeOfIndexNode() == 0) { + if (currentIndexNode.isFull()) { + addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); + currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); + } + currentIndexNode.addEntry( + new MetadataIndexEntry(timeseriesMetadata.getMeasurementId(), out.getPosition())); + } + + prevDevice = currentDevice; + seriesIdxForCurrDevice++; + // serialize the timeseries metadata to file + timeseriesMetadata.serializeTo(out.wrapAsStream()); + } + + MetadataIndexNode metadataIndex = null; + // if not exceed the max child nodes num, ignore the device index and directly point to the + // measurement + if (deviceMetadataIndexMap.size() <= config.getMaxDegreeOfIndexNode()) { + MetadataIndexNode metadataIndexNode = + new MetadataIndexNode(MetadataIndexNodeType.LEAF_DEVICE); + for (Map.Entry entry : deviceMetadataIndexMap.entrySet()) { + metadataIndexNode.addEntry(new MetadataIndexEntry(entry.getKey(), out.getPosition())); + entry.getValue().serializeTo(out.wrapAsStream()); + } + metadataIndexNode.setEndOffset(out.getPosition()); + metadataIndex = metadataIndexNode; + } else { + // else, build level index for devices + Queue deviceMetadataIndexQueue = new ArrayDeque<>(); + currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_DEVICE); + + for (Map.Entry entry : deviceMetadataIndexMap.entrySet()) { + // when constructing from internal node, each node is related to an entry + if (currentIndexNode.isFull()) { + addCurrentIndexNodeToQueue(currentIndexNode, deviceMetadataIndexQueue, out); + currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_DEVICE); + } + currentIndexNode.addEntry(new MetadataIndexEntry(entry.getKey(), out.getPosition())); + entry.getValue().serializeTo(out.wrapAsStream()); + } + addCurrentIndexNodeToQueue(currentIndexNode, deviceMetadataIndexQueue, out); + MetadataIndexNode deviceMetadataIndexNode = + generateRootNode(deviceMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_DEVICE); + deviceMetadataIndexNode.setEndOffset(out.getPosition()); + metadataIndex = deviceMetadataIndexNode; } + + TsFileMetadata tsFileMetadata = new TsFileMetadata(); + tsFileMetadata.setMetadataIndex(metadataIndex); + tsFileMetadata.setMetaOffset(metaOffset); + + int size = tsFileMetadata.serializeTo(out.wrapAsStream()); + size += tsFileMetadata.serializeBloomFilter(out.wrapAsStream(), filter); + + // write TsFileMetaData size + ReadWriteIOUtils.write(size, out.wrapAsStream()); + + // write magic string + out.write(MAGIC_STRING_BYTES); + + // close file + out.close(); + canWrite = false; } + /** + * Read in all the chunk 
metadata for a series, and construct a TimeseriesMetadata for it + * + * @param iterator + * @return + * @throws IOException + */ private TimeseriesMetadata readTimeseriesMetadata(ChunkMetadataReadIterator iterator) throws IOException { Pair currentPair = iterator.getCurrentPair(); diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index c34ab1df63f87..aba72ce2e4e60 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -294,7 +294,9 @@ public void endFile() throws IOException { } // write bloom filter - size += tsFileMetaData.serializeBloomFilter(out.wrapAsStream(), chunkMetadataListMap.keySet()); + size += + tsFileMetaData.buildAndSerializeBloomFilter( + out.wrapAsStream(), chunkMetadataListMap.keySet()); if (logger.isDebugEnabled()) { logger.debug("finish flushing the bloom filter file pos:{}", out.getPosition()); } From c99d54ac80da0bfce55e7d5829f1e6aefc941112 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Tue, 6 Sep 2022 17:22:23 +0800 Subject: [PATCH 06/31] refactor some codes --- .../metadata/MetadataIndexConstructor.java | 5 ++ .../writer/MemoryControlTsFileIOWriter.java | 49 ++++--------------- 2 files changed, 15 insertions(+), 39 deletions(-) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java index beb9ddb3e53a2..44cdc8b0bf4b6 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/MetadataIndexConstructor.java @@ -81,6 +81,11 @@ public static MetadataIndexNode constructMetadataIndex( measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); } + return checkAndBuildLevelIndex(deviceMetadataIndexMap, out); + } + + public static MetadataIndexNode checkAndBuildLevelIndex( + Map deviceMetadataIndexMap, TsFileOutput out) throws IOException { // if not exceed the max child nodes num, ignore the device index and directly point to the // measurement if (deviceMetadataIndexMap.size() <= config.getMaxDegreeOfIndexNode()) { diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index acd0ba1aa4127..b8d8c7b768d1a 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -53,6 +53,7 @@ import java.util.TreeMap; import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.addCurrentIndexNodeToQueue; +import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.checkAndBuildLevelIndex; import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.generateRootNode; public class MemoryControlTsFileIOWriter extends TsFileIOWriter { @@ -198,6 +199,13 @@ public void endFile() throws IOException { // read in the chunk metadata, and construct the index tree readChunkMetadataAndConstructIndexTree(); + + // write magic string + out.write(MAGIC_STRING_BYTES); + + // close file + out.close(); + canWrite = false; } private void 
readChunkMetadataAndConstructIndexTree() throws IOException { @@ -223,6 +231,7 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { BloomFilter filter = BloomFilter.getEmptyBloomFilter( TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), pathCount); + while (iterator.hasNextChunkMetadata()) { // read in all chunk metadata of one series // construct the timeseries metadata for this series @@ -259,38 +268,7 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { timeseriesMetadata.serializeTo(out.wrapAsStream()); } - MetadataIndexNode metadataIndex = null; - // if not exceed the max child nodes num, ignore the device index and directly point to the - // measurement - if (deviceMetadataIndexMap.size() <= config.getMaxDegreeOfIndexNode()) { - MetadataIndexNode metadataIndexNode = - new MetadataIndexNode(MetadataIndexNodeType.LEAF_DEVICE); - for (Map.Entry entry : deviceMetadataIndexMap.entrySet()) { - metadataIndexNode.addEntry(new MetadataIndexEntry(entry.getKey(), out.getPosition())); - entry.getValue().serializeTo(out.wrapAsStream()); - } - metadataIndexNode.setEndOffset(out.getPosition()); - metadataIndex = metadataIndexNode; - } else { - // else, build level index for devices - Queue deviceMetadataIndexQueue = new ArrayDeque<>(); - currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_DEVICE); - - for (Map.Entry entry : deviceMetadataIndexMap.entrySet()) { - // when constructing from internal node, each node is related to an entry - if (currentIndexNode.isFull()) { - addCurrentIndexNodeToQueue(currentIndexNode, deviceMetadataIndexQueue, out); - currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_DEVICE); - } - currentIndexNode.addEntry(new MetadataIndexEntry(entry.getKey(), out.getPosition())); - entry.getValue().serializeTo(out.wrapAsStream()); - } - addCurrentIndexNodeToQueue(currentIndexNode, deviceMetadataIndexQueue, out); - MetadataIndexNode deviceMetadataIndexNode = - generateRootNode(deviceMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_DEVICE); - deviceMetadataIndexNode.setEndOffset(out.getPosition()); - metadataIndex = deviceMetadataIndexNode; - } + MetadataIndexNode metadataIndex = checkAndBuildLevelIndex(deviceMetadataIndexMap, out); TsFileMetadata tsFileMetadata = new TsFileMetadata(); tsFileMetadata.setMetadataIndex(metadataIndex); @@ -301,13 +279,6 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { // write TsFileMetaData size ReadWriteIOUtils.write(size, out.wrapAsStream()); - - // write magic string - out.write(MAGIC_STRING_BYTES); - - // close file - out.close(); - canWrite = false; } /** From 0a0a420a1476c755c96b2cd24eceeb48e71a1bd2 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Wed, 7 Sep 2022 17:05:53 +0800 Subject: [PATCH 07/31] fix some bugs, and add some test --- .../iotdb/tsfile/TsFileSequenceRead.java | 2 +- .../writer/MemoryControlTsFileIOWriter.java | 78 ++- .../tsfile/write/writer/TsFileIOWriter.java | 9 +- .../write/TsFileIntegrityCheckingTool.java | 175 +++++ .../MemoryControlTsFileIOWriterTest.java | 620 ++++++++++++++++-- 5 files changed, 814 insertions(+), 70 deletions(-) create mode 100644 tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java diff --git a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java index aa946f67b7d5e..d15882b3b8d69 100644 --- 
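The checkAndBuildLevelIndex helper factored out above keeps fanning entries into index nodes of at most maxDegreeOfIndexNode children until a single root remains. A standalone sketch for reasoning about the resulting tree depth (the degree of 256 is an assumed default config value, not taken from the patch):

public class IndexFanOutSketch {
  // Number of levels a fan-out-capped index needs above `entryCount` leaf entries.
  static int levels(long entryCount, int maxDegree) {
    int levels = 1;
    long nodes = entryCount;
    while (nodes > maxDegree) {
      nodes = (nodes + maxDegree - 1) / maxDegree; // parents required for this level
      levels++;
    }
    return levels;
  }

  public static void main(String[] args) {
    System.out.println(levels(5, 256)); // 1: all devices fit in one LEAF_DEVICE node
    System.out.println(levels(1024, 256)); // 2: LEAF_DEVICE nodes plus an INTERNAL_DEVICE root
  }
}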
a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java +++ b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java @@ -53,7 +53,7 @@ public class TsFileSequenceRead { "squid:S106" }) // Suppress high Cognitive Complexity and Standard outputs warning public static void main(String[] args) throws IOException { - String filename = "test.tsfile"; + String filename = "C:\\Users\\MARKLAU\\Desktop\\iotdb\\1-1-0-0.tsfile"; if (args.length >= 1) { filename = args[0]; } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index b8d8c7b768d1a..fc47e74210bd4 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -64,7 +64,8 @@ public class MemoryControlTsFileIOWriter extends TsFileIOWriter { protected LocalTsFileOutput tempOutput; protected final boolean autoControl; // it stores the start address of persisted chunk metadata for per series - protected Queue segmentForPerSeries = new ArrayDeque<>(); + // protected Queue segmentForPerSeries = new ArrayDeque<>(); + protected volatile boolean hasChunkMetadataInDisk = false; protected String currentSeries = null; // record the total num of path in order to make bloom filter protected int pathCount = 0; @@ -109,10 +110,10 @@ protected void sortAndFlushChunkMetadata() throws IOException { if (tempOutput == null) { tempOutput = new LocalTsFileOutput(new FileOutputStream(chunkMetadataTempFile)); } + hasChunkMetadataInDisk = true; // the file structure in temp file will be // ChunkType | chunkSize | chunkBuffer for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { - segmentForPerSeries.add(tempOutput.getPosition()); Path seriesPath = entry.getKey(); if (!seriesPath.equals(lastSerializePath)) { pathCount++; @@ -121,6 +122,11 @@ protected void sortAndFlushChunkMetadata() throws IOException { writeChunkMetadata(iChunkMetadataList, seriesPath, tempOutput); lastSerializePath = seriesPath; } + // clear the cache metadata to release the memory + chunkGroupMetadataList.clear(); + if (chunkMetadataList != null) { + chunkMetadataList.clear(); + } } private void writeChunkMetadata( @@ -160,8 +166,8 @@ private List packAlignedChunkMetadata(List iChun private void writeAlignedChunkMetadata( List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) throws IOException { - ReadWriteIOUtils.write(VECTOR_TYPE, output); for (IChunkMetadata chunkMetadata : iChunkMetadataList) { + ReadWriteIOUtils.write(VECTOR_TYPE, output); PublicBAOS buffer = new PublicBAOS(); int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getDevice()); ReadWriteIOUtils.write(size, output); @@ -172,8 +178,8 @@ private void writeAlignedChunkMetadata( private void writeNormalChunkMetadata( List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) throws IOException { - ReadWriteIOUtils.write(NORMAL_TYPE, output); for (IChunkMetadata chunkMetadata : iChunkMetadataList) { + ReadWriteIOUtils.write(NORMAL_TYPE, output); PublicBAOS buffer = new PublicBAOS(); int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getFullPath()); ReadWriteIOUtils.write(size, output); @@ -183,7 +189,7 @@ private void writeNormalChunkMetadata( @Override public void endFile() throws IOException { - if (this.segmentForPerSeries.size() > 0) { + if 
(hasChunkMetadataInDisk) { // there is some chunk metadata already been written to the disk // first we should flush the remaining chunk metadata in memory to disk // then read the persisted chunk metadata from disk @@ -192,7 +198,6 @@ public void endFile() throws IOException { } else { // sort the chunk metadata in memory, construct the index tree // and just close the file - tempOutput.close(); super.endFile(); return; } @@ -221,7 +226,7 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { chunkMetadataTempFile.length(), new LocalTsFileInput(chunkMetadataTempFile.toPath())); Map deviceMetadataIndexMap = new TreeMap<>(); - Queue measurementMetadataIndexQueue = null; + Queue measurementMetadataIndexQueue = new ArrayDeque<>(); String currentDevice = null; String prevDevice = null; MetadataIndexNode currentIndexNode = @@ -232,14 +237,17 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { BloomFilter.getEmptyBloomFilter( TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), pathCount); + int indexCount = 0; while (iterator.hasNextChunkMetadata()) { // read in all chunk metadata of one series // construct the timeseries metadata for this series TimeseriesMetadata timeseriesMetadata = readTimeseriesMetadata(iterator); + indexCount++; // build bloom filter filter.add(currentSeries); // construct the index tree node for the series - currentDevice = new Path(currentSeries).getDevice(); + Path currentPath = new Path(currentSeries, true); + currentDevice = currentPath.getDevice(); if (!currentDevice.equals(prevDevice)) { if (prevDevice != null) { addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); @@ -259,7 +267,7 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); } currentIndexNode.addEntry( - new MetadataIndexEntry(timeseriesMetadata.getMeasurementId(), out.getPosition())); + new MetadataIndexEntry(currentPath.getMeasurement(), out.getPosition())); } prevDevice = currentDevice; @@ -268,6 +276,18 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { timeseriesMetadata.serializeTo(out.wrapAsStream()); } + addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); + deviceMetadataIndexMap.put( + prevDevice, + generateRootNode( + measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); + + if (indexCount != pathCount) { + throw new IOException( + String.format( + "Expected path count is %d, index path count is %d", pathCount, indexCount)); + } + MetadataIndexNode metadataIndex = checkAndBuildLevelIndex(deviceMetadataIndexMap, out); TsFileMetadata tsFileMetadata = new TsFileMetadata(); @@ -290,20 +310,12 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { */ private TimeseriesMetadata readTimeseriesMetadata(ChunkMetadataReadIterator iterator) throws IOException { - Pair currentPair = iterator.getCurrentPair(); - if (currentPair == null) { - currentPair = iterator.getNextSeriesNameAndChunkMetadata(); - } - if (!currentPair.left.equals(currentSeries)) { - // come to a new series - currentSeries = currentPair.left; - } List iChunkMetadataList = new ArrayList<>(); - while (currentPair != null && currentPair.left.equals(currentSeries)) { - iChunkMetadataList.add(currentPair.right); - currentPair = iterator.getNextSeriesNameAndChunkMetadata(); - } - return super.constructOneTimeseriesMetadata(new 
Path(currentSeries), iChunkMetadataList, false); + currentSeries = iterator.getAllChunkMetadataForNextSeries(iChunkMetadataList); + TimeseriesMetadata timeseriesMetadata = + super.constructOneTimeseriesMetadata(new Path(currentSeries), iChunkMetadataList, false); + timeseriesMetadata.setMeasurementId(new Path(currentSeries, true).getMeasurement()); + return timeseriesMetadata; } @Override @@ -358,6 +370,28 @@ public Pair getNextSeriesNameAndChunkMetadata() throws I return currentPair; } + public String getAllChunkMetadataForNextSeries(List iChunkMetadataList) + throws IOException { + if (currentPair == null) { + if (!hasNextChunkMetadata()) { + return null; + } else { + getNextSeriesNameAndChunkMetadata(); + } + } + String currentSeries = currentPair.left; + iChunkMetadataList.add(currentPair.right); + while (hasNextChunkMetadata()) { + getNextSeriesNameAndChunkMetadata(); + if (currentPair != null && currentPair.left.equals(currentSeries)) { + iChunkMetadataList.add(currentPair.right); + } else { + break; + } + } + return currentSeries; + } + public Pair getCurrentPair() { return currentPair; } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index aba72ce2e4e60..89f5ad6d7bb19 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -253,11 +253,10 @@ protected Map> groupChunkMetadataListBySeries() { } if (chunkMetadataList != null && chunkMetadataList.size() > 0) { - ChunkMetadata chunkMetadata = chunkMetadataList.get(0); - Path series = new Path(currentChunkGroupDeviceId, chunkMetadata.getMeasurementUid()); - chunkMetadataListMap - .computeIfAbsent(series, k -> new ArrayList<>()) - .addAll(chunkMetadataList); + for (ChunkMetadata chunkMetadata : chunkMetadataList) { + Path series = new Path(currentChunkGroupDeviceId, chunkMetadata.getMeasurementUid()); + chunkMetadataListMap.computeIfAbsent(series, k -> new ArrayList<>()).add(chunkMetadata); + } } return chunkMetadataListMap; } diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java new file mode 100644 index 0000000000000..b635d63a9c26f --- /dev/null +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
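The getAllChunkMetadataForNextSeries method above is a run-grouping loop over a stream sorted by series name, with currentPair acting as a one-element lookahead so the pair that opens the next series is not lost. The same pattern in a self-contained sketch, where plain string/integer pairs stand in for series names and chunk metadata:

import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class RunGroupingSketch {
  private Map.Entry<String, Integer> lookahead; // plays the role of currentPair
  private final Iterator<Map.Entry<String, Integer>> it;

  RunGroupingSketch(Iterator<Map.Entry<String, Integer>> it) {
    this.it = it;
  }

  /** Collects one run of equal keys into sink; returns the key, or null when exhausted. */
  String nextRun(List<Integer> sink) {
    if (lookahead == null && !it.hasNext()) {
      return null;
    }
    if (lookahead == null) {
      lookahead = it.next();
    }
    String key = lookahead.getKey();
    while (lookahead != null && lookahead.getKey().equals(key)) {
      sink.add(lookahead.getValue());
      lookahead = it.hasNext() ? it.next() : null; // cache the pair that starts the next run
    }
    return key;
  }

  public static void main(String[] args) {
    List<Map.Entry<String, Integer>> pairs =
        Arrays.asList(
            new SimpleEntry<>("root.sg.d0.s0", 1),
            new SimpleEntry<>("root.sg.d0.s0", 2),
            new SimpleEntry<>("root.sg.d0.s1", 3));
    RunGroupingSketch sketch = new RunGroupingSketch(pairs.iterator());
    List<Integer> sink = new ArrayList<>();
    String key;
    while ((key = sketch.nextRun(sink)) != null) {
      System.out.println(key + " -> " + sink); // root.sg.d0.s0 -> [1, 2], then root.sg.d0.s1 -> [3]
      sink.clear();
    }
  }
}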
+ */ + +package org.apache.iotdb.tsfile.write; + +import org.apache.iotdb.tsfile.common.conf.TSFileConfig; +import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor; +import org.apache.iotdb.tsfile.common.constant.TsFileConstant; +import org.apache.iotdb.tsfile.encoding.decoder.Decoder; +import org.apache.iotdb.tsfile.file.MetaMarker; +import org.apache.iotdb.tsfile.file.header.ChunkGroupHeader; +import org.apache.iotdb.tsfile.file.header.ChunkHeader; +import org.apache.iotdb.tsfile.file.header.PageHeader; +import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; +import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; +import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; +import org.apache.iotdb.tsfile.read.TsFileSequenceReader; +import org.apache.iotdb.tsfile.read.common.BatchData; +import org.apache.iotdb.tsfile.read.common.Chunk; +import org.apache.iotdb.tsfile.read.reader.IPointReader; +import org.apache.iotdb.tsfile.read.reader.chunk.ChunkReader; +import org.apache.iotdb.tsfile.read.reader.page.PageReader; +import org.apache.iotdb.tsfile.read.reader.page.TimePageReader; +import org.apache.iotdb.tsfile.read.reader.page.ValuePageReader; +import org.apache.iotdb.tsfile.utils.TsPrimitiveType; + +import org.junit.Assert; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Map; + +/** This class provides static methods to check the integrity of a TsFile. */ +public class TsFileIntegrityCheckingTool { + private static Logger LOG = LoggerFactory.getLogger(TsFileIntegrityCheckingTool.class); + + public static void checkIntegrityBySequenceRead(String filename) { + try (TsFileSequenceReader reader = new TsFileSequenceReader(filename)) { + String headMagicString = reader.readHeadMagic(); + Assert.assertEquals(TSFileConfig.MAGIC_STRING, headMagicString); + String tailMagicString = reader.readTailMagic(); + Assert.assertEquals(TSFileConfig.MAGIC_STRING, tailMagicString); + reader.position((long) TSFileConfig.MAGIC_STRING.getBytes().length + 1); + List<long[]> timeBatch = new ArrayList<>(); + int pageIndex = 0; + byte marker; + while ((marker = reader.readMarker()) != MetaMarker.SEPARATOR) { + switch (marker) { + case MetaMarker.CHUNK_HEADER: + case MetaMarker.TIME_CHUNK_HEADER: + case MetaMarker.VALUE_CHUNK_HEADER: + case MetaMarker.ONLY_ONE_PAGE_CHUNK_HEADER: + case MetaMarker.ONLY_ONE_PAGE_TIME_CHUNK_HEADER: + case MetaMarker.ONLY_ONE_PAGE_VALUE_CHUNK_HEADER: + ChunkHeader header = reader.readChunkHeader(marker); + if (header.getDataSize() == 0) { + // empty value chunk + break; + } + Decoder defaultTimeDecoder = + Decoder.getDecoderByType( + TSEncoding.valueOf(TSFileDescriptor.getInstance().getConfig().getTimeEncoder()), + TSDataType.INT64); + Decoder valueDecoder = + Decoder.getDecoderByType(header.getEncodingType(), header.getDataType()); + int dataSize = header.getDataSize(); + pageIndex = 0; + if (header.getDataType() == TSDataType.VECTOR) { + timeBatch.clear(); + } + while (dataSize > 0) { + valueDecoder.reset(); + PageHeader pageHeader = + reader.readPageHeader( + header.getDataType(), + (header.getChunkType() & 0x3F) == MetaMarker.CHUNK_HEADER); + ByteBuffer pageData = reader.readPage(pageHeader, header.getCompressionType()); + if ((header.getChunkType() & (byte) TsFileConstant.TIME_COLUMN_MASK) + == (byte) TsFileConstant.TIME_COLUMN_MASK) { // Time Chunk + TimePageReader timePageReader = + new TimePageReader(pageHeader, pageData, defaultTimeDecoder); + timeBatch.add(timePageReader.getNextTimeBatch()); + } else if ((header.getChunkType() & (byte) TsFileConstant.VALUE_COLUMN_MASK) + == (byte) TsFileConstant.VALUE_COLUMN_MASK) { // Value Chunk + ValuePageReader valuePageReader = + new ValuePageReader(pageHeader, pageData, header.getDataType(), valueDecoder); + TsPrimitiveType[] valueBatch = + valuePageReader.nextValueBatch(timeBatch.get(pageIndex)); + } else { // NonAligned Chunk + PageReader pageReader = + new PageReader( + pageData, header.getDataType(), valueDecoder, defaultTimeDecoder, null); + BatchData batchData = pageReader.getAllSatisfiedPageData(); + } + pageIndex++; + dataSize -= pageHeader.getSerializedPageSize(); + } + break; + case MetaMarker.CHUNK_GROUP_HEADER: + ChunkGroupHeader chunkGroupHeader = reader.readChunkGroupHeader(); + break; + case MetaMarker.OPERATION_INDEX_RANGE: + reader.readPlanIndex(); + break; + default: + MetaMarker.handleUnexpectedMarker(marker); + } + } + } catch (IOException e) { + LOG.error("Meets exception when checking integrity of tsfile", e); + Assert.fail(); + } + } + + public static void checkIntegrityByQuery( + String filename, Map<String, Map<String, List<List<Long>>>> originData) { + try (TsFileSequenceReader reader = new TsFileSequenceReader(filename)) { + Map<String, List<TimeseriesMetadata>> allTimeseriesMetadata = + reader.getAllTimeseriesMetadata(true); + Assert.assertEquals(originData.size(), allTimeseriesMetadata.size()); + for (Map.Entry<String, List<TimeseriesMetadata>> entry : allTimeseriesMetadata.entrySet()) { + String deviceId = entry.getKey(); + List<TimeseriesMetadata> timeseriesMetadataList = entry.getValue(); + Assert.assertEquals(originData.get(deviceId).size(), timeseriesMetadataList.size()); + for (TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) { + String measurementId = timeseriesMetadata.getMeasurementId(); + List<List<Long>> originChunks = originData.get(deviceId).get(measurementId); + List<IChunkMetadata> chunkMetadataList = timeseriesMetadata.getChunkMetadataList(); + Assert.assertEquals(originChunks.size(), chunkMetadataList.size()); + chunkMetadataList.sort(Comparator.comparing(IChunkMetadata::getStartTime)); + for (int i = 0; i < chunkMetadataList.size(); ++i) { + Chunk chunk = reader.readMemChunk((ChunkMetadata) chunkMetadataList.get(i)); + ChunkReader chunkReader = new ChunkReader(chunk, null); + List<Long> originValue = originChunks.get(i); + for (int valIdx = 0; chunkReader.hasNextSatisfiedPage(); ) { + IPointReader pointReader = chunkReader.nextPageData().getBatchDataIterator(); + while (pointReader.hasNextTimeValuePair()) { + Assert.assertEquals( + originValue.get(valIdx++).longValue(), + pointReader.nextTimeValuePair().getTimestamp()); + } + } + } + } + } + + } catch (IOException e) { + LOG.error("Meets exception when checking integrity of tsfile", e); + Assert.fail(); + } + } +} diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java index 25bd19297465e..392a69922252d 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java @@ -27,6 +27,7 @@ import org.apache.iotdb.tsfile.utils.Binary; import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.TsPrimitiveType; +import 
org.apache.iotdb.tsfile.write.TsFileIntegrityCheckingTool; import org.apache.iotdb.tsfile.write.chunk.AlignedChunkWriterImpl; import org.apache.iotdb.tsfile.write.chunk.ChunkWriterImpl; import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; @@ -41,24 +42,47 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Random; public class MemoryControlTsFileIOWriterTest extends MemoryControlTsFileIOWriter { private static File testFile = new File("target", "1-1-0-0.tsfile"); private static File emptyFile = new File("target", "temp"); - private static final int TEST_CHUNK_SIZE = 1000; + private long TEST_CHUNK_SIZE = 1000; + private List measurementDictInOrder = new ArrayList<>(); + private List deviceDictInOrder = new ArrayList<>(); + private boolean init = false; @Before - public void setUp() throws IOException {} + public void setUp() throws IOException { + if (!init) { + init = true; + for (int i = 0; i < 2048; ++i) { + measurementDictInOrder.add("s" + i); + deviceDictInOrder.add("root.sg.d" + i); + } + measurementDictInOrder.sort((String::compareTo)); + deviceDictInOrder.sort((String::compareTo)); + } + } @After public void tearDown() throws IOException { this.close(); - FileUtils.delete(testFile); - FileUtils.delete( - new File(testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_PREFIX)); - FileUtils.delete(emptyFile); + if (testFile.exists()) { + FileUtils.delete(testFile); + } + if (new File(testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_PREFIX) + .exists()) { + FileUtils.delete( + new File( + testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_PREFIX)); + } + if (emptyFile.exists()) { + FileUtils.delete(emptyFile); + } } public MemoryControlTsFileIOWriterTest() throws IOException { @@ -71,26 +95,26 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10, true)) { List originChunkMetadataList = new ArrayList<>(); for (int i = 0; i < 10; ++i) { - String deviceId = "root.sg.d" + i; + String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); for (int j = 0; j < 5; ++j) { ChunkWriterImpl chunkWriter; switch (j) { case 0: - chunkWriter = generateIntData(j); + chunkWriter = generateIntData(j, 0L); break; case 1: - chunkWriter = generateBooleanData(j); + chunkWriter = generateBooleanData(j, 0); break; case 2: - chunkWriter = generateFloatData(j); + chunkWriter = generateFloatData(j, 0L); break; case 3: - chunkWriter = generateDoubleData(j); + chunkWriter = generateDoubleData(j, 0L); break; case 4: default: - chunkWriter = generateTextData(j); + chunkWriter = generateTextData(j, 0L); break; } chunkWriter.writeToFileWriter(writer); @@ -109,7 +133,9 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); for (int i = 0; i < originChunkMetadataList.size(); ++i) { Pair chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata(); - Assert.assertEquals("root.sg.d" + i / 5 + ".s" + i % 5, chunkMetadataPair.left); + Assert.assertEquals( + deviceDictInOrder.get(i / 5) + "." 
+ measurementDictInOrder.get(i % 5), + chunkMetadataPair.left); Assert.assertEquals( originChunkMetadataList.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); Assert.assertEquals( @@ -129,9 +155,9 @@ public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10, true)) { List originChunkMetadataList = new ArrayList<>(); for (int i = 0; i < 10; ++i) { - String deviceId = "root.sg.d" + i; + String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); - AlignedChunkWriterImpl chunkWriter = generateVectorData(i); + AlignedChunkWriterImpl chunkWriter = generateVectorData(i, 0L); chunkWriter.writeToFileWriter(writer); originChunkMetadataList.addAll(writer.chunkMetadataList); writer.endChunkGroup(); @@ -165,7 +191,7 @@ public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); for (int i = 0; i < alignedChunkMetadata.size(); ++i) { Pair chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata(); - Assert.assertEquals("root.sg.d" + i, chunkMetadataPair.left); + Assert.assertEquals(deviceDictInOrder.get(i), chunkMetadataPair.left); Assert.assertEquals( alignedChunkMetadata.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); Assert.assertEquals( @@ -184,7 +210,7 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10, true)) { List originChunkMetadataList = new ArrayList<>(); for (int i = 0; i < 10; ++i) { - String deviceId = "root.sg.d" + i; + String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); if (i % 2 == 0) { // write normal series @@ -192,20 +218,20 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { ChunkWriterImpl chunkWriter; switch (j) { case 0: - chunkWriter = generateIntData(j); + chunkWriter = generateIntData(j, 0L); break; case 1: - chunkWriter = generateBooleanData(j); + chunkWriter = generateBooleanData(j, 0L); break; case 2: - chunkWriter = generateFloatData(j); + chunkWriter = generateFloatData(j, 0L); break; case 3: - chunkWriter = generateDoubleData(j); + chunkWriter = generateDoubleData(j, 0L); break; case 4: default: - chunkWriter = generateTextData(j); + chunkWriter = generateTextData(j, 0L); break; } chunkWriter.writeToFileWriter(writer); @@ -213,7 +239,7 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { originChunkMetadataList.addAll(writer.chunkMetadataList); } else { // write vector - AlignedChunkWriterImpl chunkWriter = generateVectorData(i); + AlignedChunkWriterImpl chunkWriter = generateVectorData(i, 0L); chunkWriter.writeToFileWriter(writer); originChunkMetadataList.add( new AlignedChunkMetadata( @@ -236,11 +262,13 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { Pair chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata(); if (originChunkMetadataList.get(i) instanceof ChunkMetadata) { Assert.assertEquals( - "root.sg.d" + deviceCnt + "." + originChunkMetadataList.get(i).getMeasurementUid(), + deviceDictInOrder.get(deviceCnt) + + "." 
+ + originChunkMetadataList.get(i).getMeasurementUid(), chunkMetadataPair.left); } else { deviceCnt++; - Assert.assertEquals("root.sg.d" + deviceCnt++, chunkMetadataPair.left); + Assert.assertEquals(deviceDictInOrder.get(deviceCnt++), chunkMetadataPair.left); } Assert.assertEquals( originChunkMetadataList.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); @@ -255,47 +283,554 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { } } - private ChunkWriterImpl generateIntData(int idx) { + /** + * Write a file with 10 devices and 5 series in each device. For each series, we write one chunk + * for it. This test make sure that each chunk + * + * @throws IOException + */ + @Test + public void testWriteCompleteFileWithNormalChunk() throws IOException { + Map>>> originTimes = new HashMap<>(); + try (MemoryControlTsFileIOWriter writer = + new MemoryControlTsFileIOWriter(testFile, 1024, true)) { + List originChunkMetadataList = new ArrayList<>(); + for (int i = 0; i < 10; ++i) { + String deviceId = deviceDictInOrder.get(i); + writer.startChunkGroup(deviceId); + for (int j = 0; j < 5; ++j) { + ChunkWriterImpl chunkWriter; + switch (j) { + case 0: + chunkWriter = generateIntData(j, 0L); + break; + case 1: + chunkWriter = generateBooleanData(j, 0L); + break; + case 2: + chunkWriter = generateFloatData(j, 0L); + break; + case 3: + chunkWriter = generateDoubleData(j, 0L); + break; + case 4: + default: + chunkWriter = generateTextData(j, 0L); + break; + } + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = 0; t < TEST_CHUNK_SIZE; ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + originChunkMetadataList.addAll(writer.chunkMetadataList); + writer.endChunkGroup(); + } + Assert.assertTrue(writer.hasChunkMetadataInDisk); + writer.endFile(); + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes); + } + + /** + * Write a file with 10 devices and 5 series in each device. For each series, we write 100 chunks + * for it. 
This test make sure that each chunk + * + * @throws IOException + */ + @Test + public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { + Map>>> originTimes = new HashMap<>(); + try (MemoryControlTsFileIOWriter writer = + new MemoryControlTsFileIOWriter(testFile, 1024, true)) { + for (int i = 0; i < 10; ++i) { + String deviceId = deviceDictInOrder.get(i); + writer.startChunkGroup(deviceId); + for (int j = 0; j < 5; ++j) { + ChunkWriterImpl chunkWriter; + switch (j) { + case 0: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 1: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 2: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 3: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 4: + default: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + } + } + writer.endChunkGroup(); + } + Assert.assertTrue(writer.hasChunkMetadataInDisk); + writer.endFile(); + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes); + } + + /** + * Write a file with 10 devices and 5 series in each device. For each series, we write 1024 chunks + * for it. 
This test make sure that each chunk + * + * @throws IOException + */ + @Test + public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { + Map>>> originTimes = new HashMap<>(); + long originTestChunkSize = TEST_CHUNK_SIZE; + TEST_CHUNK_SIZE = 10; + try (MemoryControlTsFileIOWriter writer = + new MemoryControlTsFileIOWriter(testFile, 1024, true)) { + for (int i = 0; i < 10; ++i) { + String deviceId = deviceDictInOrder.get(i); + writer.startChunkGroup(deviceId); + for (int j = 0; j < 5; ++j) { + ChunkWriterImpl chunkWriter; + switch (j) { + case 0: + for (int k = 0; k < 1024; ++k) { + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 1: + for (int k = 0; k < 1024; ++k) { + chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 2: + for (int k = 0; k < 1024; ++k) { + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 3: + for (int k = 0; k < 1024; ++k) { + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 4: + default: + for (int k = 0; k < 1024; ++k) { + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + } + } + writer.endChunkGroup(); + } + Assert.assertTrue(writer.hasChunkMetadataInDisk); + writer.endFile(); + } finally { + TEST_CHUNK_SIZE = originTestChunkSize; + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes); + } + + /** + * Write a file with 10 devices and 1024 series in each device. For each series, we write 100 + * chunks for it. 
This test make sure that each chunk + * + * @throws IOException + */ + @Test + public void testWriteCompleteFileWithEnormousSeriesNum() throws IOException { + Map>>> originTimes = new HashMap<>(); + long originTestChunkSize = TEST_CHUNK_SIZE; + TEST_CHUNK_SIZE = 10; + try (MemoryControlTsFileIOWriter writer = + new MemoryControlTsFileIOWriter(testFile, 1024, true)) { + for (int i = 0; i < 10; ++i) { + String deviceId = deviceDictInOrder.get(i); + writer.startChunkGroup(deviceId); + for (int j = 0; j < 1024; ++j) { + ChunkWriterImpl chunkWriter; + switch (j % 5) { + case 0: + for (int k = 0; k < 100; ++k) { + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 1: + for (int k = 0; k < 100; ++k) { + chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 2: + for (int k = 0; k < 100; ++k) { + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 3: + for (int k = 0; k < 100; ++k) { + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 4: + default: + for (int k = 0; k < 100; ++k) { + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + } + } + writer.endChunkGroup(); + } + Assert.assertTrue(writer.hasChunkMetadataInDisk); + writer.endFile(); + } finally { + TEST_CHUNK_SIZE = originTestChunkSize; + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes); + } + + /** + * Write a file with 1024 devices and 5 series in each device. For each series, we write 10 chunks + * for it. 
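 * <p>Every test in this class finishes with the same two-step verification, a raw sequential
 * scan of the file followed by a query-level replay of the recorded timestamps:
 *
 * <pre>{@code
 * TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath());
 * TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes);
 * }</pre>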
This test make sure that each chunk + * + * @throws IOException + */ + @Test + public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { + Map>>> originTimes = new HashMap<>(); + long originTestChunkSize = TEST_CHUNK_SIZE; + TEST_CHUNK_SIZE = 10; + try (MemoryControlTsFileIOWriter writer = + new MemoryControlTsFileIOWriter(testFile, 1024, true)) { + for (int i = 0; i < 1024; ++i) { + String deviceId = deviceDictInOrder.get(i); + writer.startChunkGroup(deviceId); + for (int j = 0; j < 5; ++j) { + ChunkWriterImpl chunkWriter; + switch (j % 5) { + case 0: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 1: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 2: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 3: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + case 4: + default: + for (int k = 0; k < 10; ++k) { + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k); + chunkWriter.writeToFileWriter(writer); + List times = new ArrayList<>(); + for (long t = (long) TEST_CHUNK_SIZE * k; + t < (long) TEST_CHUNK_SIZE * (k + 1); + ++t) { + times.add(t); + } + originTimes + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .add(times); + } + break; + } + } + writer.endChunkGroup(); + } + Assert.assertTrue(writer.hasChunkMetadataInDisk); + writer.endFile(); + } finally { + TEST_CHUNK_SIZE = originTestChunkSize; + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes); + } + + private ChunkWriterImpl generateIntData(int idx, long startTime) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.INT64)); + new ChunkWriterImpl( + new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.INT64)); Random random = new Random(); - for 
(int i = 0; i < TEST_CHUNK_SIZE; ++i) { + for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { chunkWriter.write(i, random.nextLong()); } return chunkWriter; } - private ChunkWriterImpl generateFloatData(int idx) { + private ChunkWriterImpl generateFloatData(int idx, long startTime) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.FLOAT)); + new ChunkWriterImpl( + new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.FLOAT)); Random random = new Random(); - for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { chunkWriter.write(i, random.nextFloat()); } return chunkWriter; } - private ChunkWriterImpl generateDoubleData(int idx) { + private ChunkWriterImpl generateDoubleData(int idx, long startTime) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.DOUBLE)); + new ChunkWriterImpl( + new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.DOUBLE)); Random random = new Random(); - for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { chunkWriter.write(i, random.nextDouble()); } return chunkWriter; } - private ChunkWriterImpl generateBooleanData(int idx) { + private ChunkWriterImpl generateBooleanData(int idx, long startTime) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.BOOLEAN)); + new ChunkWriterImpl( + new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.BOOLEAN)); Random random = new Random(); - for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { chunkWriter.write(i, random.nextBoolean()); } return chunkWriter; } - private AlignedChunkWriterImpl generateVectorData(int idx) { + private AlignedChunkWriterImpl generateVectorData(int idx, long startTime) { List measurementSchemas = new ArrayList<>(); measurementSchemas.add(new MeasurementSchema("", TSDataType.INT32)); measurementSchemas.add(new MeasurementSchema("", TSDataType.INT64)); @@ -305,7 +840,7 @@ private AlignedChunkWriterImpl generateVectorData(int idx) { measurementSchemas.add(new MeasurementSchema("", TSDataType.TEXT)); AlignedChunkWriterImpl chunkWriter = new AlignedChunkWriterImpl(measurementSchemas); Random random = new Random(); - for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { TsPrimitiveType[] points = new TsPrimitiveType[6]; points[0] = new TsPrimitiveType.TsInt(random.nextInt()); points[1] = new TsPrimitiveType.TsLong(random.nextLong()); @@ -318,11 +853,12 @@ private AlignedChunkWriterImpl generateVectorData(int idx) { return chunkWriter; } - private ChunkWriterImpl generateTextData(int idx) { + private ChunkWriterImpl generateTextData(int idx, long startTime) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl(new MeasurementSchema("s" + idx, TSDataType.TEXT)); + new ChunkWriterImpl( + new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.TEXT)); Random random = new Random(); - for (int i = 0; i < TEST_CHUNK_SIZE; ++i) { + for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { chunkWriter.write(i, new Binary(String.valueOf(random.nextDouble()))); } return chunkWriter; From 96e37498d317ce2b4c994d2b4a8032a0de84ac73 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Wed, 7 Sep 2022 17:11:40 +0800 Subject: [PATCH 08/31] recover TsFileSequenceRead --- 
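The refactored generators in the previous patch take an explicit start time, so consecutive
chunks of one series tile the time axis without overlap. A minimal sketch of the resulting
call pattern (the helper names and TEST_CHUNK_SIZE are the test fixtures shown above):

    for (int k = 0; k < 10; ++k) {
      // chunk k covers timestamps [k * TEST_CHUNK_SIZE, (k + 1) * TEST_CHUNK_SIZE)
      ChunkWriterImpl chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k);
      chunkWriter.writeToFileWriter(writer);
    }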
.../main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java index d15882b3b8d69..aa946f67b7d5e 100644 --- a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java +++ b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java @@ -53,7 +53,7 @@ public class TsFileSequenceRead { "squid:S106" }) // Suppress high Cognitive Complexity and Standard outputs warning public static void main(String[] args) throws IOException { - String filename = "C:\\Users\\MARKLAU\\Desktop\\iotdb\\1-1-0-0.tsfile"; + String filename = "test.tsfile"; if (args.length >= 1) { filename = args[0]; } From baa9b2abd211ae523057b94454c17ef70b2df14d Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Thu, 8 Sep 2022 17:31:11 +0800 Subject: [PATCH 09/31] finish the verify utils for vector data --- .../writer/MemoryControlTsFileIOWriter.java | 49 +- .../write/TsFileIntegrityCheckingTool.java | 94 ++- .../MemoryControlTsFileIOWriterTest.java | 538 ++++++++---------- 3 files changed, 349 insertions(+), 332 deletions(-) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index fc47e74210bd4..eb5703ae293cf 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -62,7 +62,6 @@ public class MemoryControlTsFileIOWriter extends TsFileIOWriter { protected long currentChunkMetadataSize = 0L; protected File chunkMetadataTempFile; protected LocalTsFileOutput tempOutput; - protected final boolean autoControl; // it stores the start address of persisted chunk metadata for per series // protected Queue segmentForPerSeries = new ArrayDeque<>(); protected volatile boolean hasChunkMetadataInDisk = false; @@ -75,33 +74,27 @@ public class MemoryControlTsFileIOWriter extends TsFileIOWriter { private static final byte VECTOR_TYPE = 1; private static final byte NORMAL_TYPE = 2; - public MemoryControlTsFileIOWriter(File file, long maxMetadataSize, boolean autoControl) - throws IOException { + public MemoryControlTsFileIOWriter(File file, long maxMetadataSize) throws IOException { super(file); this.maxMetadataSize = maxMetadataSize; this.chunkMetadataTempFile = new File(file.getAbsoluteFile() + CHUNK_METADATA_TEMP_FILE_PREFIX); - this.autoControl = autoControl; } @Override public void endCurrentChunk() { currentChunkMetadataSize += currentChunkMetadata.calculateRamSize(); super.endCurrentChunk(); - if (this.autoControl) { - checkMetadataSizeAndMayFlush(); - } } - public boolean checkMetadataSizeAndMayFlush() { + public void checkMetadataSizeAndMayFlush() throws IOException { if (currentChunkMetadataSize > maxMetadataSize) { try { sortAndFlushChunkMetadata(); - return true; } catch (IOException e) { - LOG.error("Meets exception when flushing metadata to temp files", e); + LOG.error("Meets exception when flushing metadata to temp file for {}", file, e); + throw e; } } - return false; } protected void sortAndFlushChunkMetadata() throws IOException { @@ -135,13 +128,7 @@ private void writeChunkMetadata( if (iChunkMetadataList.size() == 0) { return; } - if (iChunkMetadataList.get(0).getDataType() == 
TSDataType.VECTOR) { - // pack the TimeChunkMetadata and List into List - List alignedChunkMetadata = packAlignedChunkMetadata(iChunkMetadataList); - writeAlignedChunkMetadata(alignedChunkMetadata, seriesPath, output); - } else { - writeNormalChunkMetadata(iChunkMetadataList, seriesPath, output); - } + writeNormalChunkMetadata(iChunkMetadataList, seriesPath, output); } private List packAlignedChunkMetadata(List iChunkMetadataList) { @@ -242,12 +229,19 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { // read in all chunk metadata of one series // construct the timeseries metadata for this series TimeseriesMetadata timeseriesMetadata = readTimeseriesMetadata(iterator); + indexCount++; // build bloom filter filter.add(currentSeries); // construct the index tree node for the series - Path currentPath = new Path(currentSeries, true); - currentDevice = currentPath.getDevice(); + Path currentPath = null; + if (timeseriesMetadata.getTSDataType() == TSDataType.VECTOR) { + // remove the last . in the series id + currentDevice = currentSeries.substring(0, currentSeries.length() - 1); + } else { + currentPath = new Path(currentSeries, true); + currentDevice = currentPath.getDevice(); + } if (!currentDevice.equals(prevDevice)) { if (prevDevice != null) { addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); @@ -266,8 +260,12 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); } - currentIndexNode.addEntry( - new MetadataIndexEntry(currentPath.getMeasurement(), out.getPosition())); + if (timeseriesMetadata.getTSDataType() != TSDataType.VECTOR) { + currentIndexNode.addEntry( + new MetadataIndexEntry(currentPath.getMeasurement(), out.getPosition())); + } else { + currentIndexNode.addEntry(new MetadataIndexEntry("", out.getPosition())); + } } prevDevice = currentDevice; @@ -314,7 +312,12 @@ private TimeseriesMetadata readTimeseriesMetadata(ChunkMetadataReadIterator iter currentSeries = iterator.getAllChunkMetadataForNextSeries(iChunkMetadataList); TimeseriesMetadata timeseriesMetadata = super.constructOneTimeseriesMetadata(new Path(currentSeries), iChunkMetadataList, false); - timeseriesMetadata.setMeasurementId(new Path(currentSeries, true).getMeasurement()); + if (timeseriesMetadata.getTSDataType() == TSDataType.VECTOR) { + // set empty measurement id for time column + timeseriesMetadata.setMeasurementId(""); + } else { + timeseriesMetadata.setMeasurementId(new Path(currentSeries, true).getMeasurement()); + } return timeseriesMetadata; } diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java index b635d63a9c26f..43333a6e0296f 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java @@ -32,14 +32,19 @@ import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; +import org.apache.iotdb.tsfile.read.TimeValuePair; import org.apache.iotdb.tsfile.read.TsFileSequenceReader; import org.apache.iotdb.tsfile.read.common.BatchData; import org.apache.iotdb.tsfile.read.common.Chunk; +import 
org.apache.iotdb.tsfile.read.common.IBatchDataIterator; +import org.apache.iotdb.tsfile.read.reader.IChunkReader; import org.apache.iotdb.tsfile.read.reader.IPointReader; +import org.apache.iotdb.tsfile.read.reader.chunk.AlignedChunkReader; import org.apache.iotdb.tsfile.read.reader.chunk.ChunkReader; import org.apache.iotdb.tsfile.read.reader.page.PageReader; import org.apache.iotdb.tsfile.read.reader.page.TimePageReader; import org.apache.iotdb.tsfile.read.reader.page.ValuePageReader; +import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.TsPrimitiveType; import org.junit.Assert; @@ -49,6 +54,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -136,7 +142,8 @@ public static void checkIntegrityBySequenceRead(String filename) { } public static void checkIntegrityByQuery( - String filename, Map>>> originData) { + String filename, + Map>>>> originData) { try (TsFileSequenceReader reader = new TsFileSequenceReader(filename)) { Map> allTimeseriesMetadata = reader.getAllTimeseriesMetadata(true); @@ -144,23 +151,74 @@ public static void checkIntegrityByQuery( for (Map.Entry> entry : allTimeseriesMetadata.entrySet()) { String deviceId = entry.getKey(); List timeseriesMetadataList = entry.getValue(); - Assert.assertEquals(originData.get(deviceId).size(), timeseriesMetadataList.size()); - for (TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) { - String measurementId = timeseriesMetadata.getMeasurementId(); - List> originChunks = originData.get(deviceId).get(measurementId); - List chunkMetadataList = timeseriesMetadata.getChunkMetadataList(); - Assert.assertEquals(originChunks.size(), chunkMetadataList.size()); - chunkMetadataList.sort(Comparator.comparing(IChunkMetadata::getStartTime)); - for (int i = 0; i < chunkMetadataList.size(); ++i) { - Chunk chunk = reader.readMemChunk((ChunkMetadata) chunkMetadataList.get(i)); - ChunkReader chunkReader = new ChunkReader(chunk, null); - List originValue = originChunks.get(i); - for (int valIdx = 0; chunkReader.hasNextSatisfiedPage(); ) { - IPointReader pointReader = chunkReader.nextPageData().getBatchDataIterator(); - while (pointReader.hasNextTimeValuePair()) { - Assert.assertEquals( - originValue.get(valIdx++).longValue(), - pointReader.nextTimeValuePair().getTimestamp()); + boolean vectorMode = false; + if (timeseriesMetadataList.size() > 0 + && timeseriesMetadataList.get(0).getTSDataType() != TSDataType.VECTOR) { + Assert.assertEquals(originData.get(deviceId).size(), timeseriesMetadataList.size()); + } else { + vectorMode = true; + Assert.assertEquals(originData.get(deviceId).size(), timeseriesMetadataList.size() - 1); + } + + if (!vectorMode) { + // check integrity of not aligned series + for (TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) { + String measurementId = timeseriesMetadata.getMeasurementId(); + List>> originChunks = + originData.get(deviceId).get(measurementId); + List chunkMetadataList = timeseriesMetadata.getChunkMetadataList(); + Assert.assertEquals(originChunks.size(), chunkMetadataList.size()); + chunkMetadataList.sort(Comparator.comparing(IChunkMetadata::getStartTime)); + for (int i = 0; i < chunkMetadataList.size(); ++i) { + Chunk chunk = reader.readMemChunk((ChunkMetadata) chunkMetadataList.get(i)); + ChunkReader chunkReader = new ChunkReader(chunk, null); + List> originValue = originChunks.get(i); + for (int valIdx = 0; 
chunkReader.hasNextSatisfiedPage(); ) {
+                  IPointReader pointReader = chunkReader.nextPageData().getBatchDataIterator();
+                  while (pointReader.hasNextTimeValuePair()) {
+                    TimeValuePair pair = pointReader.nextTimeValuePair();
+                    Assert.assertEquals(
+                        originValue.get(valIdx).left.longValue(), pair.getTimestamp());
+                    Assert.assertEquals(originValue.get(valIdx++).right, pair.getValue());
+                  }
+                }
+              }
+            }
+          } else {
+            // check integrity of vector type
+            // 1. check the time column
+            TimeseriesMetadata timeColumnMetadata = timeseriesMetadataList.get(0);
+            List<IChunkMetadata> timeChunkMetadataList = timeColumnMetadata.getChunkMetadataList();
+            timeChunkMetadataList.sort(Comparator.comparing(IChunkMetadata::getStartTime));
+
+            for (int i = 1; i < timeseriesMetadataList.size(); ++i) {
+              List<IChunkMetadata> valueChunkMetadataList =
+                  timeseriesMetadataList.get(i).getChunkMetadataList();
+              Assert.assertEquals(timeChunkMetadataList.size(), valueChunkMetadataList.size());
+              List<List<Pair<Long, TsPrimitiveType>>> originDataChunks =
+                  originData.get(deviceId).get(timeseriesMetadataList.get(i).getMeasurementId());
+              for (int chunkIdx = 0; chunkIdx < timeChunkMetadataList.size(); ++chunkIdx) {
+                Chunk timeChunk =
+                    reader.readMemChunk((ChunkMetadata) timeChunkMetadataList.get(chunkIdx));
+                Chunk valueChunk =
+                    reader.readMemChunk((ChunkMetadata) valueChunkMetadataList.get(chunkIdx));
+                IChunkReader chunkReader =
+                    new AlignedChunkReader(timeChunk, Collections.singletonList(valueChunk), null);
+                List<Pair<Long, TsPrimitiveType>> originValue = originDataChunks.get(chunkIdx);
+                for (int valIdx = 0; chunkReader.hasNextSatisfiedPage(); ) {
+                  IBatchDataIterator pointReader = chunkReader.nextPageData().getBatchDataIterator();
+                  while (pointReader.hasNext()) {
+                    long time = pointReader.currentTime();
+                    Assert.assertEquals(originValue.get(valIdx).left.longValue(), time);
+                    Assert.assertEquals(
+                        originValue.get(valIdx++).right.getValue(),
+                        ((TsPrimitiveType[]) pointReader.currentValue())[0].getValue());
+                    pointReader.next();
+                  }
                }
              }
            }
          }
diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java
index 32d2248ba5e85..e1d23667d0fce 100644
--- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java
+++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java
@@ -19,10 +19,10 @@
 package org.apache.iotdb.tsfile.write.writer;
 
-import org.apache.iotdb.tsfile.file.metadata.AlignedChunkMetadata;
 import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata;
 import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata;
 import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
+import org.apache.iotdb.tsfile.read.common.Path;
 import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput;
 import org.apache.iotdb.tsfile.utils.Binary;
 import org.apache.iotdb.tsfile.utils.Pair;
@@ -86,13 +86,14 @@ public void tearDown() throws IOException {
   }
 
   public MemoryControlTsFileIOWriterTest() throws IOException {
-    super(emptyFile, 1024, true);
+    super(emptyFile, 1024);
   }
 
+  /** The following tests are for ChunkMetadata serialization and deserialization.
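+   *
+   * <p>A minimal sketch of how these tests read spilled metadata back from the temp file, using
+   * the same classes and calls as the test bodies below (generic types restored here as the
+   * tests use them):
+   *
+   * <pre>{@code
+   * ChunkMetadataReadIterator iter =
+   *     writer.new ChunkMetadataReadIterator(
+   *         0,
+   *         writer.chunkMetadataTempFile.length(),
+   *         new LocalTsFileInput(writer.chunkMetadataTempFile.toPath()));
+   * Pair<String, ChunkMetadata> next = iter.getNextSeriesNameAndChunkMetadata();
+   * }</pre>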
*/ @Test public void testSerializeAndDeserializeChunkMetadata() throws IOException { try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10, true)) { + new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { List originChunkMetadataList = new ArrayList<>(); for (int i = 0; i < 10; ++i) { String deviceId = deviceDictInOrder.get(i); @@ -101,20 +102,20 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { ChunkWriterImpl chunkWriter; switch (j) { case 0: - chunkWriter = generateIntData(j, 0L); + chunkWriter = generateIntData(j, 0L, new ArrayList<>()); break; case 1: - chunkWriter = generateBooleanData(j, 0); + chunkWriter = generateBooleanData(j, 0, new ArrayList<>()); break; case 2: - chunkWriter = generateFloatData(j, 0L); + chunkWriter = generateFloatData(j, 0L, new ArrayList<>()); break; case 3: - chunkWriter = generateDoubleData(j, 0L); + chunkWriter = generateDoubleData(j, 0L, new ArrayList<>()); break; case 4: default: - chunkWriter = generateTextData(j, 0L); + chunkWriter = generateTextData(j, 0L, new ArrayList<>()); break; } chunkWriter.writeToFileWriter(writer); @@ -152,54 +153,47 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { @Test public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException { try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10, true)) { + new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { List originChunkMetadataList = new ArrayList<>(); for (int i = 0; i < 10; ++i) { String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); - AlignedChunkWriterImpl chunkWriter = generateVectorData(i, 0L); + AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>()); chunkWriter.writeToFileWriter(writer); originChunkMetadataList.addAll(writer.chunkMetadataList); writer.endChunkGroup(); } + Map> originChunkMetadata = writer.groupChunkMetadataListBySeries(); writer.sortAndFlushChunkMetadata(); writer.tempOutput.flush(); - List alignedChunkMetadata = new ArrayList<>(); - IChunkMetadata currentTimeChunkMetadata = originChunkMetadataList.get(0); - List currentValueChunkMetadata = new ArrayList<>(); - for (int i = 1; i < originChunkMetadataList.size(); ++i) { - if (originChunkMetadataList.get(i).getDataType() == TSDataType.VECTOR) { - alignedChunkMetadata.add( - new AlignedChunkMetadata(currentTimeChunkMetadata, currentValueChunkMetadata)); - currentTimeChunkMetadata = originChunkMetadataList.get(i); - currentValueChunkMetadata = new ArrayList<>(); - } else { - currentValueChunkMetadata.add(originChunkMetadataList.get(i)); - } - } - if (currentValueChunkMetadata.size() > 0) { - alignedChunkMetadata.add( - new AlignedChunkMetadata(currentTimeChunkMetadata, currentValueChunkMetadata)); - } - ChunkMetadataReadIterator window = writer .new ChunkMetadataReadIterator( 0, writer.chunkMetadataTempFile.length(), new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); - for (int i = 0; i < alignedChunkMetadata.size(); ++i) { - Pair chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata(); - Assert.assertEquals(deviceDictInOrder.get(i), chunkMetadataPair.left); - Assert.assertEquals( - alignedChunkMetadata.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); - Assert.assertEquals( - alignedChunkMetadata.get(i).getEndTime(), chunkMetadataPair.right.getEndTime()); - Assert.assertEquals( - alignedChunkMetadata.get(i).getDataType(), 
chunkMetadataPair.right.getDataType()); + List measurementIds = new ArrayList<>(); + for (int i = 0; i < 10; ++i) { + measurementIds.add(deviceDictInOrder.get(i) + "."); + for (int j = 1; j <= 6; ++j) { + measurementIds.add(deviceDictInOrder.get(i) + ".s" + j); + } + } + for (String measurementId : measurementIds) { + List chunkMetadata = new ArrayList<>(); + String seriesId = window.getAllChunkMetadataForNextSeries(chunkMetadata); + Assert.assertEquals(measurementId, seriesId); Assert.assertEquals( - alignedChunkMetadata.get(i).getStatistics(), chunkMetadataPair.right.getStatistics()); + originChunkMetadata.get(new Path(measurementId)).size(), chunkMetadata.size()); + for (int i = 0; i < chunkMetadata.size(); ++i) { + Assert.assertEquals( + originChunkMetadata.get(new Path(measurementId)).get(i).getStatistics(), + chunkMetadata.get(i).getStatistics()); + Assert.assertEquals( + originChunkMetadata.get(new Path(measurementId)).get(i).getDataType(), + chunkMetadata.get(i).getDataType()); + } } } } @@ -207,8 +201,9 @@ public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException @Test public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10, true)) { + new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { List originChunkMetadataList = new ArrayList<>(); + List seriesIds = new ArrayList<>(); for (int i = 0; i < 10; ++i) { String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); @@ -218,35 +213,35 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { ChunkWriterImpl chunkWriter; switch (j) { case 0: - chunkWriter = generateIntData(j, 0L); + chunkWriter = generateIntData(j, 0L, new ArrayList<>()); break; case 1: - chunkWriter = generateBooleanData(j, 0L); + chunkWriter = generateBooleanData(j, 0L, new ArrayList<>()); break; case 2: - chunkWriter = generateFloatData(j, 0L); + chunkWriter = generateFloatData(j, 0L, new ArrayList<>()); break; case 3: - chunkWriter = generateDoubleData(j, 0L); + chunkWriter = generateDoubleData(j, 0L, new ArrayList<>()); break; case 4: default: - chunkWriter = generateTextData(j, 0L); + chunkWriter = generateTextData(j, 0L, new ArrayList<>()); break; } chunkWriter.writeToFileWriter(writer); + seriesIds.add(deviceId + "." 
+                measurementDictInOrder.get(j));
          }
-          originChunkMetadataList.addAll(writer.chunkMetadataList);
        } else {
          // write vector
-          AlignedChunkWriterImpl chunkWriter = generateVectorData(i, 0L);
+          AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>());
          chunkWriter.writeToFileWriter(writer);
-          originChunkMetadataList.add(
-              new AlignedChunkMetadata(
-                  writer.chunkMetadataList.get(0),
-                  new ArrayList<>(
-                      writer.chunkMetadataList.subList(1, writer.chunkMetadataList.size()))));
+          seriesIds.add(deviceId + ".");
+          for (int l = 1; l <= 6; ++l) {
+            seriesIds.add(deviceId + ".s" + l);
+          }
        }
+        originChunkMetadataList.addAll(writer.chunkMetadataList);
        writer.endChunkGroup();
      }
      writer.sortAndFlushChunkMetadata();
@@ -258,18 +253,9 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException {
              0,
              writer.chunkMetadataTempFile.length(),
              new LocalTsFileInput(writer.chunkMetadataTempFile.toPath()));
-      for (int i = 0, deviceCnt = 0; i < originChunkMetadataList.size(); ++i) {
+      for (int i = 0; i < originChunkMetadataList.size(); ++i) {
        Pair<String, ChunkMetadata> chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata();
-        if (originChunkMetadataList.get(i) instanceof ChunkMetadata) {
-          Assert.assertEquals(
-              deviceDictInOrder.get(deviceCnt)
-                  + "."
-                  + originChunkMetadataList.get(i).getMeasurementUid(),
-              chunkMetadataPair.left);
-        } else {
-          deviceCnt++;
-          Assert.assertEquals(deviceDictInOrder.get(deviceCnt++), chunkMetadataPair.left);
-        }
+        Assert.assertEquals(seriesIds.get(i), chunkMetadataPair.left);
        Assert.assertEquals(
            originChunkMetadataList.get(i).getStartTime(), chunkMetadataPair.right.getStartTime());
        Assert.assertEquals(
@@ -283,6 +269,8 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException {
    }
  }

+  /** The following tests are for writing normal series with different numbers of chunks, series, and devices. */
+
  /**
   * Write a file with 10 devices and 5 series in each device. For each series, we write one chunk
   * for it.
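   * <p>Each test also records every written point and replays it through
   * TsFileIntegrityCheckingTool.checkIntegrityByQuery; the bookkeeping pattern shared by all of
   * them, sketched from the test bodies below:
   *
   * <pre>{@code
   * originData
   *     .computeIfAbsent(deviceId, x -> new HashMap<>())
   *     .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>())
   *     .add(valList);
   * }</pre>
   *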
This test make sure that each chunk @@ -291,51 +279,46 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { */ @Test public void testWriteCompleteFileWithNormalChunk() throws IOException { - Map>>> originTimes = new HashMap<>(); - try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024, true)) { - List originChunkMetadataList = new ArrayList<>(); + Map>>>> originData = new HashMap<>(); + try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { for (int i = 0; i < 10; ++i) { String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); for (int j = 0; j < 5; ++j) { + List> valList = new ArrayList<>(); ChunkWriterImpl chunkWriter; switch (j) { case 0: - chunkWriter = generateIntData(j, 0L); + chunkWriter = generateIntData(j, 0L, valList); break; case 1: - chunkWriter = generateBooleanData(j, 0L); + chunkWriter = generateBooleanData(j, 0L, valList); break; case 2: - chunkWriter = generateFloatData(j, 0L); + chunkWriter = generateFloatData(j, 0L, valList); break; case 3: - chunkWriter = generateDoubleData(j, 0L); + chunkWriter = generateDoubleData(j, 0L, valList); break; case 4: default: - chunkWriter = generateTextData(j, 0L); + chunkWriter = generateTextData(j, 0L, valList); break; } chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = 0; t < TEST_CHUNK_SIZE; ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } - originChunkMetadataList.addAll(writer.chunkMetadataList); writer.endChunkGroup(); } Assert.assertTrue(writer.hasChunkMetadataInDisk); writer.endFile(); } TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); - TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData); } /** @@ -346,9 +329,8 @@ public void testWriteCompleteFileWithNormalChunk() throws IOException { */ @Test public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { - Map>>> originTimes = new HashMap<>(); - try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024, true)) { + Map>>>> originData = new HashMap<>(); + try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { for (int i = 0; i < 10; ++i) { String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); @@ -357,83 +339,63 @@ public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { switch (j) { case 0: for (int k = 0; k < 10; ++k) { - chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 1: for (int k = 0; k < 10; ++k) { - chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + 
chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 2: for (int k = 0; k < 10; ++k) { - chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 3: for (int k = 0; k < 10; ++k) { - chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 4: default: for (int k = 0; k < 10; ++k) { - chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; } @@ -444,23 +406,22 @@ public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { writer.endFile(); } TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); - TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData); } /** - * Write a file with 10 devices and 5 series in each device. For each series, we write 1024 chunks + * Write a file with 2 devices and 5 series in each device. For each series, we write 1024 chunks * for it. 
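 * <p>To keep the runtime reasonable, the test shrinks the chunk size and restores it in a
 * finally block so a failure cannot leak the smaller value into other tests; the pattern used
 * in the body below:
 *
 * <pre>{@code
 * long originTestChunkSize = TEST_CHUNK_SIZE;
 * TEST_CHUNK_SIZE = 10;
 * try {
 *   // write and verify the file
 * } finally {
 *   TEST_CHUNK_SIZE = originTestChunkSize;
 * }
 * }</pre>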
This test make sure that each chunk * * @throws IOException */ @Test public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { - Map>>> originTimes = new HashMap<>(); + Map>>>> originData = new HashMap<>(); long originTestChunkSize = TEST_CHUNK_SIZE; TEST_CHUNK_SIZE = 10; - try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024, true)) { - for (int i = 0; i < 10; ++i) { + try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + for (int i = 0; i < 2; ++i) { String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); for (int j = 0; j < 5; ++j) { @@ -468,83 +429,63 @@ public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { switch (j) { case 0: for (int k = 0; k < 1024; ++k) { - chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 1: for (int k = 0; k < 1024; ++k) { - chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 2: for (int k = 0; k < 1024; ++k) { - chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 3: for (int k = 0; k < 1024; ++k) { - chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 4: default: for (int k = 0; k < 1024; ++k) { - chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); 
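+                    // write the sealed chunk, then let the writer spill chunk metadata to the
+                    // temp file once the in-memory total passes maxMetadataSize (1 KB here)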
chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } - originTimes + writer.checkMetadataSizeAndMayFlush(); + originData .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; } @@ -557,107 +498,86 @@ public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { TEST_CHUNK_SIZE = originTestChunkSize; } TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); - TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData); } /** - * Write a file with 10 devices and 1024 series in each device. For each series, we write 100 - * chunks for it. This test make sure that each chunk + * Write a file with 2 devices and 1024 series in each device. For each series, we write 50 chunks + * for it. This test make sure that each chunk * * @throws IOException */ @Test public void testWriteCompleteFileWithEnormousSeriesNum() throws IOException { - Map>>> originTimes = new HashMap<>(); + Map>>>> originTimes = new HashMap<>(); long originTestChunkSize = TEST_CHUNK_SIZE; - TEST_CHUNK_SIZE = 10; - try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024, true)) { - for (int i = 0; i < 10; ++i) { + TEST_CHUNK_SIZE = 1; + try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + for (int i = 0; i < 2; ++i) { String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); for (int j = 0; j < 1024; ++j) { ChunkWriterImpl chunkWriter; switch (j % 5) { case 0: - for (int k = 0; k < 100; ++k) { - chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k); + for (int k = 0; k < 50; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 1: - for (int k = 0; k < 100; ++k) { - chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k); + for (int k = 0; k < 50; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 2: - for (int k = 0; k < 100; ++k) { - chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k); + for (int k = 0; k < 50; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - 
times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 3: - for (int k = 0; k < 100; ++k) { - chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k); + for (int k = 0; k < 50; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 4: default: - for (int k = 0; k < 100; ++k) { - chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k); + for (int k = 0; k < 50; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; } @@ -681,11 +601,10 @@ public void testWriteCompleteFileWithEnormousSeriesNum() throws IOException { */ @Test public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { - Map>>> originTimes = new HashMap<>(); + Map>>>> originTimes = new HashMap<>(); long originTestChunkSize = TEST_CHUNK_SIZE; TEST_CHUNK_SIZE = 10; - try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024, true)) { + try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { for (int i = 0; i < 1024; ++i) { String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); @@ -694,83 +613,63 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { switch (j % 5) { case 0: for (int k = 0; k < 10; ++k) { - chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 1: for (int k = 0; k < 10; ++k) { - chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 
2: for (int k = 0; k < 10; ++k) { - chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 3: for (int k = 0; k < 10; ++k) { - chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; case 4: default: for (int k = 0; k < 10; ++k) { - chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k); + List> valList = new ArrayList<>(); + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - List times = new ArrayList<>(); - for (long t = (long) TEST_CHUNK_SIZE * k; - t < (long) TEST_CHUNK_SIZE * (k + 1); - ++t) { - times.add(t); - } + writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) - .add(times); + .add(valList); } break; } @@ -786,60 +685,111 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originTimes); } - private ChunkWriterImpl generateIntData(int idx, long startTime) { + /** The following tests is for writing aligned series. */ + + /** + * Test writing 10 align series, 6 in a group. 
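+ * (ten aligned devices, each with components s1..s6 written as one chunk group).
+ *
+ * <p>Sketch of how each aligned chunk is produced; this mirrors the write loop of
+ * generateVectorData further down, one TsPrimitiveType per component:
+ *
+ * <pre>{@code
+ * TsPrimitiveType[] points = new TsPrimitiveType[6];
+ * points[0] = new TsPrimitiveType.TsInt(random.nextInt());
+ * // ... one value per remaining component ...
+ * chunkWriter.write(time, points);
+ * }</pre>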
+ * + * @throws IOException + */ + @Test + public void testWriteCompleteFileWithAlignedSeries() throws IOException { + Map>>>> originData = new HashMap<>(); + try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + for (int i = 0; i < 10; ++i) { + String deviceId = deviceDictInOrder.get(i); + writer.startChunkGroup(deviceId); + List>> valList = new ArrayList<>(); + AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, valList); + for (int j = 1; j <= 6; ++j) { + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent("s" + j, x -> new ArrayList<>()) + .add(valList.get(j - 1)); + } + + chunkWriter.writeToFileWriter(writer); + writer.endChunkGroup(); + writer.checkMetadataSizeAndMayFlush(); + } + writer.endFile(); + Assert.assertTrue(writer.hasChunkMetadataInDisk); + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData); + } + + /** The following tests is for writing mixed of normal series and aligned series */ + private ChunkWriterImpl generateIntData( + int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = new ChunkWriterImpl( new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.INT64)); Random random = new Random(); for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { - chunkWriter.write(i, random.nextLong()); + long val = random.nextLong(); + chunkWriter.write(i, val); + record.add(new Pair<>(i, new TsPrimitiveType.TsLong(val))); } return chunkWriter; } - private ChunkWriterImpl generateFloatData(int idx, long startTime) { + private ChunkWriterImpl generateFloatData( + int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = new ChunkWriterImpl( new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.FLOAT)); Random random = new Random(); for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { - chunkWriter.write(i, random.nextFloat()); + float val = random.nextFloat(); + chunkWriter.write(i, val); + record.add(new Pair<>(i, new TsPrimitiveType.TsFloat(val))); } return chunkWriter; } - private ChunkWriterImpl generateDoubleData(int idx, long startTime) { + private ChunkWriterImpl generateDoubleData( + int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = new ChunkWriterImpl( new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.DOUBLE)); Random random = new Random(); for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { - chunkWriter.write(i, random.nextDouble()); + double val = random.nextDouble(); + chunkWriter.write(i, val); + record.add(new Pair<>(i, new TsPrimitiveType.TsDouble(val))); } return chunkWriter; } - private ChunkWriterImpl generateBooleanData(int idx, long startTime) { + private ChunkWriterImpl generateBooleanData( + int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = new ChunkWriterImpl( new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.BOOLEAN)); Random random = new Random(); for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { - chunkWriter.write(i, random.nextBoolean()); + boolean val = random.nextBoolean(); + chunkWriter.write(i, val); + record.add(new Pair<>(i, new TsPrimitiveType.TsBoolean(val))); } return chunkWriter; } - private AlignedChunkWriterImpl generateVectorData(int idx, long startTime) { + private AlignedChunkWriterImpl generateVectorData( + long startTime, List>> record) { List measurementSchemas = new 
ArrayList<>(); - measurementSchemas.add(new MeasurementSchema("", TSDataType.INT32)); - measurementSchemas.add(new MeasurementSchema("", TSDataType.INT64)); - measurementSchemas.add(new MeasurementSchema("", TSDataType.FLOAT)); - measurementSchemas.add(new MeasurementSchema("", TSDataType.DOUBLE)); - measurementSchemas.add(new MeasurementSchema("", TSDataType.BOOLEAN)); - measurementSchemas.add(new MeasurementSchema("", TSDataType.TEXT)); + measurementSchemas.add(new MeasurementSchema("s1", TSDataType.INT32)); + measurementSchemas.add(new MeasurementSchema("s2", TSDataType.INT64)); + measurementSchemas.add(new MeasurementSchema("s3", TSDataType.FLOAT)); + measurementSchemas.add(new MeasurementSchema("s4", TSDataType.DOUBLE)); + measurementSchemas.add(new MeasurementSchema("s5", TSDataType.BOOLEAN)); + measurementSchemas.add(new MeasurementSchema("s6", TSDataType.TEXT)); AlignedChunkWriterImpl chunkWriter = new AlignedChunkWriterImpl(measurementSchemas); Random random = new Random(); + for (int i = 0; i < 6; ++i) { + record.add(new ArrayList<>()); + } for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { TsPrimitiveType[] points = new TsPrimitiveType[6]; points[0] = new TsPrimitiveType.TsInt(random.nextInt()); @@ -848,18 +798,24 @@ private AlignedChunkWriterImpl generateVectorData(int idx, long startTime) { points[3] = new TsPrimitiveType.TsDouble(random.nextDouble()); points[4] = new TsPrimitiveType.TsBoolean(random.nextBoolean()); points[5] = new TsPrimitiveType.TsBinary(new Binary(String.valueOf(random.nextDouble()))); + for (int j = 0; j < 6; ++j) { + record.get(j).add(new Pair<>(i, points[j])); + } chunkWriter.write(i, points); } return chunkWriter; } - private ChunkWriterImpl generateTextData(int idx, long startTime) { + private ChunkWriterImpl generateTextData( + int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = new ChunkWriterImpl( new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.TEXT)); Random random = new Random(); for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { - chunkWriter.write(i, new Binary(String.valueOf(random.nextDouble()))); + Binary val = new Binary(String.valueOf(random.nextDouble())); + chunkWriter.write(i, val); + record.add(new Pair<>(i, new TsPrimitiveType.TsBinary(val))); } return chunkWriter; } From c3a470d7a3296a8627d16dca735ebfaad0d5c104 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Thu, 8 Sep 2022 23:56:00 +0800 Subject: [PATCH 10/31] finish UT for aligned series --- .../writer/MemoryControlTsFileIOWriter.java | 1 + .../MemoryControlTsFileIOWriterTest.java | 179 ++++++++++++++++-- 2 files changed, 160 insertions(+), 20 deletions(-) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index eb5703ae293cf..0c171c441931a 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -87,6 +87,7 @@ public void endCurrentChunk() { } public void checkMetadataSizeAndMayFlush() throws IOException { + // This function should be called after all data of an aligned device has been written if (currentChunkMetadataSize > maxMetadataSize) { try { sortAndFlushChunkMetadata(); diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java 
diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java
index e1d23667d0fce..5d7c7d8e28fdb 100644
--- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java
+++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java
@@ -158,7 +158,7 @@ public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException
     for (int i = 0; i < 10; ++i) {
       String deviceId = deviceDictInOrder.get(i);
       writer.startChunkGroup(deviceId);
-      AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>());
+      AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6);
       chunkWriter.writeToFileWriter(writer);
       originChunkMetadataList.addAll(writer.chunkMetadataList);
       writer.endChunkGroup();
@@ -234,7 +234,7 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException {
       }
     } else {
       // write vector
-      AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>());
+      AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6);
       chunkWriter.writeToFileWriter(writer);
       seriesIds.add(deviceId + ".");
       for (int l = 1; l <= 6; ++l) {
@@ -693,14 +693,14 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException {
    * @throws IOException
    */
   @Test
-  public void testWriteCompleteFileWithAlignedSeries() throws IOException {
+  public void testWriteCompleteFileWithAlignedSeriesWithOneChunk() throws IOException {
     Map<String, Map<String, List<List<Pair<Long, TsPrimitiveType>>>>> originData = new HashMap<>();
     try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) {
       for (int i = 0; i < 10; ++i) {
         String deviceId = deviceDictInOrder.get(i);
         writer.startChunkGroup(deviceId);
         List<List<Pair<Long, TsPrimitiveType>>> valList = new ArrayList<>();
-        AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, valList);
+        AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, valList, 6);
         for (int j = 1; j <= 6; ++j) {
           originData
               .computeIfAbsent(deviceId, x -> new HashMap<>())
               .computeIfAbsent("s" + j, x -> new ArrayList<>())
               .add(valList.get(j - 1));
@@ -719,6 +719,122 @@
     TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData);
   }
 
+  /**
+   * Test writing 1 aligned device; for each series we write 512 chunks
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testWriteCompleteFileWithAlignedSeriesWithMultiChunks() throws IOException {
+    Map<String, Map<String, List<List<Pair<Long, TsPrimitiveType>>>>> originData = new HashMap<>();
+    int chunkNum = 512, seriesNum = 6;
+    try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) {
+      for (int i = 0; i < 1; ++i) {
+        String deviceId = deviceDictInOrder.get(i);
+        for (int k = 0; k < chunkNum; ++k) {
+          writer.startChunkGroup(deviceId);
+          List<List<Pair<Long, TsPrimitiveType>>> valList = new ArrayList<>();
+          AlignedChunkWriterImpl chunkWriter =
+              generateVectorData(k * TEST_CHUNK_SIZE, valList, seriesNum);
+          for (int j = 1; j <= seriesNum; ++j) {
+            originData
+                .computeIfAbsent(deviceId, x -> new HashMap<>())
+                .computeIfAbsent("s" + j, x -> new ArrayList<>())
+                .add(valList.get(j - 1));
+          }
+
+          chunkWriter.writeToFileWriter(writer);
+          writer.endChunkGroup();
+        }
+        writer.checkMetadataSizeAndMayFlush();
+      }
+      writer.endFile();
+      Assert.assertTrue(writer.hasChunkMetadataInDisk);
+    }
+    TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath());
+    TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData);
+  }
+
+  /**
+   * Test writing aligned chunk metadata; for each aligned series, we write 1024 components.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testWriteCompleteFileWithAlignedSeriesWithManyComponents() throws IOException {
+    Map<String, Map<String, List<List<Pair<Long, TsPrimitiveType>>>>> originData = new HashMap<>();
+    int chunkNum = 5, seriesNum = 1024;
+    long originTestPointNum = TEST_CHUNK_SIZE;
+    TEST_CHUNK_SIZE = 10;
+    try {
+      try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) {
+        for (int i = 0; i < 10; ++i) {
+          String deviceId = deviceDictInOrder.get(i);
+          for (int k = 0; k < chunkNum; ++k) {
+            writer.startChunkGroup(deviceId);
+            List<List<Pair<Long, TsPrimitiveType>>> valList = new ArrayList<>();
+            AlignedChunkWriterImpl chunkWriter =
+                generateVectorData(k * TEST_CHUNK_SIZE, valList, seriesNum);
+            for (int j = 1; j <= seriesNum; ++j) {
+              originData
+                  .computeIfAbsent(deviceId, x -> new HashMap<>())
+                  .computeIfAbsent("s" + j, x -> new ArrayList<>())
+                  .add(valList.get(j - 1));
+            }
+
+            chunkWriter.writeToFileWriter(writer);
+            writer.endChunkGroup();
+          }
+          writer.checkMetadataSizeAndMayFlush();
+        }
+        writer.endFile();
+        Assert.assertTrue(writer.hasChunkMetadataInDisk);
+      }
+    } finally {
+      TEST_CHUNK_SIZE = originTestPointNum;
+    }
+    TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath());
+    TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData);
+  }
+
+  @Test
+  public void testWriteCompleteFileWithLotsAlignedSeries() throws IOException {
+    Map<String, Map<String, List<List<Pair<Long, TsPrimitiveType>>>>> originData = new HashMap<>();
+    int chunkNum = 5, seriesNum = 12;
+    long originTestPointNum = TEST_CHUNK_SIZE;
+    TEST_CHUNK_SIZE = 10;
+    int deviceNum = 1024;
+    try {
+      try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) {
+        for (int i = 0; i < deviceNum; ++i) {
+          String deviceId = deviceDictInOrder.get(i);
+          for (int k = 0; k < chunkNum; ++k) {
+            writer.startChunkGroup(deviceId);
+            List<List<Pair<Long, TsPrimitiveType>>> valList = new ArrayList<>();
+            AlignedChunkWriterImpl chunkWriter =
+                generateVectorData(k * TEST_CHUNK_SIZE, valList, seriesNum);
+            for (int j = 1; j <= seriesNum; ++j) {
+              originData
+                  .computeIfAbsent(deviceId, x -> new HashMap<>())
+                  .computeIfAbsent("s" + j, x -> new ArrayList<>())
+                  .add(valList.get(j - 1));
+            }
+
+            chunkWriter.writeToFileWriter(writer);
+            writer.endChunkGroup();
+          }
+          writer.checkMetadataSizeAndMayFlush();
+        }
+        writer.endFile();
+        Assert.assertTrue(writer.hasChunkMetadataInDisk);
+      }
+    } finally {
+      TEST_CHUNK_SIZE = originTestPointNum;
+    }
+    TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath());
+    TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData);
+  }
+
   /** The following tests is for writing mixed of normal series and aligned series */
   private ChunkWriterImpl generateIntData(
       int idx, long startTime, List<Pair<Long, TsPrimitiveType>> record) {
@@ -777,28 +893,51 @@ private ChunkWriterImpl generateBooleanData(
   }
 
   private AlignedChunkWriterImpl generateVectorData(
-      long startTime, List<List<Pair<Long, TsPrimitiveType>>> record) {
+      long startTime, List<List<Pair<Long, TsPrimitiveType>>> record, int seriesNum) {
     List<IMeasurementSchema> measurementSchemas = new ArrayList<>();
-    measurementSchemas.add(new MeasurementSchema("s1", TSDataType.INT32));
-    measurementSchemas.add(new MeasurementSchema("s2", TSDataType.INT64));
-    measurementSchemas.add(new MeasurementSchema("s3", TSDataType.FLOAT));
-    measurementSchemas.add(new MeasurementSchema("s4", TSDataType.DOUBLE));
-    measurementSchemas.add(new MeasurementSchema("s5", TSDataType.BOOLEAN));
-    measurementSchemas.add(new MeasurementSchema("s6", TSDataType.TEXT));
+    TSDataType[] dataTypes =
+        new TSDataType[] {
+          TSDataType.INT32,
+          TSDataType.INT64,
+          TSDataType.FLOAT,
+          TSDataType.DOUBLE,
+          TSDataType.BOOLEAN,
+          TSDataType.TEXT
+        };
+    for (int i = 0; i < seriesNum; ++i) {
+      measurementSchemas.add(new MeasurementSchema("s" + (i + 1), dataTypes[i % 6]));
+    }
     AlignedChunkWriterImpl chunkWriter = new AlignedChunkWriterImpl(measurementSchemas);
     Random random = new Random();
-    for (int i = 0; i < 6; ++i) {
+    for (int i = 0; i < seriesNum; ++i) {
       record.add(new ArrayList<>());
     }
     for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) {
-      TsPrimitiveType[] points = new TsPrimitiveType[6];
-      points[0] = new TsPrimitiveType.TsInt(random.nextInt());
-      points[1] = new TsPrimitiveType.TsLong(random.nextLong());
-      points[2] = new TsPrimitiveType.TsFloat(random.nextFloat());
-      points[3] = new TsPrimitiveType.TsDouble(random.nextDouble());
-      points[4] = new TsPrimitiveType.TsBoolean(random.nextBoolean());
-      points[5] = new TsPrimitiveType.TsBinary(new Binary(String.valueOf(random.nextDouble())));
-      for (int j = 0; j < 6; ++j) {
+      TsPrimitiveType[] points = new TsPrimitiveType[seriesNum];
+      for (int j = 0; j < seriesNum; ++j) {
+        switch (j % 6) {
+          case 0:
+            points[j] = new TsPrimitiveType.TsInt(random.nextInt());
+            break;
+          case 1:
+            points[j] = new TsPrimitiveType.TsLong(random.nextLong());
+            break;
+          case 2:
+            points[j] = new TsPrimitiveType.TsFloat(random.nextFloat());
+            break;
+          case 3:
+            points[j] = new TsPrimitiveType.TsDouble(random.nextDouble());
+            break;
+          case 4:
+            points[j] = new TsPrimitiveType.TsBoolean(random.nextBoolean());
+            break;
+          case 5:
+            points[j] =
+                new TsPrimitiveType.TsBinary(new Binary(String.valueOf(random.nextDouble())));
+            break;
+        }
+      }
+      for (int j = 0; j < seriesNum; ++j) {
         record.get(j).add(new Pair<>(i, points[j]));
       }
       chunkWriter.write(i, points);

From a25aad0ab4d858432f2a1dae004c7e30a60ee5a3 Mon Sep 17 00:00:00 2001
From: Liu Xuxin
Date: Fri, 9 Sep 2022 10:15:29 +0800
Subject: [PATCH 11/31] refactor some codes

---
 .../writer/MemoryControlTsFileIOWriter.java | 113 ++++++------------
 .../write/TsFileIntegrityCheckingTool.java  |  30 ++++-
 2 files changed, 63 insertions(+), 80 deletions(-)

diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java
index 0c171c441931a..2cfa93e11b9ff 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java
@@ -22,7 +22,6 @@
 import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
 import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
 import org.apache.iotdb.tsfile.file.MetaMarker;
-import org.apache.iotdb.tsfile.file.metadata.AlignedChunkMetadata;
 import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata;
 import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata;
 import org.apache.iotdb.tsfile.file.metadata.MetadataIndexEntry;
@@ -62,8 +61,6 @@ public class MemoryControlTsFileIOWriter extends TsFileIOWriter {
   protected long currentChunkMetadataSize = 0L;
   protected File chunkMetadataTempFile;
   protected LocalTsFileOutput tempOutput;
-  // it stores the start address of persisted chunk metadata for per series
-  // protected Queue segmentForPerSeries = new ArrayDeque<>();
   protected volatile boolean hasChunkMetadataInDisk = false;
   protected String currentSeries = null;
   // record the total num of path in order to make bloom filter
   protected int pathCount = 0;
   Path lastSerializePath = null;
 
   public static final String CHUNK_METADATA_TEMP_FILE_PREFIX = ".cmt";
-  private static final byte VECTOR_TYPE = 1;
   private static final byte NORMAL_TYPE = 2;
 
   public MemoryControlTsFileIOWriter(File file, long maxMetadataSize) throws IOException {
@@ -86,6 +82,14 @@ public void endCurrentChunk() {
     super.endCurrentChunk();
   }
 
+  /**
+   * Check if the size of the chunk metadata in memory is greater than the given threshold. If so,
+   * the chunk metadata will be written to a temp file. Notice! If you are writing an aligned
+   * device, you should make sure all data of the current device has been written before this
+   * method is called. For non-aligned series, there is no such limitation.
+   *
+   * @throws IOException
+   */
   public void checkMetadataSizeAndMayFlush() throws IOException {
-    // This function should be called after all data of an aligned device has been written
     if (currentChunkMetadataSize > maxMetadataSize) {
       try {
         sortAndFlushChunkMetadata();
@@ -98,6 +102,12 @@ public void checkMetadataSizeAndMayFlush() throws IOException {
     }
   }
 
+  /**
+   * Sort the chunk metadata in lexicographical order of series path and by the start time of the
+   * chunk, then flush them to a temp file.
+   *
+   * @throws IOException
+   */
   protected void sortAndFlushChunkMetadata() throws IOException {
     // group by series
     Map<Path, List<IChunkMetadata>> chunkMetadataListMap = groupChunkMetadataListBySeries();
@@ -106,10 +116,11 @@ protected void sortAndFlushChunkMetadata() throws IOException {
     }
     hasChunkMetadataInDisk = true;
     // the file structure in temp file will be
-    // ChunkType | chunkSize | chunkBuffer
+    // chunkSize | chunkBuffer
     for (Map.Entry<Path, List<IChunkMetadata>> entry : chunkMetadataListMap.entrySet()) {
       Path seriesPath = entry.getKey();
       if (!seriesPath.equals(lastSerializePath)) {
+        // record the count of path to construct bloom filter later
         pathCount++;
       }
       List<IChunkMetadata> iChunkMetadataList = entry.getValue();
@@ -126,48 +137,7 @@ protected void sortAndFlushChunkMetadata() throws IOException {
   private void writeChunkMetadata(
       List<IChunkMetadata> iChunkMetadataList, Path seriesPath, LocalTsFileOutput output)
       throws IOException {
-    if (iChunkMetadataList.size() == 0) {
-      return;
-    }
-    writeNormalChunkMetadata(iChunkMetadataList, seriesPath, output);
-  }
-
-  private List<AlignedChunkMetadata> packAlignedChunkMetadata(
-      List<IChunkMetadata> iChunkMetadataList) {
-    IChunkMetadata currentTimeChunk = iChunkMetadataList.get(0);
-    List<IChunkMetadata> currentValueChunk = new ArrayList<>();
-    List<AlignedChunkMetadata> alignedChunkMetadata = new ArrayList<>();
-    for (int i = 1; i < iChunkMetadataList.size(); ++i) {
-      if (iChunkMetadataList.get(i).getDataType() == TSDataType.VECTOR) {
-        alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk));
-        currentTimeChunk = iChunkMetadataList.get(i);
-        currentValueChunk = new ArrayList<>();
-      } else {
-        currentValueChunk.add(iChunkMetadataList.get(i));
-      }
-    }
-    if (currentValueChunk.size() > 0) {
-      alignedChunkMetadata.add(new AlignedChunkMetadata(currentTimeChunk, currentValueChunk));
-    }
-    return alignedChunkMetadata;
-  }
-
-  private void writeAlignedChunkMetadata(
-      List<IChunkMetadata> iChunkMetadataList, Path seriesPath, LocalTsFileOutput output)
-      throws IOException {
-    for (IChunkMetadata chunkMetadata : iChunkMetadataList) {
-      ReadWriteIOUtils.write(VECTOR_TYPE, output);
-      PublicBAOS buffer = new PublicBAOS();
-      int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getDevice());
-      ReadWriteIOUtils.write(size, output);
-      buffer.writeTo(output);
-    }
-  }
-
-  private void writeNormalChunkMetadata(
-      List<IChunkMetadata> iChunkMetadataList, Path seriesPath, LocalTsFileOutput output)
-      throws IOException {
     for (IChunkMetadata chunkMetadata : iChunkMetadataList) {
-      ReadWriteIOUtils.write(NORMAL_TYPE, output);
       PublicBAOS buffer = new PublicBAOS();
       int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getFullPath());
       ReadWriteIOUtils.write(size, output);
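The "chunkSize | chunkBuffer" layout referenced above is a plain length-prefixed record stream: for every chunk metadata, a 4-byte size followed by the metadata serialized together with its full series path. A sketch of one record, mirroring the write side of this hunk:

    // Writing one record (as writeChunkMetadata does above):
    PublicBAOS buffer = new PublicBAOS();
    int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getFullPath());
    ReadWriteIOUtils.write(size, output); // 4-byte length prefix
    buffer.writeTo(output);               // serialized metadata payload
    // The read side mirrors it: read the int prefix, then exactly `size` bytes,
    // then ChunkMetadata.deserializeWithFullInfo on the resulting buffer.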
@@ -177,19 +147,20 @@ private void writeNormalChunkMetadata(
 
   @Override
   public void endFile() throws IOException {
-    if (hasChunkMetadataInDisk) {
-      // there is some chunk metadata already been written to the disk
-      // first we should flush the remaining chunk metadata in memory to disk
-      // then read the persisted chunk metadata from disk
-      sortAndFlushChunkMetadata();
-      tempOutput.close();
-    } else {
-      // sort the chunk metadata in memory, construct the index tree
+    if (!hasChunkMetadataInDisk) {
+      // all the chunk metadata is stored in memory
+      // sort the chunk metadata, construct the index tree
       // and just close the file
       super.endFile();
       return;
     }
 
+    // some chunk metadata has already been written to the disk
+    // first we should flush the remaining chunk metadata in memory to disk
+    // then read the persisted chunk metadata from disk
+    sortAndFlushChunkMetadata();
+    tempOutput.close();
+
     // read in the chunk metadata, and construct the index tree
     readChunkMetadataAndConstructIndexTree();
 
@@ -237,7 +208,9 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException {
       // construct the index tree node for the series
       Path currentPath = null;
       if (timeseriesMetadata.getTSDataType() == TSDataType.VECTOR) {
-        // remove the last . in the series id
+        // this series is the time column of the aligned device
+        // the full series path will be like "root.sg.d."
+        // we remove the last . in the series id here
         currentDevice = currentSeries.substring(0, currentSeries.length() - 1);
       } else {
         currentPath = new Path(currentSeries, true);
@@ -351,31 +324,30 @@ public boolean hasNextChunkMetadata() throws IOException {
     return currentPair != null || this.input.position() < endPosition;
   }
 
-  public Pair<String, IChunkMetadata> getNextSeriesNameAndChunkMetadata() throws IOException {
+  /**
+   * Read in the next chunk metadata, and return the series full path and the chunk metadata.
+   *
+   * @return a pair of the series full path and its chunk metadata
+   * @throws IOException
+   */
+  protected Pair<String, IChunkMetadata> getNextSeriesNameAndChunkMetadata() throws IOException {
     if (input.position() >= endPosition) {
       currentPair = null;
       return null;
     }
-    byte type = readNextChunkMetadataType();
     int size = readNextChunkMetadataSize();
     ByteBuffer chunkBuffer = ByteBuffer.allocate(size);
     ReadWriteIOUtils.readAsPossible(input, chunkBuffer);
     chunkBuffer.flip();
-    if (type == NORMAL_TYPE) {
-      ChunkMetadata chunkMetadata = new ChunkMetadata();
-      String seriesPath = ChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata);
-      currentPair = new Pair<>(seriesPath, chunkMetadata);
-    } else {
-      AlignedChunkMetadata chunkMetadata = new AlignedChunkMetadata();
-      String devicePath =
-          AlignedChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata);
-      currentPair = new Pair<>(devicePath, chunkMetadata);
-    }
+    ChunkMetadata chunkMetadata = new ChunkMetadata();
+    String seriesPath = ChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata);
+    currentPair = new Pair<>(seriesPath, chunkMetadata);
     return currentPair;
   }
 
   public String getAllChunkMetadataForNextSeries(List<IChunkMetadata> iChunkMetadataList)
       throws IOException {
+    // TODO: read all the chunk metadata of a single series at once instead of reading iteratively
     if (currentPair == null) {
      if (!hasNextChunkMetadata()) {
         return null;
@@ -400,13 +372,6 @@ public Pair<String, IChunkMetadata> getCurrentPair() {
     return currentPair;
   }
 
-  private byte readNextChunkMetadataType() throws IOException {
-    typeBuffer.clear();
-    ReadWriteIOUtils.readAsPossible(input, typeBuffer);
-    typeBuffer.flip();
-    return ReadWriteIOUtils.readByte(typeBuffer);
-  }
-
   private int readNextChunkMetadataSize() throws IOException {
     sizeBuffer.clear();
     ReadWriteIOUtils.readAsPossible(input, sizeBuffer);
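Taken together, the iterator methods above support a simple read loop over the temp file. A sketch, under the assumption that `iterator` is an open ChunkMetadataReadIterator positioned at the start of its segment:

    List<IChunkMetadata> chunkMetadataOfSeries = new ArrayList<>();
    while (iterator.hasNextChunkMetadata()) {
      // Collects every chunk metadata of the next series; they are adjacent in
      // the temp file because sortAndFlushChunkMetadata wrote them grouped and sorted.
      String seriesFullPath = iterator.getAllChunkMetadataForNextSeries(chunkMetadataOfSeries);
      // ... build one TimeseriesMetadata / index entry for seriesFullPath ...
      chunkMetadataOfSeries.clear();
    }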
diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java
index 43333a6e0296f..c97a9a07742a7 100644
--- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java
+++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/TsFileIntegrityCheckingTool.java
@@ -63,6 +63,12 @@ public class TsFileIntegrityCheckingTool {
   private static Logger LOG = LoggerFactory.getLogger(TsFileIntegrityCheckingTool.class);
 
+  /**
+   * This method checks the integrity of the file by reading it from start to end. It mainly
+   * checks the integrity of the chunks.
+   *
+   * @param filename file to be checked
+   */
   public static void checkIntegrityBySequenceRead(String filename) {
     try (TsFileSequenceReader reader = new TsFileSequenceReader(filename)) {
       String headMagicString = reader.readHeadMagic();
@@ -141,6 +147,16 @@ public static void checkIntegrityBySequenceRead(String filename) {
     }
   }
 
+  /**
+   * This method checks the integrity of the file by mimicking the process of a query: it reads
+   * the metadata index tree first, and gets the timeseries metadata list and chunk metadata list.
+   * After that, it acquires each chunk according to its chunk metadata, deserializes the chunk,
+   * and verifies the correctness of the data.
+   *
+   * @param filename file to be checked
+   * @param originData the origin data in a map format, Device -> SeriesId ->
+   *     List<List<Pair<Long, TsPrimitiveType>>>, where each inner list stands for a chunk
+   */
   public static void checkIntegrityByQuery(
       String filename,
       Map<String, Map<String, List<List<Pair<Long, TsPrimitiveType>>>>> originData) {
@@ -148,6 +164,7 @@ public static void checkIntegrityByQuery(
       Map<String, List<TimeseriesMetadata>> allTimeseriesMetadata =
           reader.getAllTimeseriesMetadata(true);
       Assert.assertEquals(originData.size(), allTimeseriesMetadata.size());
+      // check each series
       for (Map.Entry<String, List<TimeseriesMetadata>> entry : allTimeseriesMetadata.entrySet()) {
         String deviceId = entry.getKey();
         List<TimeseriesMetadata> timeseriesMetadataList = entry.getValue();
@@ -163,6 +180,7 @@ public static void checkIntegrityByQuery(
         if (!vectorMode) {
           // check integrity of not aligned series
           for (TimeseriesMetadata timeseriesMetadata : timeseriesMetadataList) {
+            // get its chunk metadata list, and read the chunk
             String measurementId = timeseriesMetadata.getMeasurementId();
             List<List<Pair<Long, TsPrimitiveType>>> originChunks =
                 originData.get(deviceId).get(measurementId);
@@ -173,29 +191,27 @@ public static void checkIntegrityByQuery(
               Chunk chunk = reader.readMemChunk((ChunkMetadata) chunkMetadataList.get(i));
               ChunkReader chunkReader = new ChunkReader(chunk, null);
               List<Pair<Long, TsPrimitiveType>> originValue = originChunks.get(i);
+              // deserialize the chunk and verify it with origin data
               for (int valIdx = 0; chunkReader.hasNextSatisfiedPage(); ) {
                 IPointReader pointReader = chunkReader.nextPageData().getBatchDataIterator();
                 while (pointReader.hasNextTimeValuePair()) {
                   TimeValuePair pair = pointReader.nextTimeValuePair();
                   Assert.assertEquals(
                       originValue.get(valIdx).left.longValue(), pair.getTimestamp());
-                  try {
-                    Assert.assertEquals(originValue.get(valIdx++).right, pair.getValue());
-                  } catch (Throwable e) {
-                    System.out.println();
-                  }
+                  Assert.assertEquals(originValue.get(valIdx++).right, pair.getValue());
                 }
               }
             }
           }
         } else {
           // check integrity of vector type
-          // 1. check the time column
+          // get the timeseries metadata of the time column
           TimeseriesMetadata timeColumnMetadata = timeseriesMetadataList.get(0);
           List<IChunkMetadata> timeChunkMetadataList = timeColumnMetadata.getChunkMetadataList();
           timeChunkMetadataList.sort(Comparator.comparing(IChunkMetadata::getStartTime));
 
           for (int i = 1; i < timeseriesMetadataList.size(); ++i) {
+            // traverse each value column
             List<IChunkMetadata> valueChunkMetadataList =
                 timeseriesMetadataList.get(i).getChunkMetadataList();
             Assert.assertEquals(timeChunkMetadataList.size(), valueChunkMetadataList.size());
@@ -206,8 +222,10 @@ public static void checkIntegrityByQuery(
                   reader.readMemChunk((ChunkMetadata) timeChunkMetadataList.get(chunkIdx));
               Chunk valueChunk =
                   reader.readMemChunk((ChunkMetadata) valueChunkMetadataList.get(chunkIdx));
+              // construct an aligned chunk reader using the time chunk and value chunk
               IChunkReader chunkReader =
                   new AlignedChunkReader(timeChunk, Collections.singletonList(valueChunk), null);
+              // verify the values
               List<Pair<Long, TsPrimitiveType>> originValue = originDataChunks.get(chunkIdx);
               for (int valIdx = 0; chunkReader.hasNextSatisfiedPage(); ) {
                 IBatchDataIterator pointReader = chunkReader.nextPageData().getBatchDataIterator();
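The two checks are complementary: the sequence read validates the physical chunk layout, while the query replay validates the metadata index tree against the recorded origin data. A typical combination at the end of a writer test, as the tests earlier in this series already do:

    // originData: Device -> SeriesId -> one inner list per expected chunk.
    TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath());
    TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData);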

From db30604d996427d24581d3844f40867d711deb21 Mon Sep 17 00:00:00 2001
From: Liu Xuxin
Date: Fri, 9 Sep 2022 10:37:47 +0800
Subject: [PATCH 12/31] add some comments

---
 .../write/writer/MemoryControlTsFileIOWriter.java | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java
index 2cfa93e11b9ff..8187639b191c7 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java
@@ -55,6 +55,17 @@
 import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.checkAndBuildLevelIndex;
 import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.generateRootNode;
 
+/**
+ * This writer controls the total size of chunk metadata to avoid OOM when writing massive
+ * timeseries. It can only be used in scenarios where the chunks are written in order, meaning
+ * both lexicographical order and time order. The lexicographical order requires that, if the
+ * writer is going to write a series S, all data of all series smaller than S in lexicographical
+ * order have already been written to the writer. The time order requires that, for a single
+ * series S, if the writer is going to write a chunk C of it, all chunks of S whose start times
+ * are smaller than C's should have already been written to the writer. If these requirements are
+ * not met, the metadata index tree may be generated incorrectly, and as a result the file cannot
+ * be queried correctly.
+ */
 public class MemoryControlTsFileIOWriter extends TsFileIOWriter {
   private static final Logger LOG = LoggerFactory.getLogger(MemoryControlTsFileIOWriter.class);
   protected long maxMetadataSize;
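Restating the two ordering requirements from the new class comment as a concrete write sequence (device and series names are illustrative only, not from this patch):

    writer.startChunkGroup("root.sg.d1");
    // root.sg.d1.s1, chunk with times [0, 99]
    // root.sg.d1.s1, chunk with times [100, 199]  <- time order within a series
    // root.sg.d1.s2, ...                          <- s1 fully written before s2 starts
    writer.endChunkGroup();
    writer.startChunkGroup("root.sg.d2");          // d1 fully written before d2 starts
    // ...
    writer.endChunkGroup();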

From f6372f80c830d6bf684ea2908da7cdd3975fad3d Mon Sep 17 00:00:00 2001
From: Liu Xuxin
Date: Fri, 9 Sep 2022 10:48:23 +0800
Subject: [PATCH 13/31] recover aligned chunk metadata

---
 .../file/metadata/AlignedChunkMetadata.java | 37 ++-----------------
 1 file changed, 4 insertions(+), 33 deletions(-)

diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java
index 2b7e238a669dd..e855ea4bad712 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java
@@ -22,20 +22,16 @@
 import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics;
 import org.apache.iotdb.tsfile.read.common.TimeRange;
 import org.apache.iotdb.tsfile.read.controller.IChunkLoader;
-import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
 
-import java.io.IOException;
 import java.io.OutputStream;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
 import java.util.List;
 
 public class AlignedChunkMetadata implements IChunkMetadata {
 
   // ChunkMetadata for time column
-  private IChunkMetadata timeChunkMetadata;
+  private final IChunkMetadata timeChunkMetadata;
   // ChunkMetadata for all subSensors in the vector
-  private List<IChunkMetadata> valueChunkMetadataList;
+  private final List<IChunkMetadata> valueChunkMetadataList;
 
   /** ChunkLoader of metadata, used to create IChunkReader */
   private IChunkLoader chunkLoader;
@@ -46,8 +42,6 @@ public AlignedChunkMetadata(
     this.valueChunkMetadataList = valueChunkMetadataList;
   }
 
-  public AlignedChunkMetadata() {}
-
   @Override
   public Statistics getStatistics() {
     return valueChunkMetadataList.size() == 1 && valueChunkMetadataList.get(0) != null
@@ -189,31 +183,8 @@ public int serializeTo(OutputStream outputStream, boolean serializeStatistic) {
     throw new UnsupportedOperationException("VectorChunkMetadata doesn't support serial method");
   }
 
-  public int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath)
-      throws IOException {
-    int byteLen = 0;
-    byteLen += ReadWriteIOUtils.write(valueChunkMetadataList.size() + 1, outputStream);
-    byteLen += timeChunkMetadata.serializeWithFullInfo(outputStream, seriesFullPath);
-    for (IChunkMetadata chunkMetadata : valueChunkMetadataList) {
-      byteLen += chunkMetadata.serializeWithFullInfo(outputStream, "");
-    }
-    return byteLen;
-  }
-
-  public static String deserializeWithFullInfo(
-      ByteBuffer buffer, AlignedChunkMetadata alignedChunkMetadata) throws IOException {
-    int chunkMetadataNum = ReadWriteIOUtils.readInt(buffer);
-    alignedChunkMetadata.timeChunkMetadata = new ChunkMetadata();
-    alignedChunkMetadata.valueChunkMetadataList = new ArrayList<>();
-    String deviceId =
-        ChunkMetadata.deserializeWithFullInfo(
-            buffer, (ChunkMetadata) alignedChunkMetadata.timeChunkMetadata);
-    for (int i = 1; i < chunkMetadataNum; ++i) {
-      ChunkMetadata metadata = new ChunkMetadata();
-      ChunkMetadata.deserializeWithFullInfo(buffer, metadata);
-      alignedChunkMetadata.valueChunkMetadataList.add(metadata);
-    }
-    return deviceId;
+  public int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath) {
+    throw new UnsupportedOperationException("VectorChunkMetadata doesn't support serial method");
   }
 
   @Override

From 432f7176eb04075e594d25d1354f51c3ba1c4986 Mon Sep 17 00:00:00 2001
From: Liu Xuxin
Date: Fri, 9 Sep 2022 11:06:47 +0800
Subject: [PATCH 14/31] refactor some code

---
 .../write/writer/MemoryControlTsFileIOWriter.java |  9 +++++----
 .../iotdb/tsfile/write/writer/TsFileIOWriter.java | 15 ++++++--------
 .../writer/MemoryControlTsFileIOWriterTest.java   |  4 ++--
 3 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java
index 8187639b191c7..39b19605564dd 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java
@@ -37,6 +37,7 @@
 import org.apache.iotdb.tsfile.utils.PublicBAOS;
 import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
 
+import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -78,13 +79,12 @@ public class MemoryControlTsFileIOWriter extends TsFileIOWriter {
   protected int pathCount = 0;
   Path lastSerializePath = null;
 
-  public static final String CHUNK_METADATA_TEMP_FILE_PREFIX = ".cmt";
-  private static final byte NORMAL_TYPE = 2;
+  public static final String CHUNK_METADATA_TEMP_FILE_SUFFIX = ".cmt";
 
   public MemoryControlTsFileIOWriter(File file, long maxMetadataSize) throws IOException {
     super(file);
     this.maxMetadataSize = maxMetadataSize;
-    this.chunkMetadataTempFile = new File(file.getAbsoluteFile() + CHUNK_METADATA_TEMP_FILE_PREFIX);
+    this.chunkMetadataTempFile = new File(file.getAbsoluteFile() + CHUNK_METADATA_TEMP_FILE_SUFFIX);
   }
 
   @Override
@@ -181,6 +181,7 @@ public void endFile() throws IOException {
     // close file
     out.close();
     canWrite = false;
+    FileUtils.delete(new File(file + CHUNK_METADATA_TEMP_FILE_SUFFIX));
   }
 
   private void readChunkMetadataAndConstructIndexTree() throws IOException {
@@ -296,7 +297,7 @@ private TimeseriesMetadata readTimeseriesMetadata(ChunkMetadataReadIterator iter
     List<IChunkMetadata> iChunkMetadataList = new ArrayList<>();
     currentSeries = iterator.getAllChunkMetadataForNextSeries(iChunkMetadataList);
     TimeseriesMetadata timeseriesMetadata =
-        super.constructOneTimeseriesMetadata(new Path(currentSeries), iChunkMetadataList, false);
+        super.constructOneTimeseriesMetadata(new Path(currentSeries), iChunkMetadataList);
     if (timeseriesMetadata.getTSDataType() == TSDataType.VECTOR) {
       // set empty measurement id for time column
       timeseriesMetadata.setMeasurementId("");
diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java
index 89f5ad6d7bb19..15823e7bf4d7d 100644
--- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java
+++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java
@@ -328,7 +328,11 @@ private MetadataIndexNode flushMetadataIndex(Map<Path, List<IChunkMetadata>> chu
     // create device -> TimeseriesMetaDataList Map
     for (Map.Entry<Path, List<IChunkMetadata>> entry : chunkMetadataListMap.entrySet()) {
       // for ordinary path
-      constructOneTimeseriesMetadata(entry.getKey(), entry.getValue(), true);
+      TimeseriesMetadata timeseriesMetadata =
+          constructOneTimeseriesMetadata(entry.getKey(), entry.getValue());
+      deviceTimeseriesMetadataMap
+          .computeIfAbsent(entry.getKey().getDevice(), k -> new ArrayList<>())
+          .add(timeseriesMetadata);
     }
 
     // construct TsFileMetadata and return
@@ -340,12 +344,10 @@ private MetadataIndexNode flushMetadataIndex(Map<Path, List<IChunkMetadata>> chu
    *
    * @param path Path of chunk
    * @param chunkMetadataList List of chunkMetadata about path(previous param)
-   * @param needRecordInMap need to record the timeseries metadata in deviceTimeseriesMetadataMap
    * @return the constructed TimeseriesMetadata
    */
   protected TimeseriesMetadata constructOneTimeseriesMetadata(
-      Path path, List<IChunkMetadata> chunkMetadataList, boolean needRecordInMap)
-      throws IOException {
+      Path path, List<IChunkMetadata> chunkMetadataList) throws IOException {
     // create TimeseriesMetaData
     PublicBAOS publicBAOS = new PublicBAOS();
     TSDataType dataType = chunkMetadataList.get(chunkMetadataList.size() - 1).getDataType();
@@ -371,11 +373,6 @@ protected TimeseriesMetadata constructOneTimeseriesMetadata(
         dataType,
         seriesStatistics,
         publicBAOS);
-    if (needRecordInMap) {
-      deviceTimeseriesMetadataMap
-          .computeIfAbsent(path.getDevice(), k -> new ArrayList<>())
-          .add(timeseriesMetadata);
-    }
     return timeseriesMetadata;
   }
 
diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java
index 5d7c7d8e28fdb..b9f9ff71f6cc3 100644
--- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java
+++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java
@@ -74,11 +74,11 @@ public void tearDown() throws IOException {
     if (testFile.exists()) {
       FileUtils.delete(testFile);
     }
-    if (new File(testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_PREFIX)
+    if (new File(testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_SUFFIX)
        .exists()) {
      FileUtils.delete(
          new File(
-              testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_PREFIX));
+              testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_SUFFIX));
     }
     if (emptyFile.exists()) {
       FileUtils.delete(emptyFile);

From 2c16259ca977cde5ca092b10c20703229a72c2b2 Mon Sep 17 00:00:00 2001
From: Liu Xuxin
Date: Fri, 9 Sep 2022 15:01:52 +0800
Subject: [PATCH 15/31] Use MemoryControlTsFileIOWriter in ReadChunkCompactionPerformer

---
 .../performer/impl/ReadChunkCompactionPerformer.java | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java
index 1a94214848e54..6442b1b5bbc89 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java
@@ -21,6 +21,7 @@
 import org.apache.iotdb.commons.conf.IoTDBConstant;
 import org.apache.iotdb.commons.exception.MetadataException;
 import org.apache.iotdb.commons.path.PartialPath;
+import org.apache.iotdb.db.conf.IoTDBDescriptor;
 import org.apache.iotdb.db.engine.compaction.inner.utils.AlignedSeriesCompactionExecutor;
 import org.apache.iotdb.db.engine.compaction.inner.utils.MultiTsFileDeviceIterator;
 import org.apache.iotdb.db.engine.compaction.inner.utils.SingleSeriesCompactionExecutor;
@@ -28,11 +29,13 @@
 import org.apache.iotdb.db.engine.compaction.task.CompactionTaskSummary;
 import org.apache.iotdb.db.engine.storagegroup.TsFileResource;
 import org.apache.iotdb.db.exception.StorageEngineException;
+import org.apache.iotdb.db.rescon.SystemInfo;
 import org.apache.iotdb.tsfile.file.metadata.AlignedChunkMetadata;
 import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata;
 import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
 import org.apache.iotdb.tsfile.utils.Pair;
 import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema;
+import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter;
 import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter;
 
 import org.slf4j.Logger;
@@ -63,8 +66,15 @@ public ReadChunkCompactionPerformer() {}
   @Override
   public void perform()
       throws IOException, MetadataException, InterruptedException, StorageEngineException {
+    // size for file writer is 5% of per compaction task memory budget
+    long sizeForFileWriter =
+        SystemInfo.getInstance().getMemorySizeForCompaction()
+            / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread()
+            * 5
+            / 100L;
     try (MultiTsFileDeviceIterator deviceIterator = new MultiTsFileDeviceIterator(seqFiles);
-        TsFileIOWriter writer = new TsFileIOWriter(targetResource.getTsFile())) {
+        MemoryControlTsFileIOWriter writer =
+            new MemoryControlTsFileIOWriter(targetResource.getTsFile(), sizeForFileWriter)) {
       while (deviceIterator.hasNextDevice()) {
         Pair<String, Boolean> deviceInfo = deviceIterator.nextDevice();
         String device = deviceInfo.left;
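For a feel of the budget: with, say, 2 GB reserved for compaction and 10 concurrent compaction threads (both values are assumed for illustration, not taken from this patch), each task's writer gets roughly 10 MiB:

    long memorySizeForCompaction = 2L * 1024 * 1024 * 1024; // assumed config value
    int concurrentCompactionThread = 10;                    // assumed config value
    long sizeForFileWriter =
        memorySizeForCompaction / concurrentCompactionThread * 5 / 100L;
    // 2147483648 / 10 = 214748364; * 5 / 100 = 10737418 bytes, about 10.2 MiB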

From 9bb54568c5f95fb7b22a6bd46f98e6104ed5efed Mon Sep 17 00:00:00 2001
From: Liu Xuxin
Date: Fri, 9 Sep 2022 16:07:56 +0800
Subject: [PATCH 16/31] Use MemoryControlTsFileIOWriter in ReadPointCompactionPerformer

---
 .../task/ReadPointPerformerSubTask.java       |   7 +-
 .../performer/ICompactionPerformer.java       |   4 +-
 .../impl/ReadPointCompactionPerformer.java    | 111 +++++++-----------
 .../writer/AbstractCompactionWriter.java      |  13 +-
 .../writer/CrossSpaceCompactionWriter.java    |  29 ++++-
 .../writer/InnerSpaceCompactionWriter.java    |  26 +++-
 .../ReadPointCompactionPerformerTest.java     |  56 ++++-----
 .../inner/InnerSeqCompactionTest.java         |   7 +-
 .../inner/InnerUnseqCompactionTest.java       |   3 +-
 .../ReadChunkCompactionPerformerOldTest.java  |   3 +-
 10 files changed, 148 insertions(+), 111 deletions(-)

diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/cross/rewrite/task/ReadPointPerformerSubTask.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/cross/rewrite/task/ReadPointPerformerSubTask.java
index 4280d2a6f54a2..5dd91bdd44a69 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/cross/rewrite/task/ReadPointPerformerSubTask.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/cross/rewrite/task/ReadPointPerformerSubTask.java
@@ -33,7 +33,6 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.concurrent.Callable;
 
 /**
@@ -45,7 +44,7 @@ public class ReadPointPerformerSubTask implements Callable<Void> {
   private static final Logger logger =
       LoggerFactory.getLogger(IoTDBConstant.COMPACTION_LOGGER_NAME);
   private final String device;
-  private final Set<String> measurementList;
+  private final List<String> measurementList;
   private final FragmentInstanceContext fragmentInstanceContext;
   private final QueryDataSource queryDataSource;
   private final AbstractCompactionWriter compactionWriter;
@@ -54,7 +53,7 @@ public class ReadPointPerformerSubTask implements Callable<Void> {
 
   public ReadPointPerformerSubTask(
       String device,
-      Set<String> measurementList,
+      List<String> measurementList,
       FragmentInstanceContext fragmentInstanceContext,
       QueryDataSource queryDataSource,
       AbstractCompactionWriter compactionWriter,
@@ -87,7 +86,7 @@ public Void call() throws Exception {
       if (dataBlockReader.hasNextBatch()) {
         compactionWriter.startMeasurement(measurementSchemas, taskId);
         ReadPointCompactionPerformer.writeWithReader(
-            compactionWriter, dataBlockReader, taskId, false);
+            compactionWriter, dataBlockReader, device, taskId, false);
         compactionWriter.endMeasurement(taskId);
       }
     }
diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/ICompactionPerformer.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/ICompactionPerformer.java
index 172eb50ee7bfb..2799c3236b9b0 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/ICompactionPerformer.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/ICompactionPerformer.java
@@ -25,6 +25,7 @@
 
 import java.io.IOException;
 import java.util.List;
+import java.util.concurrent.ExecutionException;
 
 /**
  * CompactionPerformer is used to compact multiple files into one or multiple files. Different
@@ -35,7 +36,8 @@ public interface ICompactionPerformer {
 
   void perform()
-      throws IOException, MetadataException, StorageEngineException, InterruptedException;
+      throws IOException, MetadataException, StorageEngineException, InterruptedException,
+          ExecutionException;
 
   void setTargetFiles(List<TsFileResource> targetFiles);
 
diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadPointCompactionPerformer.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadPointCompactionPerformer.java
index 0b8df320acefb..dcc81f1e3b765 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadPointCompactionPerformer.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadPointCompactionPerformer.java
@@ -47,7 +47,6 @@
 import org.apache.iotdb.db.utils.QueryUtils;
 import org.apache.iotdb.tsfile.file.header.ChunkHeader;
 import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata;
-import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata;
 import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
 import org.apache.iotdb.tsfile.read.TimeValuePair;
 import org.apache.iotdb.tsfile.read.TsFileSequenceReader;
@@ -57,7 +56,6 @@
 import org.apache.iotdb.tsfile.utils.Pair;
 import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema;
 import org.apache.iotdb.tsfile.write.schema.MeasurementSchema;
-import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -70,7 +68,6 @@
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 import java.util.stream.Collectors;
@@ -106,7 +103,8 @@ public ReadPointCompactionPerformer() {}
 
   @Override
   public void perform()
-      throws IOException, MetadataException, StorageEngineException, InterruptedException {
+      throws IOException, MetadataException, StorageEngineException, InterruptedException,
+          ExecutionException {
     long queryId = QueryResourceManager.getInstance().assignCompactionQueryId();
     FragmentInstanceContext fragmentInstanceContext =
         FragmentInstanceContext.createFragmentInstanceContextForCompaction(queryId);
@@ -137,7 +135,6 @@ public void perform()
       }
 
       compactionWriter.endFile();
-      updateDeviceStartTimeAndEndTime(targetFiles, compactionWriter);
       updatePlanIndexes(targetFiles, seqFiles, unseqFiles);
     } finally {
       clearReaderCache();
@@ -164,7 +161,8 @@ private void compactAlignedSeries(
       throws IOException, MetadataException {
     MultiTsFileDeviceIterator.AlignedMeasurementIterator alignedMeasurementIterator =
         deviceIterator.iterateAlignedSeries(device);
-    Set<String> allMeasurements = alignedMeasurementIterator.getAllMeasurements();
+    List<String> allMeasurements =
+        new LinkedList<>(alignedMeasurementIterator.getAllMeasurements());
     Map<String, MeasurementSchema> schemaMap = getMeasurementSchema(device, allMeasurements);
     List<MeasurementSchema> measurementSchemas = new ArrayList<>(schemaMap.values());
     if (measurementSchemas.isEmpty()) {
@@ -188,9 +186,10 @@ private void compactAlignedSeries(
       // chunkgroup is serialized only when at least one timeseries under this device has data
       compactionWriter.startChunkGroup(device, true);
       compactionWriter.startMeasurement(measurementSchemas, 0);
-      writeWithReader(compactionWriter, dataBlockReader, 0, true);
+      writeWithReader(compactionWriter, dataBlockReader, device, 0, true);
       compactionWriter.endMeasurement(0);
       compactionWriter.endChunkGroup();
+      compactionWriter.checkAndMayFlushChunkMetadata();
     }
   }
 
@@ -200,55 +199,43 @@ private void compactNonAlignedSeries(
       AbstractCompactionWriter compactionWriter,
       FragmentInstanceContext fragmentInstanceContext,
       QueryDataSource queryDataSource)
-      throws IOException, InterruptedException, IllegalPathException {
+      throws IOException, InterruptedException, IllegalPathException, ExecutionException {
     MultiTsFileDeviceIterator.MeasurementIterator measurementIterator =
         deviceIterator.iterateNotAlignedSeries(device, false);
-    Set<String> allMeasurements = measurementIterator.getAllMeasurements();
+    List<String> allMeasurements = new ArrayList<>(measurementIterator.getAllMeasurements());
+    allMeasurements.sort((String::compareTo));
     int subTaskNums = Math.min(allMeasurements.size(), subTaskNum);
     Map<String, MeasurementSchema> schemaMap = getMeasurementSchema(device, allMeasurements);
 
-    // assign all measurements to different sub tasks
-    Set<String>[] measurementsForEachSubTask = new HashSet[subTaskNums];
-    int idx = 0;
-    for (String measurement : allMeasurements) {
-      if (measurementsForEachSubTask[idx % subTaskNums] == null) {
-        measurementsForEachSubTask[idx % subTaskNums] = new HashSet<>();
-      }
-      measurementsForEachSubTask[idx++ % subTaskNums].add(measurement);
-    }
-
     // construct sub tasks and start compacting measurements in parallel
-    List<Future<Void>> futures = new ArrayList<>();
     compactionWriter.startChunkGroup(device, false);
-    for (int i = 0; i < subTaskNums; i++) {
-      futures.add(
-          CompactionTaskManager.getInstance()
-              .submitSubTask(
-                  new ReadPointPerformerSubTask(
-                      device,
-                      measurementsForEachSubTask[i],
-                      fragmentInstanceContext,
-                      queryDataSource,
-                      compactionWriter,
-                      schemaMap,
-                      i)));
-    }
-
-    // wait for all sub tasks finish
-    for (int i = 0; i < subTaskNums; i++) {
-      try {
-        futures.get(i).get();
-      } catch (ExecutionException e) {
-        LOGGER.error("[Compaction] SubCompactionTask meet errors ", e);
-        throw new IOException(e);
+    for (int taskCount = 0; taskCount < allMeasurements.size(); ) {
+      List<Future<Void>> futures = new ArrayList<>();
+      for (int i = 0; i < subTaskNums && taskCount < allMeasurements.size(); i++) {
+        futures.add(
+            CompactionTaskManager.getInstance()
+                .submitSubTask(
+                    new ReadPointPerformerSubTask(
+                        device,
+                        Collections.singletonList(allMeasurements.get(taskCount++)),
+                        fragmentInstanceContext,
+                        queryDataSource,
+                        compactionWriter,
+                        schemaMap,
+                        i)));
      }
+      for (Future<Void> future : futures) {
+        future.get();
+      }
+      // sync all the subtasks, then check the writer's chunk metadata size
+      compactionWriter.checkAndMayFlushChunkMetadata();
     }
 
     compactionWriter.endChunkGroup();
   }
 
   private Map<String, MeasurementSchema> getMeasurementSchema(
-      String device, Set<String> measurements) throws IllegalPathException, IOException {
+      String device, List<String> measurements) throws IllegalPathException, IOException {
     HashMap<String, MeasurementSchema> schemaMap = new HashMap<>();
     List<TsFileResource> allResources = new LinkedList<>(seqFiles);
     allResources.addAll(unseqFiles);
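The rewritten loop trades parallel breadth for bounded memory: measurements are handed out one per subtask, the batch is joined, and only then is the writer's metadata size checked, so the flush check never races with an in-flight subtask. In outline (submitOne is a hypothetical shorthand for the submitSubTask call above, not a real method):

    for (int taskCount = 0; taskCount < allMeasurements.size(); ) {
      List<Future<Void>> futures = new ArrayList<>();
      for (int i = 0; i < subTaskNums && taskCount < allMeasurements.size(); i++) {
        futures.add(submitOne(allMeasurements.get(taskCount++), i)); // hypothetical helper
      }
      for (Future<Void> future : futures) {
        future.get(); // join the whole batch before touching shared writer state
      }
      compactionWriter.checkAndMayFlushChunkMetadata();
    }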
@@ -317,29 +304,6 @@ private void clearReaderCache() throws IOException {
     }
   }
 
-  private static void updateDeviceStartTimeAndEndTime(
-      List<TsFileResource> targetResources, AbstractCompactionWriter compactionWriter) {
-    List<TsFileIOWriter> targetFileWriters = compactionWriter.getFileIOWriter();
-    for (int i = 0; i < targetFileWriters.size(); i++) {
-      TsFileIOWriter fileIOWriter = targetFileWriters.get(i);
-      TsFileResource fileResource = targetResources.get(i);
-      // The tmp target file may does not have any data points written due to the existence of the
-      // mods file, and it will be deleted after compaction. So skip the target file that has been
-      // deleted.
-      if (!fileResource.getTsFile().exists()) {
-        continue;
-      }
-      for (Map.Entry<String, List<TimeseriesMetadata>> entry :
-          fileIOWriter.getDeviceTimeseriesMetadataMap().entrySet()) {
-        String device = entry.getKey();
-        for (TimeseriesMetadata timeseriesMetadata : entry.getValue()) {
-          fileResource.updateStartTime(device, timeseriesMetadata.getStatistics().getStartTime());
-          fileResource.updateEndTime(device, timeseriesMetadata.getStatistics().getEndTime());
-        }
-      }
-    }
-  }
-
   /**
    * @param measurementIds if device is aligned, then measurementIds contain all measurements. If
    *     device is not aligned, then measurementIds only contain one measurement.
    */
   public static IDataBlockReader constructReader(
       String deviceId,
       List<String> measurementIds,
       List<MeasurementSchema> measurementSchemas,
-      Set<String> allSensors,
+      List<String> allSensors,
       FragmentInstanceContext fragmentInstanceContext,
       QueryDataSource queryDataSource,
       boolean isAlign)
@@ -363,11 +327,20 @@ public static IDataBlockReader constructReader(
       tsDataType = measurementSchemas.get(0).getType();
     }
     return new SeriesDataBlockReader(
-        seriesPath, allSensors, tsDataType, fragmentInstanceContext, queryDataSource, true);
+        seriesPath,
+        new HashSet<>(allSensors),
+        tsDataType,
+        fragmentInstanceContext,
+        queryDataSource,
+        true);
   }
 
   public static void writeWithReader(
-      AbstractCompactionWriter writer, IDataBlockReader reader, int subTaskId, boolean isAligned)
+      AbstractCompactionWriter writer,
+      IDataBlockReader reader,
+      String device,
+      int subTaskId,
+      boolean isAligned)
       throws IOException {
     while (reader.hasNextBatch()) {
       TsBlock tsBlock = reader.nextBatch();
       if (isAligned) {
         writer.write(
             tsBlock.getTimeColumn(),
             tsBlock.getValueColumns(),
+            device,
             subTaskId,
             tsBlock.getPositionCount());
       } else {
         while (pointReader.hasNextTimeValuePair()) {
           TimeValuePair timeValuePair = pointReader.nextTimeValuePair();
           writer.write(
               timeValuePair.getTimestamp(), timeValuePair.getValue().getValue(), subTaskId);
+          writer.updateStartTimeAndEndTime(device, timeValuePair.getTimestamp(), subTaskId);
         }
       }
     }
diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/AbstractCompactionWriter.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/AbstractCompactionWriter.java
index 542c44c4f0c90..eae92956b5ac0 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/AbstractCompactionWriter.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/AbstractCompactionWriter.java
@@ -32,6 +32,7 @@
 import org.apache.iotdb.tsfile.write.chunk.ChunkWriterImpl;
 import org.apache.iotdb.tsfile.write.chunk.IChunkWriter;
 import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema;
+import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter;
 import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter;
 
 import java.io.IOException;
@@ -72,7 +73,8 @@ public void startMeasurement(List<IMeasurementSchema> measurementSchemaList, int
 
   public abstract void write(long timestamp, Object value, int subTaskId) throws IOException;
 
-  public abstract void write(TimeColumn timestamps, Column[] columns, int subTaskId, int batchSize)
+  public abstract void write(
+      TimeColumn timestamps, Column[] columns, String device, int subTaskId, int batchSize)
       throws IOException;
 
   public abstract void endFile() throws IOException;
@@ -140,6 +142,8 @@ protected void writeDataPoint(Long timestamp, Object value, int subTaskId) {
     measurementPointCountArray[subTaskId] += 1;
   }
 
+  public abstract void updateStartTimeAndEndTime(String device, long time, int subTaskId);
+
   protected void flushChunkToFileWriter(TsFileIOWriter targetWriter, int subTaskId)
       throws IOException {
     writeRateLimit(chunkWriters[subTaskId].estimateMaxSeriesMemSize());
@@ -177,4 +181,11 @@ protected void writeRateLimit(long bytesLength) {
   }
 
   public abstract List<TsFileIOWriter> getFileIOWriter();
+
+  public void checkAndMayFlushChunkMetadata() throws IOException {
+    List<TsFileIOWriter> writers = this.getFileIOWriter();
+    for (TsFileIOWriter writer : writers) {
+      ((MemoryControlTsFileIOWriter) writer).checkMetadataSizeAndMayFlush();
+    }
+  }
 }
diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java
index 80902dd1d946f..9229f28afd0b7 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java
@@ -18,12 +18,15 @@
  */
 package org.apache.iotdb.db.engine.compaction.writer;
 
+import org.apache.iotdb.db.conf.IoTDBDescriptor;
 import org.apache.iotdb.db.engine.storagegroup.TsFileResource;
 import org.apache.iotdb.db.query.control.FileReaderManager;
+import org.apache.iotdb.db.rescon.SystemInfo;
 import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata;
 import org.apache.iotdb.tsfile.read.common.block.column.Column;
 import org.apache.iotdb.tsfile.read.common.block.column.TimeColumn;
 import org.apache.iotdb.tsfile.write.chunk.AlignedChunkWriterImpl;
+import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter;
 import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter;
 
 import java.io.IOException;
@@ -37,6 +40,7 @@ public class CrossSpaceCompactionWriter extends AbstractCompactionWriter {
 
   // source tsfiles
   private List<TsFileResource> seqTsFileResources;
+  private List<TsFileResource> targetTsFileResources;
 
   // Each sub task has its corresponding seq file index.
   // The index of the array corresponds to subTaskId.
@@ -57,11 +61,20 @@ public class CrossSpaceCompactionWriter extends AbstractCompactionWriter {
 
   public CrossSpaceCompactionWriter(
       List<TsFileResource> targetResources, List<TsFileResource> seqFileResources)
       throws IOException {
+    this.targetTsFileResources = targetResources;
     currentDeviceEndTime = new long[seqFileResources.size()];
     isEmptyFile = new boolean[seqFileResources.size()];
     isDeviceExistedInTargetFiles = new boolean[targetResources.size()];
+    long memorySizeForEachWriter =
+        SystemInfo.getInstance().getMemorySizeForCompaction()
+            / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread()
+            * 5
+            / 100L
+            / targetResources.size();
     for (int i = 0; i < targetResources.size(); i++) {
-      this.fileWriterList.add(new TsFileIOWriter(targetResources.get(i).getTsFile()));
+      this.fileWriterList.add(
+          new MemoryControlTsFileIOWriter(
+              targetResources.get(i).getTsFile(), memorySizeForEachWriter));
       isEmptyFile[i] = true;
     }
     this.seqTsFileResources = seqFileResources;
@@ -111,12 +124,16 @@ public void write(long timestamp, Object value, int subTaskId) throws IOExceptio
   }
 
   @Override
-  public void write(TimeColumn timestamps, Column[] columns, int subTaskId, int batchSize)
+  public void write(
+      TimeColumn timestamps, Column[] columns, String device, int subTaskId, int batchSize)
       throws IOException {
     // todo control time range of target tsfile
     checkTimeAndMayFlushChunkToCurrentFile(timestamps.getStartTime(), subTaskId);
     AlignedChunkWriterImpl chunkWriter = (AlignedChunkWriterImpl) this.chunkWriters[subTaskId];
     chunkWriter.write(timestamps, columns, batchSize);
+    TsFileResource resource = targetTsFileResources.get(seqFileIndexArray[subTaskId]);
+    resource.updateStartTime(device, timestamps.getStartTime());
+    resource.updateEndTime(device, timestamps.getEndTime());
     checkChunkSizeAndMayOpenANewChunk(fileWriterList.get(seqFileIndexArray[subTaskId]), subTaskId);
     isDeviceExistedInTargetFiles[seqFileIndexArray[subTaskId]] = true;
     isEmptyFile[seqFileIndexArray[subTaskId]] = false;
@@ -192,4 +209,12 @@ private void checkIsDeviceExistAndGetDeviceEndTime() throws IOException {
       fileIndex++;
     }
   }
+
+  @Override
+  public void updateStartTimeAndEndTime(String device, long time, int subTaskId) {
+    int fileIndex = seqFileIndexArray[subTaskId];
+    TsFileResource resource = targetTsFileResources.get(fileIndex);
+    resource.updateStartTime(device, time);
+    resource.updateEndTime(device, time);
+  }
 }
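Since the performer no longer back-fills resource time ranges from the writer's in-memory TimeseriesMetadata map (which may already have been flushed to the temp file), every write path must maintain the invariant itself: the batch path uses the block's start and end times, and the point path goes through updateStartTimeAndEndTime. The invariant, in one sketch (the timestamp variables are assumed, not from this patch):

    // After writing any data of `device` into the target file at index i:
    TsFileResource resource = targetTsFileResources.get(i);
    resource.updateStartTime(device, earliestTimestampWritten); // assumed variable
    resource.updateEndTime(device, latestTimestampWritten);     // assumed variable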
diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java
index a73c6c29074fe..d8eb1b2f92009 100644
--- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java
+++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java
@@ -18,10 +18,13 @@
  */
 package org.apache.iotdb.db.engine.compaction.writer;
 
+import org.apache.iotdb.db.conf.IoTDBDescriptor;
 import org.apache.iotdb.db.engine.storagegroup.TsFileResource;
+import org.apache.iotdb.db.rescon.SystemInfo;
 import org.apache.iotdb.tsfile.read.common.block.column.Column;
 import org.apache.iotdb.tsfile.read.common.block.column.TimeColumn;
 import org.apache.iotdb.tsfile.write.chunk.AlignedChunkWriterImpl;
+import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter;
 import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter;
 
 import java.io.IOException;
@@ -29,13 +32,21 @@
 import java.util.List;
 
 public class InnerSpaceCompactionWriter extends AbstractCompactionWriter {
-  private TsFileIOWriter fileWriter;
+  private MemoryControlTsFileIOWriter fileWriter;
 
   private boolean isEmptyFile;
+  private TsFileResource resource;
 
   public InnerSpaceCompactionWriter(TsFileResource targetFileResource) throws IOException {
-    this.fileWriter = new TsFileIOWriter(targetFileResource.getTsFile());
+    long sizeForFileWriter =
+        SystemInfo.getInstance().getMemorySizeForCompaction()
+            / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread()
+            * 5
+            / 100L;
+    this.fileWriter =
+        new MemoryControlTsFileIOWriter(targetFileResource.getTsFile(), sizeForFileWriter);
     isEmptyFile = true;
+    resource = targetFileResource;
   }
 
   @Override
@@ -65,11 +76,14 @@ public void write(long timestamp, Object value, int subTaskId) throws IOExceptio
   }
 
   @Override
-  public void write(TimeColumn timestamps, Column[] columns, int subTaskId, int batchSize)
+  public void write(
+      TimeColumn timestamps, Column[] columns, String device, int subTaskId, int batchSize)
       throws IOException {
     AlignedChunkWriterImpl chunkWriter = (AlignedChunkWriterImpl) this.chunkWriters[subTaskId];
     chunkWriter.write(timestamps, columns, batchSize);
     checkChunkSizeAndMayOpenANewChunk(fileWriter, subTaskId);
+    resource.updateStartTime(device, timestamps.getStartTime());
+    resource.updateEndTime(device, timestamps.getEndTime());
     isEmptyFile = false;
   }
 
@@ -89,6 +103,12 @@ public void close() throws IOException {
     fileWriter = null;
   }
 
+  @Override
+  public void updateStartTimeAndEndTime(String device, long time, int subTaskId) {
+    resource.updateStartTime(device, time);
+    resource.updateEndTime(device, time);
+  }
+
   @Override
   public List<TsFileIOWriter> getFileIOWriter() {
     return Collections.singletonList(fileWriter);
diff --git a/server/src/test/java/org/apache/iotdb/db/engine/compaction/ReadPointCompactionPerformerTest.java b/server/src/test/java/org/apache/iotdb/db/engine/compaction/ReadPointCompactionPerformerTest.java
index f0ee6155a1789..f12e7499f7e4c 100644
--- a/server/src/test/java/org/apache/iotdb/db/engine/compaction/ReadPointCompactionPerformerTest.java
+++ b/server/src/test/java/org/apache/iotdb/db/engine/compaction/ReadPointCompactionPerformerTest.java
@@ -62,6 +62,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ExecutionException;
 
 import static org.apache.iotdb.commons.conf.IoTDBConstant.PATH_SEPARATOR;
 import static org.junit.Assert.assertEquals;
@@ -92,7 +93,7 @@ public void tearDown() throws IOException, StorageEngineException {
   @Test
   public void testSeqInnerSpaceCompactionWithSameTimeseries()
       throws IOException, WriteProcessException, MetadataException, StorageEngineException,
-          InterruptedException {
InterruptedException { + InterruptedException, ExecutionException { registerTimeseriesInMManger(2, 3, false); createFiles(5, 2, 3, 100, 0, 0, 50, 50, false, false); @@ -372,7 +373,7 @@ public void testUnSeqInnerSpaceCompactionWithSameTimeseries() @Test public void testUnSeqInnerSpaceCompactionWithDifferentTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { registerTimeseriesInMManger(9, 9, false); createFiles(2, 2, 3, 100, 0, 0, 50, 50, false, false); createFiles(2, 3, 5, 50, 150, 150, 50, 50, false, false); @@ -498,7 +499,7 @@ public void testUnSeqInnerSpaceCompactionWithDifferentTimeseries() @Test public void testUnSeqInnerSpaceCompactionWithAllDataDeletedInTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(5, 7, false); createFiles(2, 2, 3, 300, 0, 0, 0, 0, false, false); @@ -633,7 +634,7 @@ public void testUnSeqInnerSpaceCompactionWithAllDataDeletedInTimeseries() @Test public void testUnSeqInnerSpaceCompactionWithAllDataDeletedInDevice() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(5, 7, false); createFiles(2, 2, 3, 300, 0, 0, 0, 0, false, false); @@ -761,7 +762,7 @@ public void testUnSeqInnerSpaceCompactionWithAllDataDeletedInDevice() @Test public void testUnSeqInnerSpaceCompactionWithAllDataDeletedInTargetFile() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(5, 7, false); createFiles(2, 2, 3, 300, 0, 0, 0, 0, false, false); @@ -853,7 +854,7 @@ public void testUnSeqInnerSpaceCompactionWithAllDataDeletedInTargetFile() @Test public void testAlignedSeqInnerSpaceCompactionWithSameTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { registerTimeseriesInMManger(2, 3, true); createFiles(5, 2, 3, 100, 0, 0, 50, 50, true, true); @@ -950,7 +951,7 @@ public void testAlignedSeqInnerSpaceCompactionWithSameTimeseries() @Test public void testAlignedSeqInnerSpaceCompactionWithDifferentTimeseriesAndEmptyPage() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(50); registerTimeseriesInMManger(5, 7, true); createFiles(2, 2, 3, 100, 0, 0, 50, 50, true, true); @@ -1072,7 +1073,7 @@ public void testAlignedSeqInnerSpaceCompactionWithDifferentTimeseriesAndEmptyPag @Test public void testAlignedSeqInnerSpaceCompactionWithDifferentTimeseriesAndEmptyChunk() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { registerTimeseriesInMManger(5, 7, true); createFiles(2, 2, 3, 100, 0, 0, 50, 50, true, true); createFiles(2, 3, 5, 50, 250, 250, 50, 50, true, true); @@ -1193,7 +1194,7 
@@ public void testAlignedSeqInnerSpaceCompactionWithDifferentTimeseriesAndEmptyChu @Test public void testAlignedUnSeqInnerSpaceCompactionWithEmptyChunkAndEmptyPage() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(5, 7, true); createFiles(2, 2, 3, 300, 0, 0, 0, 0, true, false); @@ -1326,7 +1327,7 @@ public void testAlignedUnSeqInnerSpaceCompactionWithEmptyChunkAndEmptyPage() @Test public void testAlignedUnSeqInnerSpaceCompactionWithAllDataDeletedInTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(5, 7, true); createFiles(2, 2, 3, 300, 0, 0, 0, 0, true, false); @@ -1507,7 +1508,7 @@ public void testAlignedUnSeqInnerSpaceCompactionWithAllDataDeletedInTimeseries() @Test public void testAlignedUnSeqInnerSpaceCompactionWithAllDataDeletedInDevice() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(5, 7, true); createFiles(2, 2, 3, 300, 0, 0, 0, 0, true, false); @@ -1655,7 +1656,7 @@ public void testAlignedUnSeqInnerSpaceCompactionWithAllDataDeletedInDevice() @Test public void testAlignedUnSeqInnerSpaceCompactionWithSameTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { registerTimeseriesInMManger(2, 3, true); createFiles(5, 2, 3, 100, 0, 0, 50, 50, true, false); @@ -1754,7 +1755,7 @@ public void testAlignedUnSeqInnerSpaceCompactionWithSameTimeseries() @Test public void testCrossSpaceCompactionWithSameTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { registerTimeseriesInMManger(2, 3, false); createFiles(5, 2, 3, 100, 0, 0, 0, 0, false, true); createFiles(5, 2, 3, 50, 0, 10000, 50, 50, false, false); @@ -1845,7 +1846,7 @@ public void testCrossSpaceCompactionWithSameTimeseries() @Test public void testCrossSpaceCompactionWithDifferentTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(4, 5, false); createFiles(2, 2, 3, 300, 0, 0, 50, 50, false, true); @@ -2031,7 +2032,7 @@ public void testCrossSpaceCompactionWithDifferentTimeseries() @Test public void testCrossSpaceCompactionWithAllDataDeletedInTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(4, 5, false); createFiles(2, 2, 3, 300, 0, 0, 50, 50, false, true); @@ -2229,7 +2230,7 @@ public void testCrossSpaceCompactionWithAllDataDeletedInTimeseries() @Test public void testCrossSpaceCompactionWithAllDataDeletedInDevice() throws IOException, 
WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(4, 5, false); createFiles(2, 2, 3, 300, 0, 0, 50, 50, false, true); @@ -2418,7 +2419,7 @@ public void testCrossSpaceCompactionWithAllDataDeletedInDevice() @Test public void testCrossSpaceCompactionWithAllDataDeletedInOneTargetFile() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(4, 5, false); createFiles(2, 2, 3, 300, 0, 0, 50, 50, false, true); @@ -2584,7 +2585,7 @@ public void testCrossSpaceCompactionWithAllDataDeletedInOneTargetFile() @Test public void testCrossSpaceCompactionWithAllDataDeletedInDeviceInSeqFiles() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(4, 5, false); createFiles(2, 2, 3, 300, 0, 0, 50, 50, false, true); @@ -2789,7 +2790,7 @@ public void testCrossSpaceCompactionWithAllDataDeletedInDeviceInSeqFiles() @Test public void testAlignedCrossSpaceCompactionWithSameTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { registerTimeseriesInMManger(2, 3, true); createFiles(5, 2, 3, 100, 0, 0, 0, 0, true, true); createFiles(5, 2, 3, 50, 0, 10000, 50, 50, true, false); @@ -2889,7 +2890,7 @@ public void testAlignedCrossSpaceCompactionWithSameTimeseries() @Test public void testAlignedCrossSpaceCompactionWithDifferentTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(4, 5, true); createFiles(2, 2, 3, 300, 0, 0, 50, 50, true, true); @@ -3054,7 +3055,7 @@ public void testAlignedCrossSpaceCompactionWithDifferentTimeseries() @Test public void testAlignedCrossSpaceCompactionWithAllDataDeletedInTimeseries() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(4, 5, true); createFiles(2, 2, 3, 300, 0, 0, 50, 50, true, true); @@ -3291,7 +3292,7 @@ public void testAlignedCrossSpaceCompactionWithAllDataDeletedInTimeseries() @Test public void testAlignedCrossSpaceCompactionWithAllDataDeletedInOneTargetFile() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { + InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(4, 5, true); createFiles(2, 2, 3, 300, 0, 0, 50, 50, true, true); @@ -3500,7 +3501,7 @@ public void testAlignedCrossSpaceCompactionWithAllDataDeletedInOneTargetFile() @Test public void testAlignedCrossSpaceCompactionWithFileTimeIndexResource() throws IOException, WriteProcessException, MetadataException, StorageEngineException, - InterruptedException { 
+ InterruptedException, ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); registerTimeseriesInMManger(4, 5, true); createFiles(2, 2, 3, 300, 0, 0, 50, 50, true, true); @@ -3694,7 +3695,7 @@ public void testAlignedCrossSpaceCompactionWithFileTimeIndexResource() } @Test - public void testCrossSpaceCompactionWithNewDeviceInUnseqFile() { + public void testCrossSpaceCompactionWithNewDeviceInUnseqFile() throws ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); try { registerTimeseriesInMManger(6, 6, false); @@ -3769,7 +3770,8 @@ public void testCrossSpaceCompactionWithNewDeviceInUnseqFile() { } @Test - public void testCrossSpaceCompactionWithDeviceMaxTimeLaterInUnseqFile() { + public void testCrossSpaceCompactionWithDeviceMaxTimeLaterInUnseqFile() + throws ExecutionException { TSFileDescriptor.getInstance().getConfig().setMaxNumberOfPointsInPage(30); try { registerTimeseriesInMManger(6, 6, false); diff --git a/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerSeqCompactionTest.java b/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerSeqCompactionTest.java index 09e17a763fdb7..095a6d260fc4a 100644 --- a/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerSeqCompactionTest.java +++ b/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerSeqCompactionTest.java @@ -63,6 +63,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicInteger; import static org.apache.iotdb.db.engine.compaction.utils.CompactionCheckerUtils.putChunk; @@ -350,7 +351,7 @@ public void testDeserializePage() throws MetadataException, IOException, WritePr } } } - } catch (InterruptedException | StorageEngineException e) { + } catch (InterruptedException | StorageEngineException | ExecutionException e) { e.printStackTrace(); } finally { IoTDBDescriptor.getInstance() @@ -365,7 +366,7 @@ public void testDeserializePage() throws MetadataException, IOException, WritePr @Test public void testAppendPage() throws IOException, MetadataException, InterruptedException, StorageEngineException, - WriteProcessException { + WriteProcessException, ExecutionException { for (int toMergeFileNum : toMergeFileNums) { for (CompactionTimeseriesType compactionTimeseriesType : compactionTimeseriesTypes) { @@ -632,7 +633,7 @@ public void testAppendPage() @Test public void testAppendChunk() throws IOException, IllegalPathException, MetadataException, StorageEngineException, - WriteProcessException { + WriteProcessException, ExecutionException { long prevChunkPointNumLowerBoundInCompaction = IoTDBDescriptor.getInstance().getConfig().getChunkPointNumLowerBoundInCompaction(); IoTDBDescriptor.getInstance().getConfig().setChunkPointNumLowerBoundInCompaction(1); diff --git a/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerUnseqCompactionTest.java b/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerUnseqCompactionTest.java index 588b1af97e056..63055f1e9a08e 100644 --- a/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerUnseqCompactionTest.java +++ b/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerUnseqCompactionTest.java @@ -57,6 +57,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ExecutionException; import static 
org.apache.iotdb.db.engine.compaction.utils.CompactionCheckerUtils.putOnePageChunk; @@ -132,7 +133,7 @@ public void tearDown() throws IOException, StorageEngineException { @Test public void test() throws MetadataException, IOException, StorageEngineException, WriteProcessException, - InterruptedException { + InterruptedException, ExecutionException { for (int toMergeFileNum : toMergeFileNums) { for (CompactionTimeseriesType compactionTimeseriesType : compactionTimeseriesTypes) { for (boolean compactionBeforeHasMod : compactionBeforeHasMods) { diff --git a/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/ReadChunkCompactionPerformerOldTest.java b/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/ReadChunkCompactionPerformerOldTest.java index 59b0ab3d3d6f8..b48d494c0beaa 100644 --- a/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/ReadChunkCompactionPerformerOldTest.java +++ b/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/ReadChunkCompactionPerformerOldTest.java @@ -45,6 +45,7 @@ import java.io.File; import java.io.IOException; import java.util.Collections; +import java.util.concurrent.ExecutionException; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -78,7 +79,7 @@ public void tearDown() throws IOException, StorageEngineException { @Test public void testCompact() throws IOException, MetadataException, InterruptedException, StorageEngineException, - WriteProcessException { + WriteProcessException, ExecutionException { TsFileResource targetTsFileResource = new TsFileResource( new File( From 0bd5a4c731ad1d1c8f8a8f8bbca00823a7f616fa Mon Sep 17 00:00:00 2001 From: LiuXuxin Date: Mon, 12 Sep 2022 19:18:52 +0800 Subject: [PATCH 17/31] control chunk metadata size in ReadChunkCompactionPerformer --- .../inner/utils/AlignedSeriesCompactionExecutor.java | 7 ++++--- .../inner/utils/SingleSeriesCompactionExecutor.java | 11 +++++++---- .../performer/impl/ReadChunkCompactionPerformer.java | 7 ++----- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/AlignedSeriesCompactionExecutor.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/AlignedSeriesCompactionExecutor.java index c9c36378f506f..3bc256507bab2 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/AlignedSeriesCompactionExecutor.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/AlignedSeriesCompactionExecutor.java @@ -39,7 +39,7 @@ import org.apache.iotdb.tsfile.write.chunk.AlignedChunkWriterImpl; import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; import org.apache.iotdb.tsfile.write.schema.MeasurementSchema; -import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; +import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; import com.google.common.util.concurrent.RateLimiter; @@ -56,7 +56,7 @@ public class AlignedSeriesCompactionExecutor { private final LinkedList>> readerAndChunkMetadataList; private final TsFileResource targetResource; - private final TsFileIOWriter writer; + private final MemoryControlTsFileIOWriter writer; private final AlignedChunkWriterImpl chunkWriter; private final List schemaList; @@ -73,7 +73,7 @@ public AlignedSeriesCompactionExecutor( String device, TsFileResource targetResource, LinkedList>> readerAndChunkMetadataList, - TsFileIOWriter writer) + MemoryControlTsFileIOWriter writer) throws IOException { 
this.device = device; this.readerAndChunkMetadataList = readerAndChunkMetadataList; @@ -151,6 +151,7 @@ public void execute() throws IOException { chunkWriter.estimateMaxSeriesMemSize()); chunkWriter.writeToFileWriter(writer); } + writer.checkMetadataSizeAndMayFlush(); } private void compactOneAlignedChunk(AlignedChunkReader chunkReader) throws IOException { diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java index 40f2632d0e2cf..c852fa565e1ec 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java @@ -37,7 +37,7 @@ import org.apache.iotdb.tsfile.write.chunk.ChunkWriterImpl; import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; import org.apache.iotdb.tsfile.write.schema.MeasurementSchema; -import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; +import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; import com.google.common.util.concurrent.RateLimiter; @@ -50,7 +50,7 @@ public class SingleSeriesCompactionExecutor { private String device; private PartialPath series; private LinkedList>> readerAndChunkMetadataList; - private TsFileIOWriter fileWriter; + private MemoryControlTsFileIOWriter fileWriter; private TsFileResource targetResource; private IMeasurementSchema schema; @@ -77,7 +77,7 @@ public SingleSeriesCompactionExecutor( PartialPath series, IMeasurementSchema measurementSchema, LinkedList>> readerAndChunkMetadataList, - TsFileIOWriter fileWriter, + MemoryControlTsFileIOWriter fileWriter, TsFileResource targetResource) { this.device = series.getDevice(); this.series = series; @@ -93,7 +93,7 @@ public SingleSeriesCompactionExecutor( public SingleSeriesCompactionExecutor( PartialPath series, LinkedList>> readerAndChunkMetadataList, - TsFileIOWriter fileWriter, + MemoryControlTsFileIOWriter fileWriter, TsFileResource targetResource) { this.device = series.getDevice(); this.series = series; @@ -310,6 +310,7 @@ private void flushChunkToFileWriter( false, getChunkSize(chunk)); fileWriter.writeChunk(chunk, chunkMetadata); + fileWriter.checkMetadataSizeAndMayFlush(); } private void flushChunkWriterIfLargeEnough() throws IOException { @@ -323,6 +324,7 @@ private void flushChunkWriterIfLargeEnough() throws IOException { false, chunkWriter.estimateMaxSeriesMemSize()); chunkWriter.writeToFileWriter(fileWriter); + fileWriter.checkMetadataSizeAndMayFlush(); pointCountInChunkWriter = 0L; } } @@ -345,6 +347,7 @@ private void flushChunkWriter() throws IOException { false, chunkWriter.estimateMaxSeriesMemSize()); chunkWriter.writeToFileWriter(fileWriter); + fileWriter.checkMetadataSizeAndMayFlush(); pointCountInChunkWriter = 0L; } } diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java index 6442b1b5bbc89..38e7f036d6911 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java @@ -34,9 +34,7 @@ import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import 
org.apache.iotdb.tsfile.read.TsFileSequenceReader; import org.apache.iotdb.tsfile.utils.Pair; -import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; -import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -116,7 +114,7 @@ public void setSummary(CompactionTaskSummary summary) { private void compactAlignedSeries( String device, TsFileResource targetResource, - TsFileIOWriter writer, + MemoryControlTsFileIOWriter writer, MultiTsFileDeviceIterator deviceIterator) throws IOException, InterruptedException { checkThreadInterrupted(); @@ -139,7 +137,7 @@ private void checkThreadInterrupted() throws InterruptedException { private void compactNotAlignedSeries( String device, TsFileResource targetResource, - TsFileIOWriter writer, + MemoryControlTsFileIOWriter writer, MultiTsFileDeviceIterator deviceIterator) throws IOException, MetadataException, InterruptedException { MultiTsFileDeviceIterator.MeasurementIterator seriesIterator = @@ -148,7 +146,6 @@ private void compactNotAlignedSeries( checkThreadInterrupted(); // TODO: we can provide a configuration item to enable concurrent between each series PartialPath p = new PartialPath(device, seriesIterator.nextSeries()); - IMeasurementSchema measurementSchema; // TODO: seriesIterator needs to be refactor. // This statement must be called before next hasNextSeries() called, or it may be trapped in a // dead-loop. From c1430b833e68c4896af2fa0dea1c53e335e80f87 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Tue, 13 Sep 2022 10:42:08 +0800 Subject: [PATCH 18/31] add log for debug --- .../compaction/inner/InnerUnseqCompactionTest.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerUnseqCompactionTest.java b/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerUnseqCompactionTest.java index 63055f1e9a08e..1fd50b21febbf 100644 --- a/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerUnseqCompactionTest.java +++ b/server/src/test/java/org/apache/iotdb/db/engine/compaction/inner/InnerUnseqCompactionTest.java @@ -47,6 +47,8 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; @@ -62,6 +64,7 @@ import static org.apache.iotdb.db.engine.compaction.utils.CompactionCheckerUtils.putOnePageChunk; public class InnerUnseqCompactionTest { + private static final Logger LOG = LoggerFactory.getLogger(InnerUnseqCompactionTest.class); static final String COMPACTION_TEST_SG = "root.compactionTest"; static final String[] fullPaths = new String[] { @@ -352,6 +355,13 @@ public void test() toDeleteTimeseriesAndTime, tsFileResource, false); } } + LOG.error( + "{} {} {} {} {}", + toMergeFileNum, + compactionTimeseriesType, + compactionBeforeHasMod, + compactionHasMod, + compactionOverlapType); TsFileResource targetTsFileResource = CompactionFileGeneratorUtils.getInnerCompactionTargetTsFileResources( toMergeResources, false) From 97c34e98667e6d76783db9408d3341c812c35bc5 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Tue, 13 Sep 2022 15:31:43 +0800 Subject: [PATCH 19/31] fix npe bug --- .../writer/CrossSpaceCompactionWriter.java | 20 ++++++++++++------- .../writer/InnerSpaceCompactionWriter.java | 14 +++++++++---- .../writer/MemoryControlTsFileIOWriter.java | 1 + 3 files changed, 24 insertions(+), 
11 deletions(-) diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java index 9229f28afd0b7..038d009d16c31 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java @@ -131,9 +131,12 @@ public void write( checkTimeAndMayFlushChunkToCurrentFile(timestamps.getStartTime(), subTaskId); AlignedChunkWriterImpl chunkWriter = (AlignedChunkWriterImpl) this.chunkWriters[subTaskId]; chunkWriter.write(timestamps, columns, batchSize); - TsFileResource resource = targetTsFileResources.get(seqFileIndexArray[subTaskId]); - resource.updateStartTime(device, timestamps.getStartTime()); - resource.updateEndTime(device, timestamps.getEndTime()); + synchronized (this) { + // we need to synchronize here to avoid multi-thread competition between sub-tasks + TsFileResource resource = targetTsFileResources.get(seqFileIndexArray[subTaskId]); + resource.updateStartTime(device, timestamps.getStartTime()); + resource.updateEndTime(device, timestamps.getEndTime()); + } checkChunkSizeAndMayOpenANewChunk(fileWriterList.get(seqFileIndexArray[subTaskId]), subTaskId); isDeviceExistedInTargetFiles[seqFileIndexArray[subTaskId]] = true; isEmptyFile[seqFileIndexArray[subTaskId]] = false; @@ -212,9 +215,12 @@ private void checkIsDeviceExistAndGetDeviceEndTime() throws IOException { @Override public void updateStartTimeAndEndTime(String device, long time, int subTaskId) { - int fileIndex = seqFileIndexArray[subTaskId]; - TsFileResource resource = targetTsFileResources.get(fileIndex); - resource.updateStartTime(device, time); - resource.updateEndTime(device, time); + synchronized (this) { + int fileIndex = seqFileIndexArray[subTaskId]; + TsFileResource resource = targetTsFileResources.get(fileIndex); + // we need to synchronize here to avoid multi-thread competition between sub-tasks + resource.updateStartTime(device, time); + resource.updateEndTime(device, time); + } } } diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java index d8eb1b2f92009..ef527bc55dff6 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java @@ -82,8 +82,11 @@ public void write( AlignedChunkWriterImpl chunkWriter = (AlignedChunkWriterImpl) this.chunkWriters[subTaskId]; chunkWriter.write(timestamps, columns, batchSize); checkChunkSizeAndMayOpenANewChunk(fileWriter, subTaskId); - resource.updateStartTime(device, timestamps.getStartTime()); - resource.updateEndTime(device, timestamps.getEndTime()); + synchronized (this) { + // we need to synchronize here to avoid multi-thread competition between sub-tasks + resource.updateStartTime(device, timestamps.getStartTime()); + resource.updateEndTime(device, timestamps.getEndTime()); + } isEmptyFile = false; } @@ -105,8 +108,11 @@ public void close() throws IOException { @Override public void updateStartTimeAndEndTime(String device, long time, int subTaskId) { - resource.updateStartTime(device, time); - resource.updateEndTime(device, time); + // we need to synchronize here to avoid multi-thread competition between sub-tasks
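The synchronized blocks introduced above close a lost-update window: all sub-task threads of one compaction task share the same target TsFileResource, and updateStartTime/updateEndTime are read-modify-write operations on its per-device time index, so two unsynchronized writers can each read the old bound and overwrite the other's update. A minimal sketch of the hazard and the shape of the fix, using a stand-in class rather than the real TsFileResource:

    // Stand-in for the per-device start-time index of a TsFileResource (not the real class).
    class DeviceStartTimes {
      private final java.util.Map<String, Long> startTimes = new java.util.HashMap<>();

      // Racy: two sub-task threads may both read the current minimum and then
      // both write, so the smaller timestamp can be silently lost.
      void updateStartTimeRacy(String device, long time) {
        Long current = startTimes.get(device);
        if (current == null || time < current) {
          startTimes.put(device, time);
        }
      }

      // The fix serializes the read-modify-write on one shared monitor, as the
      // patch does by synchronizing on the compaction writer itself.
      synchronized void updateStartTime(String device, long time) {
        Long current = startTimes.get(device);
        if (current == null || time < current) {
          startTimes.put(device, time);
        }
      }
    }

Taking the writer as the monitor also orders start-time updates relative to end-time updates for the same device.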
+ synchronized (this) { + resource.updateStartTime(device, time); + resource.updateEndTime(device, time); + } } @Override diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index 39b19605564dd..6159ee4caabcb 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -223,6 +223,7 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { // this series is the time column of the aligned device // the full series path will be like "root.sg.d." // we remove the last . in the series id here + currentPath = new Path(currentSeries); currentDevice = currentSeries.substring(0, currentSeries.length() - 1); } else { currentPath = new Path(currentSeries, true); From 8859ff2a9dccc8e8b878f2215d31fc1d43229282 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Thu, 15 Sep 2022 17:48:33 +0800 Subject: [PATCH 20/31] temp for refactor --- .../AlignedSeriesCompactionExecutor.java | 6 +- .../utils/SingleSeriesCompactionExecutor.java | 5 +- .../impl/ReadChunkCompactionPerformer.java | 10 +- .../writer/AbstractCompactionWriter.java | 3 +- .../writer/CrossSpaceCompactionWriter.java | 4 +- .../writer/InnerSpaceCompactionWriter.java | 6 +- .../tsfile/file/metadata/ChunkMetadata.java | 8 + .../writer/MemoryControlTsFileIOWriter.java | 335 ------------------ .../tsfile/write/writer/TsFileIOWriter.java | 256 +++++++++++-- .../writer/tsmiterator/DiskTSMIterator.java | 123 +++++++ .../write/writer/tsmiterator/TSMIterator.java | 119 +++++++ .../MemoryControlTsFileIOWriterTest.java | 9 +- 12 files changed, 495 insertions(+), 389 deletions(-) create mode 100644 tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java create mode 100644 tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/AlignedSeriesCompactionExecutor.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/AlignedSeriesCompactionExecutor.java index 3bc256507bab2..3de4c64a36e36 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/AlignedSeriesCompactionExecutor.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/AlignedSeriesCompactionExecutor.java @@ -39,7 +39,7 @@ import org.apache.iotdb.tsfile.write.chunk.AlignedChunkWriterImpl; import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; import org.apache.iotdb.tsfile.write.schema.MeasurementSchema; -import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; +import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; import com.google.common.util.concurrent.RateLimiter; @@ -56,7 +56,7 @@ public class AlignedSeriesCompactionExecutor { private final LinkedList>> readerAndChunkMetadataList; private final TsFileResource targetResource; - private final MemoryControlTsFileIOWriter writer; + private final TsFileIOWriter writer; private final AlignedChunkWriterImpl chunkWriter; private final List schemaList; @@ -73,7 +73,7 @@ public AlignedSeriesCompactionExecutor( String device, TsFileResource targetResource, LinkedList>> readerAndChunkMetadataList, - MemoryControlTsFileIOWriter writer) + TsFileIOWriter writer) throws IOException { this.device = device; 
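The other half of the npe fix is the one-line addition to MemoryControlTsFileIOWriter above: in the time-column branch of readChunkMetadataAndConstructIndexTree, currentPath was declared but never assigned, so the index-tree code after the branch could dereference null for aligned devices. A condensed view of the branch after the fix, pieced together from the surrounding context lines (currentDevice is declared earlier in the real method):

    String currentDevice;
    Path currentPath = null;
    if (timeseriesMetadata.getTSDataType() == TSDataType.VECTOR) {
      // time column of an aligned device; the series id ends with a trailing dot
      currentPath = new Path(currentSeries); // the added line; formerly missing, leaving currentPath null
      currentDevice = currentSeries.substring(0, currentSeries.length() - 1);
    } else {
      currentPath = new Path(currentSeries, true);
      currentDevice = currentPath.getDevice();
    }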
this.readerAndChunkMetadataList = readerAndChunkMetadataList; diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java index c852fa565e1ec..e336733a66a1f 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java @@ -38,6 +38,7 @@ import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; import org.apache.iotdb.tsfile.write.schema.MeasurementSchema; import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; +import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; import com.google.common.util.concurrent.RateLimiter; @@ -50,7 +51,7 @@ public class SingleSeriesCompactionExecutor { private String device; private PartialPath series; private LinkedList>> readerAndChunkMetadataList; - private MemoryControlTsFileIOWriter fileWriter; + private TsFileIOWriter fileWriter; private TsFileResource targetResource; private IMeasurementSchema schema; @@ -93,7 +94,7 @@ public SingleSeriesCompactionExecutor( public SingleSeriesCompactionExecutor( PartialPath series, LinkedList>> readerAndChunkMetadataList, - MemoryControlTsFileIOWriter fileWriter, + TsFileIOWriter fileWriter, TsFileResource targetResource) { this.device = series.getDevice(); this.series = series; diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java index 38e7f036d6911..779a7c820d323 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java @@ -34,7 +34,7 @@ import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import org.apache.iotdb.tsfile.read.TsFileSequenceReader; import org.apache.iotdb.tsfile.utils.Pair; -import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; +import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -71,8 +71,8 @@ public void perform() * 5 / 100L; try (MultiTsFileDeviceIterator deviceIterator = new MultiTsFileDeviceIterator(seqFiles); - MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(targetResource.getTsFile(), sizeForFileWriter)) { + TsFileIOWriter writer = + new TsFileIOWriter(targetResource.getTsFile(), true, sizeForFileWriter)) { while (deviceIterator.hasNextDevice()) { Pair deviceInfo = deviceIterator.nextDevice(); String device = deviceInfo.left; @@ -114,7 +114,7 @@ public void setSummary(CompactionTaskSummary summary) { private void compactAlignedSeries( String device, TsFileResource targetResource, - MemoryControlTsFileIOWriter writer, + TsFileIOWriter writer, MultiTsFileDeviceIterator deviceIterator) throws IOException, InterruptedException { checkThreadInterrupted(); @@ -137,7 +137,7 @@ private void checkThreadInterrupted() throws InterruptedException { private void compactNotAlignedSeries( String device, TsFileResource targetResource, - MemoryControlTsFileIOWriter writer, + TsFileIOWriter writer, MultiTsFileDeviceIterator deviceIterator) throws IOException, MetadataException, 
InterruptedException { MultiTsFileDeviceIterator.MeasurementIterator seriesIterator = diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/AbstractCompactionWriter.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/AbstractCompactionWriter.java index eae92956b5ac0..ae01567f9bd3c 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/AbstractCompactionWriter.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/AbstractCompactionWriter.java @@ -32,7 +32,6 @@ import org.apache.iotdb.tsfile.write.chunk.ChunkWriterImpl; import org.apache.iotdb.tsfile.write.chunk.IChunkWriter; import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; -import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; import java.io.IOException; @@ -185,7 +184,7 @@ protected void writeRateLimit(long bytesLength) { public void checkAndMayFlushChunkMetadata() throws IOException { List writers = this.getFileIOWriter(); for (TsFileIOWriter writer : writers) { - ((MemoryControlTsFileIOWriter) writer).checkMetadataSizeAndMayFlush(); + writer.checkMetadataSizeAndMayFlush(); } } } diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java index 038d009d16c31..c9b945964eca5 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java @@ -26,7 +26,6 @@ import org.apache.iotdb.tsfile.read.common.block.column.Column; import org.apache.iotdb.tsfile.read.common.block.column.TimeColumn; import org.apache.iotdb.tsfile.write.chunk.AlignedChunkWriterImpl; -import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; import java.io.IOException; @@ -73,8 +72,7 @@ public CrossSpaceCompactionWriter( / targetResources.size(); for (int i = 0; i < targetResources.size(); i++) { this.fileWriterList.add( - new MemoryControlTsFileIOWriter( - targetResources.get(i).getTsFile(), memorySizeForEachWriter)); + new TsFileIOWriter(targetResources.get(i).getTsFile(), true, memorySizeForEachWriter)); isEmptyFile[i] = true; } this.seqTsFileResources = seqFileResources; diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java index ef527bc55dff6..72de8a0e831ea 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java @@ -24,7 +24,6 @@ import org.apache.iotdb.tsfile.read.common.block.column.Column; import org.apache.iotdb.tsfile.read.common.block.column.TimeColumn; import org.apache.iotdb.tsfile.write.chunk.AlignedChunkWriterImpl; -import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; import java.io.IOException; @@ -32,7 +31,7 @@ import java.util.List; public class InnerSpaceCompactionWriter extends AbstractCompactionWriter { - private MemoryControlTsFileIOWriter fileWriter; + private TsFileIOWriter fileWriter; private boolean 
isEmptyFile; private TsFileResource resource; @@ -43,8 +42,7 @@ public InnerSpaceCompactionWriter(TsFileResource targetFileResource) throws IOEx / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread() * 5 / 100L; - this.fileWriter = - new MemoryControlTsFileIOWriter(targetFileResource.getTsFile(), sizeForFileWriter); + this.fileWriter = new TsFileIOWriter(targetFileResource.getTsFile(), true, sizeForFileWriter); isEmptyFile = true; resource = targetFileResource; } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java index 8c1fab686a446..a9bd99d739eb5 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java @@ -221,6 +221,14 @@ public static ChunkMetadata deserializeFrom( return chunkMetaData; } + public static ChunkMetadata deserializeFrom(ByteBuffer buffer, TSDataType dataType) { + ChunkMetadata chunkMetadata = new ChunkMetadata(); + chunkMetadata.tsDataType = dataType; + chunkMetadata.offsetOfChunkHeader = ReadWriteIOUtils.readLong(buffer); + chunkMetadata.statistics = Statistics.deserialize(buffer, dataType); + return chunkMetadata; + } + @Override public long getVersion() { return version; diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java index 6159ee4caabcb..986d4b37db5ba 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java @@ -19,42 +19,13 @@ package org.apache.iotdb.tsfile.write.writer; -import org.apache.iotdb.tsfile.common.conf.TSFileConfig; -import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor; -import org.apache.iotdb.tsfile.file.MetaMarker; -import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; -import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; -import org.apache.iotdb.tsfile.file.metadata.MetadataIndexEntry; -import org.apache.iotdb.tsfile.file.metadata.MetadataIndexNode; -import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; -import org.apache.iotdb.tsfile.file.metadata.TsFileMetadata; -import org.apache.iotdb.tsfile.file.metadata.enums.MetadataIndexNodeType; -import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.read.common.Path; -import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput; -import org.apache.iotdb.tsfile.utils.BloomFilter; -import org.apache.iotdb.tsfile.utils.Pair; -import org.apache.iotdb.tsfile.utils.PublicBAOS; -import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; -import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Queue; -import java.util.TreeMap; - -import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.addCurrentIndexNodeToQueue; -import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.checkAndBuildLevelIndex; -import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.generateRootNode; /** * This 
writer control the total size of chunk metadata to avoid OOM when writing massive @@ -86,310 +57,4 @@ public MemoryControlTsFileIOWriter(File file, long maxMetadataSize) throws IOExc this.maxMetadataSize = maxMetadataSize; this.chunkMetadataTempFile = new File(file.getAbsoluteFile() + CHUNK_METADATA_TEMP_FILE_SUFFIX); } - - @Override - public void endCurrentChunk() { - currentChunkMetadataSize += currentChunkMetadata.calculateRamSize(); - super.endCurrentChunk(); - } - - /** - * Check if the size of chunk metadata in memory is greater than the given threshold. If so, the - * chunk metadata will be written to a temp files. Notice! If you are writing a aligned device, - * you should make sure all data of current writing device has been written before this method is - * called. For not aligned series, there is no such limitation. - * - * @throws IOException - */ - public void checkMetadataSizeAndMayFlush() throws IOException { - // This function should be called after all data of an aligned device has been written - if (currentChunkMetadataSize > maxMetadataSize) { - try { - sortAndFlushChunkMetadata(); - } catch (IOException e) { - LOG.error("Meets exception when flushing metadata to temp file for {}", file, e); - throw e; - } - } - } - - /** - * Sort the chunk metadata by the lexicographical order and the start time of the chunk, then - * flush them to a temp file. - * - * @throws IOException - */ - protected void sortAndFlushChunkMetadata() throws IOException { - // group by series - Map> chunkMetadataListMap = groupChunkMetadataListBySeries(); - if (tempOutput == null) { - tempOutput = new LocalTsFileOutput(new FileOutputStream(chunkMetadataTempFile)); - } - hasChunkMetadataInDisk = true; - // the file structure in temp file will be - // chunkSize | chunkBuffer - for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { - Path seriesPath = entry.getKey(); - if (!seriesPath.equals(lastSerializePath)) { - // record the count of path to construct bloom filter later - pathCount++; - } - List iChunkMetadataList = entry.getValue(); - writeChunkMetadata(iChunkMetadataList, seriesPath, tempOutput); - lastSerializePath = seriesPath; - } - // clear the cache metadata to release the memory - chunkGroupMetadataList.clear(); - if (chunkMetadataList != null) { - chunkMetadataList.clear(); - } - } - - private void writeChunkMetadata( - List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) - throws IOException { - for (IChunkMetadata chunkMetadata : iChunkMetadataList) { - PublicBAOS buffer = new PublicBAOS(); - int size = chunkMetadata.serializeWithFullInfo(buffer, seriesPath.getFullPath()); - ReadWriteIOUtils.write(size, output); - buffer.writeTo(output); - } - } - - @Override - public void endFile() throws IOException { - if (!hasChunkMetadataInDisk) { - // all the chunk metadata is stored in memory - // sort the chunk metadata, construct the index tree - // and just close the file - super.endFile(); - return; - } - - // there is some chunk metadata already been written to the disk - // first we should flush the remaining chunk metadata in memory to disk - // then read the persisted chunk metadata from disk - sortAndFlushChunkMetadata(); - tempOutput.close(); - - // read in the chunk metadata, and construct the index tree - readChunkMetadataAndConstructIndexTree(); - - // write magic string - out.write(MAGIC_STRING_BYTES); - - // close file - out.close(); - canWrite = false; - FileUtils.delete(new File(file + CHUNK_METADATA_TEMP_FILE_SUFFIX)); - } - - private void 
readChunkMetadataAndConstructIndexTree() throws IOException { - tempOutput.close(); - long metaOffset = out.getPosition(); - - // serialize the SEPARATOR of MetaData - ReadWriteIOUtils.write(MetaMarker.SEPARATOR, out.wrapAsStream()); - - ChunkMetadataReadIterator iterator = - new ChunkMetadataReadIterator( - 0, - chunkMetadataTempFile.length(), - new LocalTsFileInput(chunkMetadataTempFile.toPath())); - Map deviceMetadataIndexMap = new TreeMap<>(); - Queue measurementMetadataIndexQueue = new ArrayDeque<>(); - String currentDevice = null; - String prevDevice = null; - MetadataIndexNode currentIndexNode = - new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); - TSFileConfig config = TSFileDescriptor.getInstance().getConfig(); - int seriesIdxForCurrDevice = 0; - BloomFilter filter = - BloomFilter.getEmptyBloomFilter( - TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), pathCount); - - int indexCount = 0; - while (iterator.hasNextChunkMetadata()) { - // read in all chunk metadata of one series - // construct the timeseries metadata for this series - TimeseriesMetadata timeseriesMetadata = readTimeseriesMetadata(iterator); - - indexCount++; - // build bloom filter - filter.add(currentSeries); - // construct the index tree node for the series - Path currentPath = null; - if (timeseriesMetadata.getTSDataType() == TSDataType.VECTOR) { - // this series is the time column of the aligned device - // the full series path will be like "root.sg.d." - // we remove the last . in the series id here - currentPath = new Path(currentSeries); - currentDevice = currentSeries.substring(0, currentSeries.length() - 1); - } else { - currentPath = new Path(currentSeries, true); - currentDevice = currentPath.getDevice(); - } - if (!currentDevice.equals(prevDevice)) { - if (prevDevice != null) { - addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); - deviceMetadataIndexMap.put( - prevDevice, - generateRootNode( - measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); - currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); - } - measurementMetadataIndexQueue = new ArrayDeque<>(); - seriesIdxForCurrDevice = 0; - } - - if (seriesIdxForCurrDevice % config.getMaxDegreeOfIndexNode() == 0) { - if (currentIndexNode.isFull()) { - addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); - currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); - } - if (timeseriesMetadata.getTSDataType() != TSDataType.VECTOR) { - currentIndexNode.addEntry( - new MetadataIndexEntry(currentPath.getMeasurement(), out.getPosition())); - } else { - currentIndexNode.addEntry(new MetadataIndexEntry("", out.getPosition())); - } - } - - prevDevice = currentDevice; - seriesIdxForCurrDevice++; - // serialize the timeseries metadata to file - timeseriesMetadata.serializeTo(out.wrapAsStream()); - } - - addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); - deviceMetadataIndexMap.put( - prevDevice, - generateRootNode( - measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); - - if (indexCount != pathCount) { - throw new IOException( - String.format( - "Expected path count is %d, index path count is %d", pathCount, indexCount)); - } - - MetadataIndexNode metadataIndex = checkAndBuildLevelIndex(deviceMetadataIndexMap, out); - - TsFileMetadata tsFileMetadata = new TsFileMetadata(); - tsFileMetadata.setMetadataIndex(metadataIndex); - 
tsFileMetadata.setMetaOffset(metaOffset); - - int size = tsFileMetadata.serializeTo(out.wrapAsStream()); - size += tsFileMetadata.serializeBloomFilter(out.wrapAsStream(), filter); - - // write TsFileMetaData size - ReadWriteIOUtils.write(size, out.wrapAsStream()); - } - - /** - * Read in all the chunk metadata for a series, and construct a TimeseriesMetadata for it - * - * @param iterator - * @return - * @throws IOException - */ - private TimeseriesMetadata readTimeseriesMetadata(ChunkMetadataReadIterator iterator) - throws IOException { - List iChunkMetadataList = new ArrayList<>(); - currentSeries = iterator.getAllChunkMetadataForNextSeries(iChunkMetadataList); - TimeseriesMetadata timeseriesMetadata = - super.constructOneTimeseriesMetadata(new Path(currentSeries), iChunkMetadataList); - if (timeseriesMetadata.getTSDataType() == TSDataType.VECTOR) { - // set empty measurement id for time column - timeseriesMetadata.setMeasurementId(""); - } else { - timeseriesMetadata.setMeasurementId(new Path(currentSeries, true).getMeasurement()); - } - return timeseriesMetadata; - } - - @Override - public void close() throws IOException { - super.close(); - if (tempOutput != null) { - this.tempOutput.close(); - } - } - - protected class ChunkMetadataReadIterator { - - final LocalTsFileInput input; - final long startPosition; - final long endPosition; - final ByteBuffer sizeBuffer = ByteBuffer.allocate(4); - final ByteBuffer typeBuffer = ByteBuffer.allocate(1); - private Pair currentPair = null; - - ChunkMetadataReadIterator(long startPosition, long endPosition, LocalTsFileInput input) - throws IOException { - this.startPosition = startPosition; - this.endPosition = endPosition; - this.input = input; - this.input.position(startPosition); - } - - public boolean hasNextChunkMetadata() throws IOException { - return currentPair != null || this.input.position() < endPosition; - } - - /** - * Read in next chunk, return the series full path and the chunk metadata. 
- * - * @return - * @throws IOException - */ - protected Pair getNextSeriesNameAndChunkMetadata() throws IOException { - if (input.position() >= endPosition) { - currentPair = null; - return null; - } - int size = readNextChunkMetadataSize(); - ByteBuffer chunkBuffer = ByteBuffer.allocate(size); - ReadWriteIOUtils.readAsPossible(input, chunkBuffer); - chunkBuffer.flip(); - ChunkMetadata chunkMetadata = new ChunkMetadata(); - String seriesPath = ChunkMetadata.deserializeWithFullInfo(chunkBuffer, chunkMetadata); - currentPair = new Pair<>(seriesPath, chunkMetadata); - return currentPair; - } - - public String getAllChunkMetadataForNextSeries(List iChunkMetadataList) - throws IOException { - // TODO: read all the chunk metadata of a single series once instead of reading it iteratively - if (currentPair == null) { - if (!hasNextChunkMetadata()) { - return null; - } else { - getNextSeriesNameAndChunkMetadata(); - } - } - String currentSeries = currentPair.left; - iChunkMetadataList.add(currentPair.right); - while (hasNextChunkMetadata()) { - getNextSeriesNameAndChunkMetadata(); - if (currentPair != null && currentPair.left.equals(currentSeries)) { - iChunkMetadataList.add(currentPair.right); - } else { - break; - } - } - return currentSeries; - } - - public Pair getCurrentPair() { - return currentPair; - } - - private int readNextChunkMetadataSize() throws IOException { - sizeBuffer.clear(); - ReadWriteIOUtils.readAsPossible(input, sizeBuffer); - sizeBuffer.flip(); - return ReadWriteIOUtils.readInt(sizeBuffer); - } - } } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 15823e7bf4d7d..68775ff8c8788 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -27,34 +27,46 @@ import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor; +import org.apache.iotdb.tsfile.file.metadata.MetadataIndexEntry; import org.apache.iotdb.tsfile.file.metadata.MetadataIndexNode; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; import org.apache.iotdb.tsfile.file.metadata.TsFileMetadata; import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; +import org.apache.iotdb.tsfile.file.metadata.enums.MetadataIndexNodeType; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics; import org.apache.iotdb.tsfile.fileSystem.FSFactoryProducer; import org.apache.iotdb.tsfile.read.common.Chunk; import org.apache.iotdb.tsfile.read.common.Path; +import org.apache.iotdb.tsfile.utils.BloomFilter; import org.apache.iotdb.tsfile.utils.BytesUtils; import org.apache.iotdb.tsfile.utils.PublicBAOS; import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; +import org.apache.iotdb.tsfile.write.writer.tsmiterator.TSMIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.Serializable; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; 
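The TsFileIOWriter hunks that follow fold the memory-control machinery into the base writer itself, guarded by an enableMemoryControl flag and a maxMetadataSize budget, so compaction code can construct a single writer type everywhere. A sketch of how a caller might wire the budget into the three-argument constructor added just below; the helper and its byte values are invented for illustration, and the 5% share mirrors the hard-coded arithmetic used by the compaction writers earlier in this series (assumes java.io.File, java.io.IOException and this TsFileIOWriter are imported):

    // Hypothetical helper; sizes are made up for illustration.
    static TsFileIOWriter openCompactionWriter(File targetFile) throws IOException {
      long memoryForCompaction = 2L * 1024 * 1024 * 1024; // e.g. a 2 GiB compaction budget
      int concurrentCompactionThreads = 8;
      int targetFileNum = 4; // cross-space tasks split the share across target files
      // Per-thread share first, then 5% of it, then divide among the target writers;
      // multiplying before the percent division limits integer truncation.
      long maxMetadataSize =
          memoryForCompaction / concurrentCompactionThreads * 5 / 100L / targetFileNum;
      // 2 GiB / 8 = 256 MiB per thread; 5% of that is about 12.8 MiB;
      // four target writers get about 3.2 MiB of chunk metadata budget each.
      return new TsFileIOWriter(targetFile, true, maxMetadataSize); // enableMemoryControl = true
    }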
+import java.util.Queue; import java.util.TreeMap; +import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.addCurrentIndexNodeToQueue; +import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.checkAndBuildLevelIndex; +import static org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor.generateRootNode; + /** * TsFileIOWriter is used to construct metadata and write data stored in memory to output stream. */ @@ -93,6 +105,20 @@ public class TsFileIOWriter implements AutoCloseable { private long minPlanIndex; private long maxPlanIndex; + // the following variables are used for memory control + protected long maxMetadataSize; + protected long currentChunkMetadataSize = 0L; + protected File chunkMetadataTempFile; + protected LocalTsFileOutput tempOutput; + protected volatile boolean hasChunkMetadataInDisk = false; + protected String currentSeries = null; + // record the total number of paths in order to build the bloom filter + protected int pathCount = 0; + protected boolean enableMemoryControl = false; + Path lastSerializePath = null; + private LinkedList endPosInCMTForDevice = new LinkedList<>(); + public static final String CHUNK_METADATA_TEMP_FILE_SUFFIX = ".cmt"; + /** empty construct function. */ protected TsFileIOWriter() {} @@ -126,6 +152,14 @@ public TsFileIOWriter(TsFileOutput output, boolean test) { this.out = output; } + /** for writing with memory control */ + public TsFileIOWriter(File file, boolean enableMemoryControl, long maxMetadataSize) + throws IOException { + this(file); + this.enableMemoryControl = enableMemoryControl; + this.maxMetadataSize = maxMetadataSize; + } + /** * Writes given bytes to output stream. This method is called when total memory size exceeds the * chunk group size threshold. @@ -236,6 +270,9 @@ public void writeChunk(Chunk chunk, ChunkMetadata chunkMetadata) throws IOExcept /** end chunk and write some log.
*/ public void endCurrentChunk() { + if (enableMemoryControl) { + this.currentChunkMetadataSize += currentChunkMetadata.calculateRamSize(); + } chunkMetadataList.add(currentChunkMetadata); currentChunkMetadata = null; } @@ -268,41 +305,13 @@ protected Map> groupChunkMetadataListBySeries() { */ @SuppressWarnings("squid:S3776") // Suppress high Cognitive Complexity warning public void endFile() throws IOException { - long metaOffset = out.getPosition(); - - // serialize the SEPARATOR of MetaData - ReadWriteIOUtils.write(MetaMarker.SEPARATOR, out.wrapAsStream()); - - // group ChunkMetadata by series - Map> chunkMetadataListMap = groupChunkMetadataListBySeries(); - - MetadataIndexNode metadataIndex = flushMetadataIndex(chunkMetadataListMap); - TsFileMetadata tsFileMetaData = new TsFileMetadata(); - tsFileMetaData.setMetadataIndex(metadataIndex); - tsFileMetaData.setMetaOffset(metaOffset); + readChunkMetadataAndConstructIndexTree(); long footerIndex = out.getPosition(); if (logger.isDebugEnabled()) { logger.debug("start to flush the footer,file pos:{}", footerIndex); } - // write TsFileMetaData - int size = tsFileMetaData.serializeTo(out.wrapAsStream()); - if (logger.isDebugEnabled()) { - logger.debug("finish flushing the footer {}, file pos:{}", tsFileMetaData, out.getPosition()); - } - - // write bloom filter - size += - tsFileMetaData.buildAndSerializeBloomFilter( - out.wrapAsStream(), chunkMetadataListMap.keySet()); - if (logger.isDebugEnabled()) { - logger.debug("finish flushing the bloom filter file pos:{}", out.getPosition()); - } - - // write TsFileMetaData size - ReadWriteIOUtils.write(size, out.wrapAsStream()); // write the size of the file metadata. - // write magic string out.write(MAGIC_STRING_BYTES); @@ -314,6 +323,110 @@ public void endFile() throws IOException { canWrite = false; } + private void readChunkMetadataAndConstructIndexTree() throws IOException { + if (tempOutput != null) { + tempOutput.close(); + } + long metaOffset = out.getPosition(); + + // serialize the SEPARATOR of MetaData + ReadWriteIOUtils.write(MetaMarker.SEPARATOR, out.wrapAsStream()); + + TSMIterator tsmIterator = + hasChunkMetadataInDisk + ? TSMIterator.getTSMIteratorInDisk( + chunkMetadataTempFile, chunkGroupMetadataList, endPosInCMTForDevice) + : TSMIterator.getTSMIteratorInMemory(chunkGroupMetadataList); + Map deviceMetadataIndexMap = new TreeMap<>(); + Queue measurementMetadataIndexQueue = new ArrayDeque<>(); + String currentDevice = null; + String prevDevice = null; + MetadataIndexNode currentIndexNode = + new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); + TSFileConfig config = TSFileDescriptor.getInstance().getConfig(); + int seriesIdxForCurrDevice = 0; + BloomFilter filter = + BloomFilter.getEmptyBloomFilter( + TSFileDescriptor.getInstance().getConfig().getBloomFilterErrorRate(), pathCount); + + int indexCount = 0; + while (tsmIterator.hasNext()) { + // read in all chunk metadata of one series + // construct the timeseries metadata for this series + TimeseriesMetadata timeseriesMetadata = tsmIterator.next(); + + indexCount++; + // build bloom filter + filter.add(currentSeries); + // construct the index tree node for the series + Path currentPath = null; + if (timeseriesMetadata.getTSDataType() == TSDataType.VECTOR) { + // this series is the time column of the aligned device + // the full series path will be like "root.sg.d." + // we remove the last . 
in the series id here + currentPath = new Path(currentSeries); + currentDevice = currentSeries.substring(0, currentSeries.length() - 1); + } else { + currentPath = new Path(currentSeries, true); + currentDevice = currentPath.getDevice(); + } + if (!currentDevice.equals(prevDevice)) { + if (prevDevice != null) { + addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); + deviceMetadataIndexMap.put( + prevDevice, + generateRootNode( + measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); + currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); + } + measurementMetadataIndexQueue = new ArrayDeque<>(); + seriesIdxForCurrDevice = 0; + } + + if (seriesIdxForCurrDevice % config.getMaxDegreeOfIndexNode() == 0) { + if (currentIndexNode.isFull()) { + addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); + currentIndexNode = new MetadataIndexNode(MetadataIndexNodeType.LEAF_MEASUREMENT); + } + if (timeseriesMetadata.getTSDataType() != TSDataType.VECTOR) { + currentIndexNode.addEntry( + new MetadataIndexEntry(currentPath.getMeasurement(), out.getPosition())); + } else { + currentIndexNode.addEntry(new MetadataIndexEntry("", out.getPosition())); + } + } + + prevDevice = currentDevice; + seriesIdxForCurrDevice++; + // serialize the timeseries metadata to file + timeseriesMetadata.serializeTo(out.wrapAsStream()); + } + + addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); + deviceMetadataIndexMap.put( + prevDevice, + generateRootNode( + measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); + + if (indexCount != pathCount) { + throw new IOException( + String.format( + "Expected path count is %d, index path count is %d", pathCount, indexCount)); + } + + MetadataIndexNode metadataIndex = checkAndBuildLevelIndex(deviceMetadataIndexMap, out); + + TsFileMetadata tsFileMetadata = new TsFileMetadata(); + tsFileMetadata.setMetadataIndex(metadataIndex); + tsFileMetadata.setMetaOffset(metaOffset); + + int size = tsFileMetadata.serializeTo(out.wrapAsStream()); + size += tsFileMetadata.serializeBloomFilter(out.wrapAsStream(), filter); + + // write TsFileMetaData size + ReadWriteIOUtils.write(size, out.wrapAsStream()); + } + /** * Flush TsFileMetadata, including ChunkMetadataList and TimeseriesMetaData * @@ -417,6 +530,9 @@ public void reset() throws IOException { public void close() throws IOException { canWrite = false; out.close(); + if (tempOutput != null) { + this.tempOutput.close(); + } } void writeSeparatorMaskForTest() throws IOException { @@ -513,4 +629,86 @@ public long getMaxPlanIndex() { public void setMaxPlanIndex(long maxPlanIndex) { this.maxPlanIndex = maxPlanIndex; } + + /** + * Check if the size of chunk metadata in memory is greater than the given threshold. If so, the + * chunk metadata will be written to a temp file. Notice! If you are writing an aligned device, + * you should make sure all data of the current device has been written before this method is + * called. For non-aligned series, there is no such limitation. 
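+ * + * <p>A minimal usage sketch (the 1 MB threshold, the device name, and the writer/chunk variables are illustrative assumptions, not defaults): + * <pre> + *   TsFileIOWriter writer = new TsFileIOWriter(tsFile, true, 1024 * 1024); + *   writer.startChunkGroup("root.sg.d1"); + *   chunkWriter.writeToFileWriter(writer); // endCurrentChunk() adds to currentChunkMetadataSize + *   writer.endChunkGroup(); + *   writer.checkMetadataSizeAndMayFlush(); // may spill sorted chunk metadata to the .cmt temp file + *   writer.endFile(); // builds the index tree from memory and/or the temp file + * </pre>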
+ * + * @throws IOException + */ + public void checkMetadataSizeAndMayFlush() throws IOException { + // This function should be called after all data of an aligned device has been written + if (enableMemoryControl && currentChunkMetadataSize > maxMetadataSize) { + try { + sortAndFlushChunkMetadata(); + } catch (IOException e) { + logger.error("Meets exception when flushing metadata to temp file for {}", file, e); + throw e; + } + } + } + + /** + * Sort the chunk metadata in lexicographical order and then by the start time of the chunk, and + * flush them to a temp file. + * + * @throws IOException + */ + protected void sortAndFlushChunkMetadata() throws IOException { + // group by series + Map> chunkMetadataListMap = groupChunkMetadataListBySeries(); + if (tempOutput == null) { + tempOutput = new LocalTsFileOutput(new FileOutputStream(chunkMetadataTempFile)); + } + hasChunkMetadataInDisk = true; + // the file structure in the temp file will be + // chunkSize | chunkBuffer + for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { + Path seriesPath = entry.getKey(); + if (!seriesPath.equals(lastSerializePath)) { + // record the count of paths to construct the bloom filter later + pathCount++; + } + List iChunkMetadataList = entry.getValue(); + writeChunkMetadata(iChunkMetadataList, seriesPath, tempOutput); + lastSerializePath = seriesPath; + } + // clear the cache metadata to release the memory + chunkGroupMetadataList.clear(); + if (chunkMetadataList != null) { + chunkMetadataList.clear(); + } + } + + private void writeChunkMetadata( + List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) + throws IOException { + if (tempOutput == null) { + tempOutput = + new LocalTsFileOutput( + new FileOutputStream(file.getAbsolutePath() + CHUNK_METADATA_TEMP_FILE_SUFFIX)); + } + // [DeviceId] measurementId datatype size chunkMetadataBuffer + if (lastSerializePath == null + || !seriesPath.getDevice().equals(lastSerializePath.getDevice())) { + // mark the end position of the last device + endPosInCMTForDevice.add(tempOutput.getPosition()); + // serialize the device + ReadWriteIOUtils.write(seriesPath.getDevice(), tempOutput.wrapAsStream()); + } + if (!seriesPath.equals(lastSerializePath) && iChunkMetadataList.size() > 0) { + // serialize the public info of this measurement + ReadWriteIOUtils.write(seriesPath.getMeasurement(), tempOutput.wrapAsStream()); + ReadWriteIOUtils.write(iChunkMetadataList.get(0).getDataType(), tempOutput.wrapAsStream()); + } + PublicBAOS buffer = new PublicBAOS(); + int totalSize = 0; + for (IChunkMetadata chunkMetadata : iChunkMetadataList) { + totalSize += chunkMetadata.serializeTo(buffer, true); + } + ReadWriteIOUtils.write(totalSize, tempOutput.wrapAsStream()); + tempOutput.write(buffer.getBuf()); + } } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java new file mode 100644 index 0000000000000..bbe8d01161162 --- /dev/null +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.tsfile.write.writer.tsmiterator; + +import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetadata; +import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; +import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; +import org.apache.iotdb.tsfile.read.common.Path; +import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput; +import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +public class DiskTSMIterator extends TSMIterator { + + private static final Logger LOG = LoggerFactory.getLogger(DiskTSMIterator.class); + + LinkedList endPosForEachDevice; + File cmtFile; + LocalTsFileInput input; + long fileLength = 0; + long currentPos = 0; + long nextEndPosForDevice = 0; + String currentDevice; + boolean remainsInFile = true; + + protected DiskTSMIterator( + File cmtFile, + List chunkGroupMetadataList, + LinkedList endPosForEachDevice) + throws IOException { + super(chunkGroupMetadataList); + this.cmtFile = cmtFile; + this.endPosForEachDevice = endPosForEachDevice; + this.input = new LocalTsFileInput(cmtFile.toPath()); + this.fileLength = cmtFile.length(); + } + + @Override + public boolean hasNext() { + return remainsInFile || iterator.hasNext(); + } + + @Override + public TimeseriesMetadata next() { + try { + if (remainsInFile) { + // deserialize from file + return getTimeSerisMetadataFromFile(); + } else { + // get from memory iterator + return super.next(); + } + } catch (IOException e) { + LOG.error("Meets IOException when reading timeseries metadata from disk", e); + return null; + } + } + + private TimeseriesMetadata getTimeSerisMetadataFromFile() throws IOException { + if (currentPos == nextEndPosForDevice) { + // deserialize the current device name + currentDevice = ReadWriteIOUtils.readVarIntString(input.wrapAsInputStream()); + nextEndPosForDevice = + endPosForEachDevice.size() > 0 ? 
endPosForEachDevice.removeFirst() : fileLength; + } + // deserialize public info for measurement + String measurementUid = ReadWriteIOUtils.readVarIntString(input.wrapAsInputStream()); + byte dataTypeInByte = ReadWriteIOUtils.readByte(input.wrapAsInputStream()); + TSDataType dataType = TSDataType.getTsDataType(dataTypeInByte); + int chunkBufferSize = ReadWriteIOUtils.readInt(input.wrapAsInputStream()); + ByteBuffer chunkBuffer = ByteBuffer.allocate(chunkBufferSize); + int readSize = ReadWriteIOUtils.readAsPossible(input, chunkBuffer); + if (readSize < chunkBufferSize) { + throw new IOException( + String.format( + "Expected to read %s bytes, but actually read %s bytes", chunkBufferSize, readSize)); + } + + // deserialize chunk metadata from chunk buffer + List chunkMetadataList = new ArrayList<>(); + while (chunkBuffer.hasRemaining()) { + chunkMetadataList.add(ChunkMetadata.deserializeFrom(chunkBuffer, dataType)); + } + updateCurrentPos(); + return constructOneTimeseriesMetadata( + new Path(currentDevice, measurementUid), chunkMetadataList); + } + + private void updateCurrentPos() throws IOException { + currentPos = input.position(); + if (currentPos >= fileLength) { + remainsInFile = false; + } + } +} diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java new file mode 100644 index 0000000000000..6594356af2750 --- /dev/null +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iotdb.tsfile.write.writer.tsmiterator; + +import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetadata; +import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; +import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; +import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics; +import org.apache.iotdb.tsfile.read.common.Path; +import org.apache.iotdb.tsfile.utils.PublicBAOS; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +public class TSMIterator implements Iterator { + private static Logger LOG = LoggerFactory.getLogger(TSMIterator.class); + protected Map> chunkMetadataListMap = new TreeMap<>(); + protected Iterator>> iterator; + + protected TSMIterator(List chunkGroupMetadataList) { + this.groupChunkMetadataListBySeries(chunkGroupMetadataList); + } + + public static TSMIterator getTSMIteratorInMemory( + List chunkGroupMetadataList) { + return new TSMIterator(chunkGroupMetadataList); + } + + public static TSMIterator getTSMIteratorInDisk( + File cmtFile, List chunkGroupMetadataList, LinkedList serializePos) + throws IOException { + return new DiskTSMIterator(cmtFile, chunkGroupMetadataList, serializePos); + } + + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + + @Override + public TimeseriesMetadata next() { + Map.Entry> nextEntry = iterator.next(); + try { + return constructOneTimeseriesMetadata(nextEntry.getKey(), nextEntry.getValue()); + } catch (IOException e) { + LOG.error("Meets IOException when getting next TimeseriesMetadata", e); + return null; + } + } + + protected void groupChunkMetadataListBySeries(List chunkGroupMetadataList) { + // group ChunkMetadata by series + for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { + List chunkMetadatas = chunkGroupMetadata.getChunkMetadataList(); + for (IChunkMetadata chunkMetadata : chunkMetadatas) { + Path series = new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()); + chunkMetadataListMap.computeIfAbsent(series, k -> new ArrayList<>()).add(chunkMetadata); + } + } + this.iterator = chunkMetadataListMap.entrySet().iterator(); + } + + protected TimeseriesMetadata constructOneTimeseriesMetadata( + Path path, List chunkMetadataList) throws IOException { + // create TimeseriesMetaData + PublicBAOS publicBAOS = new PublicBAOS(); + TSDataType dataType = chunkMetadataList.get(chunkMetadataList.size() - 1).getDataType(); + Statistics seriesStatistics = Statistics.getStatsByType(dataType); + + int chunkMetadataListLength = 0; + boolean serializeStatistic = (chunkMetadataList.size() > 1); + // flush chunkMetadataList one by one + for (IChunkMetadata chunkMetadata : chunkMetadataList) { + if (!chunkMetadata.getDataType().equals(dataType)) { + continue; + } + chunkMetadataListLength += chunkMetadata.serializeTo(publicBAOS, serializeStatistic); + seriesStatistics.mergeStatistics(chunkMetadata.getStatistics()); + } + + TimeseriesMetadata timeseriesMetadata = + new TimeseriesMetadata( + (byte) + ((serializeStatistic ? 
(byte) 1 : (byte) 0) | chunkMetadataList.get(0).getMask()), + chunkMetadataListLength, + path.getMeasurement(), + dataType, + seriesStatistics, + publicBAOS); + return timeseriesMetadata; + } +} diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java index b9f9ff71f6cc3..4ab49eb0ebf4f 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java @@ -127,8 +127,7 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { writer.tempOutput.flush(); ChunkMetadataReadIterator window = - writer - .new ChunkMetadataReadIterator( + new ChunkMetadataReadIterator( 0, writer.chunkMetadataTempFile.length(), new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); @@ -168,8 +167,7 @@ public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException writer.tempOutput.flush(); ChunkMetadataReadIterator window = - writer - .new ChunkMetadataReadIterator( + new ChunkMetadataReadIterator( 0, writer.chunkMetadataTempFile.length(), new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); @@ -248,8 +246,7 @@ public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { writer.tempOutput.flush(); ChunkMetadataReadIterator window = - writer - .new ChunkMetadataReadIterator( + new ChunkMetadataReadIterator( 0, writer.chunkMetadataTempFile.length(), new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); From 2612b3a7b16523fdc9414890385231a60b9332a3 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Thu, 15 Sep 2022 20:31:48 +0800 Subject: [PATCH 21/31] fix deserialize bug --- .../tsfile/write/writer/TsFileIOWriter.java | 11 +- .../writer/tsmiterator/DiskTSMIterator.java | 6 +- .../MemoryControlTsFileIOWriterTest.java | 293 +++++++++--------- 3 files changed, 161 insertions(+), 149 deletions(-) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 68775ff8c8788..f06fe6ac12fae 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -116,7 +116,7 @@ public class TsFileIOWriter implements AutoCloseable { protected int pathCount = 0; protected boolean enableMemoryControl = false; Path lastSerializePath = null; - private LinkedList endPosInCMTForDevice = new LinkedList<>(); + protected LinkedList endPosInCMTForDevice = new LinkedList<>(); public static final String CHUNK_METADATA_TEMP_FILE_SUFFIX = ".cmt"; /** empty construct function. 
*/ @@ -158,6 +158,7 @@ public TsFileIOWriter(File file, boolean enableMemoryControl, long maxMetadataSi this(file); this.enableMemoryControl = enableMemoryControl; this.maxMetadataSize = maxMetadataSize; + chunkMetadataTempFile = new File(file.getAbsolutePath() + CHUNK_METADATA_TEMP_FILE_SUFFIX); } /** @@ -674,6 +675,7 @@ protected void sortAndFlushChunkMetadata() throws IOException { List iChunkMetadataList = entry.getValue(); writeChunkMetadata(iChunkMetadataList, seriesPath, tempOutput); lastSerializePath = seriesPath; + logger.debug("Flushing {}", seriesPath); } // clear the cache metadata to release the memory chunkGroupMetadataList.clear(); @@ -685,11 +687,6 @@ protected void sortAndFlushChunkMetadata() throws IOException { private void writeChunkMetadata( List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) throws IOException { - if (tempOutput == null) { - tempOutput = - new LocalTsFileOutput( - new FileOutputStream(file.getAbsolutePath() + CHUNK_METADATA_TEMP_FILE_SUFFIX)); - } // [DeviceId] measurementId datatype size chunkMetadataBuffer if (lastSerializePath == null || !seriesPath.getDevice().equals(lastSerializePath.getDevice())) { @@ -709,6 +706,6 @@ private void writeChunkMetadata( totalSize += chunkMetadata.serializeTo(buffer, true); } ReadWriteIOUtils.write(totalSize, tempOutput.wrapAsStream()); - tempOutput.write(buffer.getBuf()); + buffer.writeTo(tempOutput); } } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java index bbe8d01161162..a9031ea6dd119 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java @@ -61,6 +61,7 @@ protected DiskTSMIterator( this.endPosForEachDevice = endPosForEachDevice; this.input = new LocalTsFileInput(cmtFile.toPath()); this.fileLength = cmtFile.length(); + this.nextEndPosForDevice = endPosForEachDevice.removeFirst(); } @Override @@ -87,12 +88,12 @@ public TimeseriesMetadata next() { private TimeseriesMetadata getTimeSerisMetadataFromFile() throws IOException { if (currentPos == nextEndPosForDevice) { // deserialize the current device name - currentDevice = ReadWriteIOUtils.readVarIntString(input.wrapAsInputStream()); + currentDevice = ReadWriteIOUtils.readString(input.wrapAsInputStream()); nextEndPosForDevice = endPosForEachDevice.size() > 0 ? 
endPosForEachDevice.removeFirst() : fileLength; } // deserialize public info for measurement - String measurementUid = ReadWriteIOUtils.readVarIntString(input.wrapAsInputStream()); + String measurementUid = ReadWriteIOUtils.readString(input.wrapAsInputStream()); byte dataTypeInByte = ReadWriteIOUtils.readByte(input.wrapAsInputStream()); TSDataType dataType = TSDataType.getTsDataType(dataTypeInByte); int chunkBufferSize = ReadWriteIOUtils.readInt(input.wrapAsInputStream()); @@ -103,6 +104,7 @@ private TimeseriesMetadata getTimeSerisMetadataFromFile() throws IOException { String.format( "Expected to read %s bytes, but actually read %s bytes", chunkBufferSize, readSize)); } + chunkBuffer.flip(); // deserialize chunk metadata from chunk buffer List chunkMetadataList = new ArrayList<>(); diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java index 4ab49eb0ebf4f..6160c31d9d5a0 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java @@ -20,10 +20,8 @@ package org.apache.iotdb.tsfile.write.writer; import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; -import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; -import org.apache.iotdb.tsfile.read.common.Path; -import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput; import org.apache.iotdb.tsfile.utils.Binary; import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.TsPrimitiveType; @@ -32,6 +30,7 @@ import org.apache.iotdb.tsfile.write.chunk.ChunkWriterImpl; import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; import org.apache.iotdb.tsfile.write.schema.MeasurementSchema; +import org.apache.iotdb.tsfile.write.writer.tsmiterator.TSMIterator; import org.apache.commons.io.FileUtils; import org.junit.After; @@ -92,13 +91,12 @@ public MemoryControlTsFileIOWriterTest() throws IOException { /** The following tests is for ChunkMetadata serialization and deserialization. */ @Test public void testSerializeAndDeserializeChunkMetadata() throws IOException { - try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024 * 1024 * 10)) { List originChunkMetadataList = new ArrayList<>(); - for (int i = 0; i < 10; ++i) { + for (int i = 0; i < 1; ++i) { String deviceId = deviceDictInOrder.get(i); writer.startChunkGroup(deviceId); - for (int j = 0; j < 5; ++j) { + for (int j = 0; j < 1; ++j) { ChunkWriterImpl chunkWriter; switch (j) { case 0: @@ -126,145 +124,160 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { writer.sortAndFlushChunkMetadata(); writer.tempOutput.flush(); - ChunkMetadataReadIterator window = - new ChunkMetadataReadIterator( - 0, - writer.chunkMetadataTempFile.length(), - new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); - for (int i = 0; i < originChunkMetadataList.size(); ++i) { - Pair chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata(); - Assert.assertEquals( - deviceDictInOrder.get(i / 5) + "." 
+ measurementDictInOrder.get(i % 5), - chunkMetadataPair.left); + TSMIterator iterator = + TSMIterator.getTSMIteratorInDisk( + writer.chunkMetadataTempFile, + writer.chunkGroupMetadataList, + writer.endPosInCMTForDevice); + // for (int i = 0; i < originChunkMetadataList.size(); ++i) { + // Pair chunkMetadataPair = + // window.getNextSeriesNameAndChunkMetadata(); + // Assert.assertEquals( + // deviceDictInOrder.get(i / 5) + "." + measurementDictInOrder.get(i % 5), + // chunkMetadataPair.left); + // Assert.assertEquals( + // originChunkMetadataList.get(i).getStartTime(), + // chunkMetadataPair.right.getStartTime()); + // Assert.assertEquals( + // originChunkMetadataList.get(i).getEndTime(), + // chunkMetadataPair.right.getEndTime()); + // Assert.assertEquals( + // originChunkMetadataList.get(i).getDataType(), + // chunkMetadataPair.right.getDataType()); + // Assert.assertEquals( + // originChunkMetadataList.get(i).getStatistics(), + // chunkMetadataPair.right.getStatistics()); + // } + for (int i = 0; iterator.hasNext(); ++i) { + TimeseriesMetadata timeseriesMetadata = iterator.next(); Assert.assertEquals( - originChunkMetadataList.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); + measurementDictInOrder.get(i % 5), timeseriesMetadata.getMeasurementId()); Assert.assertEquals( - originChunkMetadataList.get(i).getEndTime(), chunkMetadataPair.right.getEndTime()); - Assert.assertEquals( - originChunkMetadataList.get(i).getDataType(), chunkMetadataPair.right.getDataType()); - Assert.assertEquals( - originChunkMetadataList.get(i).getStatistics(), - chunkMetadataPair.right.getStatistics()); + originChunkMetadataList.get(i).getDataType(), timeseriesMetadata.getTSDataType()); } } } - @Test - public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException { - try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { - List originChunkMetadataList = new ArrayList<>(); - for (int i = 0; i < 10; ++i) { - String deviceId = deviceDictInOrder.get(i); - writer.startChunkGroup(deviceId); - AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6); - chunkWriter.writeToFileWriter(writer); - originChunkMetadataList.addAll(writer.chunkMetadataList); - writer.endChunkGroup(); - } - Map> originChunkMetadata = writer.groupChunkMetadataListBySeries(); - writer.sortAndFlushChunkMetadata(); - writer.tempOutput.flush(); - - ChunkMetadataReadIterator window = - new ChunkMetadataReadIterator( - 0, - writer.chunkMetadataTempFile.length(), - new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); - List measurementIds = new ArrayList<>(); - for (int i = 0; i < 10; ++i) { - measurementIds.add(deviceDictInOrder.get(i) + "."); - for (int j = 1; j <= 6; ++j) { - measurementIds.add(deviceDictInOrder.get(i) + ".s" + j); - } - } - for (String measurementId : measurementIds) { - List chunkMetadata = new ArrayList<>(); - String seriesId = window.getAllChunkMetadataForNextSeries(chunkMetadata); - Assert.assertEquals(measurementId, seriesId); - Assert.assertEquals( - originChunkMetadata.get(new Path(measurementId)).size(), chunkMetadata.size()); - for (int i = 0; i < chunkMetadata.size(); ++i) { - Assert.assertEquals( - originChunkMetadata.get(new Path(measurementId)).get(i).getStatistics(), - chunkMetadata.get(i).getStatistics()); - Assert.assertEquals( - originChunkMetadata.get(new Path(measurementId)).get(i).getDataType(), - chunkMetadata.get(i).getDataType()); - } - } - } - } - - @Test - public void 
testSerializeAndDeserializeMixedChunkMetadata() throws IOException { - try (MemoryControlTsFileIOWriter writer = - new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { - List originChunkMetadataList = new ArrayList<>(); - List seriesIds = new ArrayList<>(); - for (int i = 0; i < 10; ++i) { - String deviceId = deviceDictInOrder.get(i); - writer.startChunkGroup(deviceId); - if (i % 2 == 0) { - // write normal series - for (int j = 0; j < 5; ++j) { - ChunkWriterImpl chunkWriter; - switch (j) { - case 0: - chunkWriter = generateIntData(j, 0L, new ArrayList<>()); - break; - case 1: - chunkWriter = generateBooleanData(j, 0L, new ArrayList<>()); - break; - case 2: - chunkWriter = generateFloatData(j, 0L, new ArrayList<>()); - break; - case 3: - chunkWriter = generateDoubleData(j, 0L, new ArrayList<>()); - break; - case 4: - default: - chunkWriter = generateTextData(j, 0L, new ArrayList<>()); - break; - } - chunkWriter.writeToFileWriter(writer); - seriesIds.add(deviceId + "." + measurementDictInOrder.get(j)); - } - } else { - // write vector - AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6); - chunkWriter.writeToFileWriter(writer); - seriesIds.add(deviceId + "."); - for (int l = 1; l <= 6; ++l) { - seriesIds.add(deviceId + ".s" + l); - } - } - originChunkMetadataList.addAll(writer.chunkMetadataList); - writer.endChunkGroup(); - } - writer.sortAndFlushChunkMetadata(); - writer.tempOutput.flush(); - - ChunkMetadataReadIterator window = - new ChunkMetadataReadIterator( - 0, - writer.chunkMetadataTempFile.length(), - new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); - for (int i = 0; i < originChunkMetadataList.size(); ++i) { - Pair chunkMetadataPair = window.getNextSeriesNameAndChunkMetadata(); - Assert.assertEquals(seriesIds.get(i), chunkMetadataPair.left); - Assert.assertEquals( - originChunkMetadataList.get(i).getStartTime(), chunkMetadataPair.right.getStartTime()); - Assert.assertEquals( - originChunkMetadataList.get(i).getEndTime(), chunkMetadataPair.right.getEndTime()); - Assert.assertEquals( - originChunkMetadataList.get(i).getDataType(), chunkMetadataPair.right.getDataType()); - Assert.assertEquals( - originChunkMetadataList.get(i).getStatistics(), - chunkMetadataPair.right.getStatistics()); - } - } - } + // @Test + // public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException { + // try (MemoryControlTsFileIOWriter writer = + // new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { + // List originChunkMetadataList = new ArrayList<>(); + // for (int i = 0; i < 10; ++i) { + // String deviceId = deviceDictInOrder.get(i); + // writer.startChunkGroup(deviceId); + // AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6); + // chunkWriter.writeToFileWriter(writer); + // originChunkMetadataList.addAll(writer.chunkMetadataList); + // writer.endChunkGroup(); + // } + // Map> originChunkMetadata = + // writer.groupChunkMetadataListBySeries(); + // writer.sortAndFlushChunkMetadata(); + // writer.tempOutput.flush(); + // + // ChunkMetadataReadIterator window = + // new ChunkMetadataReadIterator( + // 0, + // writer.chunkMetadataTempFile.length(), + // new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); + // List measurementIds = new ArrayList<>(); + // for (int i = 0; i < 10; ++i) { + // measurementIds.add(deviceDictInOrder.get(i) + "."); + // for (int j = 1; j <= 6; ++j) { + // measurementIds.add(deviceDictInOrder.get(i) + ".s" + j); + // } + // } + // for (String measurementId : 
measurementIds) { + // List chunkMetadata = new ArrayList<>(); + // String seriesId = window.getAllChunkMetadataForNextSeries(chunkMetadata); + // Assert.assertEquals(measurementId, seriesId); + // Assert.assertEquals( + // originChunkMetadata.get(new Path(measurementId)).size(), chunkMetadata.size()); + // for (int i = 0; i < chunkMetadata.size(); ++i) { + // Assert.assertEquals( + // originChunkMetadata.get(new Path(measurementId)).get(i).getStatistics(), + // chunkMetadata.get(i).getStatistics()); + // Assert.assertEquals( + // originChunkMetadata.get(new Path(measurementId)).get(i).getDataType(), + // chunkMetadata.get(i).getDataType()); + // } + // } + // } + // } + + // @Test + // public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { + // try (MemoryControlTsFileIOWriter writer = + // new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { + // List originChunkMetadataList = new ArrayList<>(); + // List seriesIds = new ArrayList<>(); + // for (int i = 0; i < 10; ++i) { + // String deviceId = deviceDictInOrder.get(i); + // writer.startChunkGroup(deviceId); + // if (i % 2 == 0) { + // // write normal series + // for (int j = 0; j < 5; ++j) { + // ChunkWriterImpl chunkWriter; + // switch (j) { + // case 0: + // chunkWriter = generateIntData(j, 0L, new ArrayList<>()); + // break; + // case 1: + // chunkWriter = generateBooleanData(j, 0L, new ArrayList<>()); + // break; + // case 2: + // chunkWriter = generateFloatData(j, 0L, new ArrayList<>()); + // break; + // case 3: + // chunkWriter = generateDoubleData(j, 0L, new ArrayList<>()); + // break; + // case 4: + // default: + // chunkWriter = generateTextData(j, 0L, new ArrayList<>()); + // break; + // } + // chunkWriter.writeToFileWriter(writer); + // seriesIds.add(deviceId + "." + measurementDictInOrder.get(j)); + // } + // } else { + // // write vector + // AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6); + // chunkWriter.writeToFileWriter(writer); + // seriesIds.add(deviceId + "."); + // for (int l = 1; l <= 6; ++l) { + // seriesIds.add(deviceId + ".s" + l); + // } + // } + // originChunkMetadataList.addAll(writer.chunkMetadataList); + // writer.endChunkGroup(); + // } + // writer.sortAndFlushChunkMetadata(); + // writer.tempOutput.flush(); + // + // ChunkMetadataReadIterator window = + // new ChunkMetadataReadIterator( + // 0, + // writer.chunkMetadataTempFile.length(), + // new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); + // for (int i = 0; i < originChunkMetadataList.size(); ++i) { + // Pair chunkMetadataPair = + // window.getNextSeriesNameAndChunkMetadata(); + // Assert.assertEquals(seriesIds.get(i), chunkMetadataPair.left); + // Assert.assertEquals( + // originChunkMetadataList.get(i).getStartTime(), + // chunkMetadataPair.right.getStartTime()); + // Assert.assertEquals( + // originChunkMetadataList.get(i).getEndTime(), chunkMetadataPair.right.getEndTime()); + // Assert.assertEquals( + // originChunkMetadataList.get(i).getDataType(), + // chunkMetadataPair.right.getDataType()); + // Assert.assertEquals( + // originChunkMetadataList.get(i).getStatistics(), + // chunkMetadataPair.right.getStatistics()); + // } + // } + // } /** The following tests is for writing normal series in different nums. 
*/ From 4b4a198e4aa522bd4b2b34620b891ad6feab7816 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Fri, 16 Sep 2022 16:00:24 +0800 Subject: [PATCH 22/31] fix bug and add some test --- .../tsfile/write/writer/TsFileIOWriter.java | 17 +- .../writer/tsmiterator/DiskTSMIterator.java | 13 +- .../write/writer/tsmiterator/TSMIterator.java | 14 +- ...a => TsFileIOWriterMemoryControlTest.java} | 551 +++++++++++------- 4 files changed, 352 insertions(+), 243 deletions(-) rename tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/{MemoryControlTsFileIOWriterTest.java => TsFileIOWriterMemoryControlTest.java} (63%) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index f06fe6ac12fae..53735fe8d2095 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -41,6 +41,7 @@ import org.apache.iotdb.tsfile.read.common.Path; import org.apache.iotdb.tsfile.utils.BloomFilter; import org.apache.iotdb.tsfile.utils.BytesUtils; +import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.PublicBAOS; import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; import org.apache.iotdb.tsfile.write.writer.tsmiterator.TSMIterator; @@ -354,7 +355,9 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { while (tsmIterator.hasNext()) { // read in all chunk metadata of one series // construct the timeseries metadata for this series - TimeseriesMetadata timeseriesMetadata = tsmIterator.next(); + Pair timeseriesMetadataPair = tsmIterator.next(); + TimeseriesMetadata timeseriesMetadata = timeseriesMetadataPair.right; + currentSeries = timeseriesMetadataPair.left; indexCount++; // build bloom filter @@ -409,12 +412,6 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { generateRootNode( measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); - if (indexCount != pathCount) { - throw new IOException( - String.format( - "Expected path count is %d, index path count is %d", pathCount, indexCount)); - } - MetadataIndexNode metadataIndex = checkAndBuildLevelIndex(deviceMetadataIndexMap, out); TsFileMetadata tsFileMetadata = new TsFileMetadata(); @@ -673,7 +670,7 @@ protected void sortAndFlushChunkMetadata() throws IOException { pathCount++; } List iChunkMetadataList = entry.getValue(); - writeChunkMetadata(iChunkMetadataList, seriesPath, tempOutput); + writeChunkMetadataToTempFile(iChunkMetadataList, seriesPath, tempOutput); lastSerializePath = seriesPath; logger.debug("Flushing {}", seriesPath); } @@ -684,7 +681,7 @@ protected void sortAndFlushChunkMetadata() throws IOException { } } - private void writeChunkMetadata( + private void writeChunkMetadataToTempFile( List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) throws IOException { // [DeviceId] measurementId datatype size chunkMetadataBuffer @@ -697,7 +694,7 @@ private void writeChunkMetadata( } if (!seriesPath.equals(lastSerializePath) && iChunkMetadataList.size() > 0) { // serialize the public info of this measurement - ReadWriteIOUtils.write(seriesPath.getMeasurement(), tempOutput.wrapAsStream()); + ReadWriteIOUtils.writeVar(seriesPath.getMeasurement(), tempOutput.wrapAsStream()); ReadWriteIOUtils.write(iChunkMetadataList.get(0).getDataType(), tempOutput.wrapAsStream()); } PublicBAOS buffer = new PublicBAOS(); diff --git 
a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java index a9031ea6dd119..64dcb11a99e63 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java @@ -26,6 +26,7 @@ import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.read.common.Path; import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput; +import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; import org.slf4j.Logger; @@ -70,7 +71,7 @@ public boolean hasNext() { } @Override - public TimeseriesMetadata next() { + public Pair next() { try { if (remainsInFile) { // deserialize from file @@ -85,7 +86,7 @@ public TimeseriesMetadata next() { } } - private TimeseriesMetadata getTimeSerisMetadataFromFile() throws IOException { + private Pair getTimeSerisMetadataFromFile() throws IOException { if (currentPos == nextEndPosForDevice) { // deserialize the current device name currentDevice = ReadWriteIOUtils.readString(input.wrapAsInputStream()); @@ -93,7 +94,7 @@ private TimeseriesMetadata getTimeSerisMetadataFromFile() throws IOException { endPosForEachDevice.size() > 0 ? endPosForEachDevice.removeFirst() : fileLength; } // deserialize public info for measurement - String measurementUid = ReadWriteIOUtils.readString(input.wrapAsInputStream()); + String measurementUid = ReadWriteIOUtils.readVarIntString(input.wrapAsInputStream()); byte dataTypeInByte = ReadWriteIOUtils.readByte(input.wrapAsInputStream()); TSDataType dataType = TSDataType.getTsDataType(dataTypeInByte); int chunkBufferSize = ReadWriteIOUtils.readInt(input.wrapAsInputStream()); @@ -112,14 +113,16 @@ private TimeseriesMetadata getTimeSerisMetadataFromFile() throws IOException { chunkMetadataList.add(ChunkMetadata.deserializeFrom(chunkBuffer, dataType)); } updateCurrentPos(); - return constructOneTimeseriesMetadata( - new Path(currentDevice, measurementUid), chunkMetadataList); + return new Pair<>( + currentDevice + "." + measurementUid, + constructOneTimeseriesMetadata(new Path(currentDevice, measurementUid), chunkMetadataList)); } private void updateCurrentPos() throws IOException { currentPos = input.position(); if (currentPos >= fileLength) { remainsInFile = false; + input.close(); } } } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java index 6594356af2750..030c7e19392d0 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java @@ -25,6 +25,7 @@ import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics; import org.apache.iotdb.tsfile.read.common.Path; +import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.PublicBAOS; import org.slf4j.Logger; @@ -39,7 +40,12 @@ import java.util.Map; import java.util.TreeMap; -public class TSMIterator implements Iterator { +/** + * TSMIterator iteratively returns the full path of each series and its TimeseriesMetadata. It accepts a data + * source either in memory or on disk. 
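+ * A minimal sketch of the intended use (chunkGroupMetadataList here is assumed to be the writer's collected ChunkGroupMetadata list): + * <pre> + *   TSMIterator tsmIterator = TSMIterator.getTSMIteratorInMemory(chunkGroupMetadataList); + *   while (tsmIterator.hasNext()) { + *     Pair next = tsmIterator.next(); // next.left is the full series path, next.right its TimeseriesMetadata + *   } + * </pre> + *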
Static method getTSMIteratorInMemory returns a TSMIterator that reads + * from memory, and static method getTSMIteratorInDisk returns a TSMIterator that reads from disk. + */ +public class TSMIterator implements Iterator> { private static Logger LOG = LoggerFactory.getLogger(TSMIterator.class); protected Map> chunkMetadataListMap = new TreeMap<>(); protected Iterator>> iterator; @@ -65,10 +71,12 @@ public boolean hasNext() { } @Override - public TimeseriesMetadata next() { + public Pair next() { Map.Entry> nextEntry = iterator.next(); try { - return constructOneTimeseriesMetadata(nextEntry.getKey(), nextEntry.getValue()); + return new Pair<>( + nextEntry.getKey().getFullPath(), + constructOneTimeseriesMetadata(nextEntry.getKey(), nextEntry.getValue())); } catch (IOException e) { LOG.error("Meets IOException when getting next TimeseriesMetadata", e); return null; diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java similarity index 63% rename from tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java rename to tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java index 6160c31d9d5a0..5eb9316ecc730 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriterTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java @@ -19,15 +19,24 @@ package org.apache.iotdb.tsfile.write.writer; +import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor; +import org.apache.iotdb.tsfile.encoding.encoder.Encoder; +import org.apache.iotdb.tsfile.encoding.encoder.TSEncodingBuilder; import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; +import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; +import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; +import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; +import org.apache.iotdb.tsfile.read.common.Path; import org.apache.iotdb.tsfile.utils.Binary; import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.TsPrimitiveType; import org.apache.iotdb.tsfile.write.TsFileIntegrityCheckingTool; import org.apache.iotdb.tsfile.write.chunk.AlignedChunkWriterImpl; import org.apache.iotdb.tsfile.write.chunk.ChunkWriterImpl; +import org.apache.iotdb.tsfile.write.chunk.TimeChunkWriter; +import org.apache.iotdb.tsfile.write.chunk.ValueChunkWriter; import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; import org.apache.iotdb.tsfile.write.schema.MeasurementSchema; import org.apache.iotdb.tsfile.write.writer.tsmiterator.TSMIterator; @@ -46,12 +55,12 @@ import java.util.Map; import java.util.Random; -public class MemoryControlTsFileIOWriterTest extends MemoryControlTsFileIOWriter { +public class TsFileIOWriterMemoryControlTest { private static File testFile = new File("target", "1-1-0-0.tsfile"); private static File emptyFile = new File("target", "temp"); private long TEST_CHUNK_SIZE = 1000; - private List measurementDictInOrder = new ArrayList<>(); - private List deviceDictInOrder = new ArrayList<>(); + private List sortedSeriesId = new ArrayList<>(); + private List sortedDeviceId = new ArrayList<>(); private boolean init = false; @Before @@ -59,17 +68,17 @@ public void 
setUp() throws IOException { if (!init) { init = true; for (int i = 0; i < 2048; ++i) { - measurementDictInOrder.add("s" + i); - deviceDictInOrder.add("root.sg.d" + i); + sortedSeriesId.add("s" + i); + sortedDeviceId.add("root.sg.d" + i); } - measurementDictInOrder.sort((String::compareTo)); - deviceDictInOrder.sort((String::compareTo)); + sortedSeriesId.sort((String::compareTo)); + sortedDeviceId.sort((String::compareTo)); } + TEST_CHUNK_SIZE = 1000; } @After public void tearDown() throws IOException { - this.close(); if (testFile.exists()) { FileUtils.delete(testFile); } @@ -84,19 +93,15 @@ public void tearDown() throws IOException { } } - public MemoryControlTsFileIOWriterTest() throws IOException { - super(emptyFile, 1024); - } - /** The following tests is for ChunkMetadata serialization and deserialization. */ @Test public void testSerializeAndDeserializeChunkMetadata() throws IOException { try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024 * 1024 * 10)) { List originChunkMetadataList = new ArrayList<>(); - for (int i = 0; i < 1; ++i) { - String deviceId = deviceDictInOrder.get(i); + for (int i = 0; i < 10; ++i) { + String deviceId = sortedDeviceId.get(i); writer.startChunkGroup(deviceId); - for (int j = 0; j < 1; ++j) { + for (int j = 0; j < 5; ++j) { ChunkWriterImpl chunkWriter; switch (j) { case 0: @@ -129,155 +134,120 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { writer.chunkMetadataTempFile, writer.chunkGroupMetadataList, writer.endPosInCMTForDevice); - // for (int i = 0; i < originChunkMetadataList.size(); ++i) { - // Pair chunkMetadataPair = - // window.getNextSeriesNameAndChunkMetadata(); - // Assert.assertEquals( - // deviceDictInOrder.get(i / 5) + "." + measurementDictInOrder.get(i % 5), - // chunkMetadataPair.left); - // Assert.assertEquals( - // originChunkMetadataList.get(i).getStartTime(), - // chunkMetadataPair.right.getStartTime()); - // Assert.assertEquals( - // originChunkMetadataList.get(i).getEndTime(), - // chunkMetadataPair.right.getEndTime()); - // Assert.assertEquals( - // originChunkMetadataList.get(i).getDataType(), - // chunkMetadataPair.right.getDataType()); - // Assert.assertEquals( - // originChunkMetadataList.get(i).getStatistics(), - // chunkMetadataPair.right.getStatistics()); - // } for (int i = 0; iterator.hasNext(); ++i) { - TimeseriesMetadata timeseriesMetadata = iterator.next(); + Pair timeseriesMetadataPair = iterator.next(); + TimeseriesMetadata timeseriesMetadata = timeseriesMetadataPair.right; + Assert.assertEquals(sortedSeriesId.get(i % 5), timeseriesMetadata.getMeasurementId()); Assert.assertEquals( - measurementDictInOrder.get(i % 5), timeseriesMetadata.getMeasurementId()); + originChunkMetadataList.get(i).getDataType(), timeseriesMetadata.getTSDataType()); + Assert.assertEquals( + originChunkMetadataList.get(i).getStatistics(), timeseriesMetadata.getStatistics()); + } + } + } + + @Test + public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024 * 1024 * 10)) { + List originChunkMetadataList = new ArrayList<>(); + for (int i = 0; i < 10; ++i) { + String deviceId = sortedDeviceId.get(i); + writer.startChunkGroup(deviceId); + AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6); + chunkWriter.writeToFileWriter(writer); + originChunkMetadataList.addAll(writer.chunkMetadataList); + writer.endChunkGroup(); + } + Map> originChunkMetadata = 
writer.groupChunkMetadataListBySeries(); + writer.sortAndFlushChunkMetadata(); + writer.tempOutput.flush(); + + List measurementIds = new ArrayList<>(); + for (int i = 0; i < 10; ++i) { + measurementIds.add(sortedDeviceId.get(i) + "."); + for (int j = 1; j <= 6; ++j) { + measurementIds.add(sortedDeviceId.get(i) + ".s" + j); + } + } + TSMIterator iterator = + TSMIterator.getTSMIteratorInDisk( + writer.chunkMetadataTempFile, new ArrayList<>(), writer.endPosInCMTForDevice); + for (int i = 0; iterator.hasNext(); ++i) { + Pair timeseriesMetadataPair = iterator.next(); + String fullPath = timeseriesMetadataPair.left; + TimeseriesMetadata timeseriesMetadata = timeseriesMetadataPair.right; + Assert.assertEquals(measurementIds.get(i), fullPath); Assert.assertEquals( originChunkMetadataList.get(i).getDataType(), timeseriesMetadata.getTSDataType()); + Assert.assertEquals( + originChunkMetadataList.get(i).getStatistics(), timeseriesMetadata.getStatistics()); } } } - // @Test - // public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException { - // try (MemoryControlTsFileIOWriter writer = - // new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { - // List originChunkMetadataList = new ArrayList<>(); - // for (int i = 0; i < 10; ++i) { - // String deviceId = deviceDictInOrder.get(i); - // writer.startChunkGroup(deviceId); - // AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6); - // chunkWriter.writeToFileWriter(writer); - // originChunkMetadataList.addAll(writer.chunkMetadataList); - // writer.endChunkGroup(); - // } - // Map> originChunkMetadata = - // writer.groupChunkMetadataListBySeries(); - // writer.sortAndFlushChunkMetadata(); - // writer.tempOutput.flush(); - // - // ChunkMetadataReadIterator window = - // new ChunkMetadataReadIterator( - // 0, - // writer.chunkMetadataTempFile.length(), - // new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); - // List measurementIds = new ArrayList<>(); - // for (int i = 0; i < 10; ++i) { - // measurementIds.add(deviceDictInOrder.get(i) + "."); - // for (int j = 1; j <= 6; ++j) { - // measurementIds.add(deviceDictInOrder.get(i) + ".s" + j); - // } - // } - // for (String measurementId : measurementIds) { - // List chunkMetadata = new ArrayList<>(); - // String seriesId = window.getAllChunkMetadataForNextSeries(chunkMetadata); - // Assert.assertEquals(measurementId, seriesId); - // Assert.assertEquals( - // originChunkMetadata.get(new Path(measurementId)).size(), chunkMetadata.size()); - // for (int i = 0; i < chunkMetadata.size(); ++i) { - // Assert.assertEquals( - // originChunkMetadata.get(new Path(measurementId)).get(i).getStatistics(), - // chunkMetadata.get(i).getStatistics()); - // Assert.assertEquals( - // originChunkMetadata.get(new Path(measurementId)).get(i).getDataType(), - // chunkMetadata.get(i).getDataType()); - // } - // } - // } - // } - - // @Test - // public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { - // try (MemoryControlTsFileIOWriter writer = - // new MemoryControlTsFileIOWriter(testFile, 1024 * 1024 * 10)) { - // List originChunkMetadataList = new ArrayList<>(); - // List seriesIds = new ArrayList<>(); - // for (int i = 0; i < 10; ++i) { - // String deviceId = deviceDictInOrder.get(i); - // writer.startChunkGroup(deviceId); - // if (i % 2 == 0) { - // // write normal series - // for (int j = 0; j < 5; ++j) { - // ChunkWriterImpl chunkWriter; - // switch (j) { - // case 0: - // chunkWriter = generateIntData(j, 0L, new ArrayList<>()); - 
// break; - // case 1: - // chunkWriter = generateBooleanData(j, 0L, new ArrayList<>()); - // break; - // case 2: - // chunkWriter = generateFloatData(j, 0L, new ArrayList<>()); - // break; - // case 3: - // chunkWriter = generateDoubleData(j, 0L, new ArrayList<>()); - // break; - // case 4: - // default: - // chunkWriter = generateTextData(j, 0L, new ArrayList<>()); - // break; - // } - // chunkWriter.writeToFileWriter(writer); - // seriesIds.add(deviceId + "." + measurementDictInOrder.get(j)); - // } - // } else { - // // write vector - // AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6); - // chunkWriter.writeToFileWriter(writer); - // seriesIds.add(deviceId + "."); - // for (int l = 1; l <= 6; ++l) { - // seriesIds.add(deviceId + ".s" + l); - // } - // } - // originChunkMetadataList.addAll(writer.chunkMetadataList); - // writer.endChunkGroup(); - // } - // writer.sortAndFlushChunkMetadata(); - // writer.tempOutput.flush(); - // - // ChunkMetadataReadIterator window = - // new ChunkMetadataReadIterator( - // 0, - // writer.chunkMetadataTempFile.length(), - // new LocalTsFileInput(writer.chunkMetadataTempFile.toPath())); - // for (int i = 0; i < originChunkMetadataList.size(); ++i) { - // Pair chunkMetadataPair = - // window.getNextSeriesNameAndChunkMetadata(); - // Assert.assertEquals(seriesIds.get(i), chunkMetadataPair.left); - // Assert.assertEquals( - // originChunkMetadataList.get(i).getStartTime(), - // chunkMetadataPair.right.getStartTime()); - // Assert.assertEquals( - // originChunkMetadataList.get(i).getEndTime(), chunkMetadataPair.right.getEndTime()); - // Assert.assertEquals( - // originChunkMetadataList.get(i).getDataType(), - // chunkMetadataPair.right.getDataType()); - // Assert.assertEquals( - // originChunkMetadataList.get(i).getStatistics(), - // chunkMetadataPair.right.getStatistics()); - // } - // } - // } + @Test + public void testSerializeAndDeserializeMixedChunkMetadata() throws IOException { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024 * 1024 * 10)) { + List originChunkMetadataList = new ArrayList<>(); + List seriesIds = new ArrayList<>(); + for (int i = 0; i < 10; ++i) { + String deviceId = sortedDeviceId.get(i); + writer.startChunkGroup(deviceId); + if (i % 2 == 0) { + // write normal series + for (int j = 0; j < 5; ++j) { + ChunkWriterImpl chunkWriter; + switch (j) { + case 0: + chunkWriter = generateIntData(j, 0L, new ArrayList<>()); + break; + case 1: + chunkWriter = generateBooleanData(j, 0L, new ArrayList<>()); + break; + case 2: + chunkWriter = generateFloatData(j, 0L, new ArrayList<>()); + break; + case 3: + chunkWriter = generateDoubleData(j, 0L, new ArrayList<>()); + break; + case 4: + default: + chunkWriter = generateTextData(j, 0L, new ArrayList<>()); + break; + } + chunkWriter.writeToFileWriter(writer); + seriesIds.add(deviceId + "." 
+ sortedSeriesId.get(j)); + } + } else { + // write vector + AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, new ArrayList<>(), 6); + chunkWriter.writeToFileWriter(writer); + seriesIds.add(deviceId + "."); + for (int l = 1; l <= 6; ++l) { + seriesIds.add(deviceId + ".s" + l); + } + } + originChunkMetadataList.addAll(writer.chunkMetadataList); + writer.endChunkGroup(); + } + writer.sortAndFlushChunkMetadata(); + writer.tempOutput.flush(); + + TSMIterator iterator = + TSMIterator.getTSMIteratorInDisk( + writer.chunkMetadataTempFile, new ArrayList<>(), writer.endPosInCMTForDevice); + for (int i = 0; i < originChunkMetadataList.size(); ++i) { + Pair timeseriesMetadataPair = iterator.next(); + Assert.assertEquals(seriesIds.get(i), timeseriesMetadataPair.left); + Assert.assertEquals( + originChunkMetadataList.get(i).getDataType(), + timeseriesMetadataPair.right.getTSDataType()); + Assert.assertEquals( + originChunkMetadataList.get(i).getStatistics(), + timeseriesMetadataPair.right.getStatistics()); + } + } + } /** The following tests is for writing normal series in different nums. */ @@ -290,9 +260,9 @@ public void testSerializeAndDeserializeChunkMetadata() throws IOException { @Test public void testWriteCompleteFileWithNormalChunk() throws IOException { Map>>>> originData = new HashMap<>(); - try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { for (int i = 0; i < 10; ++i) { - String deviceId = deviceDictInOrder.get(i); + String deviceId = sortedDeviceId.get(i); writer.startChunkGroup(deviceId); for (int j = 0; j < 5; ++j) { List> valList = new ArrayList<>(); @@ -319,7 +289,7 @@ public void testWriteCompleteFileWithNormalChunk() throws IOException { writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } writer.endChunkGroup(); @@ -340,9 +310,9 @@ public void testWriteCompleteFileWithNormalChunk() throws IOException { @Test public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { Map>>>> originData = new HashMap<>(); - try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { for (int i = 0; i < 10; ++i) { - String deviceId = deviceDictInOrder.get(i); + String deviceId = sortedDeviceId.get(i); writer.startChunkGroup(deviceId); for (int j = 0; j < 5; ++j) { ChunkWriterImpl chunkWriter; @@ -352,10 +322,9 @@ public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -364,10 +333,9 @@ public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - 
.computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -376,10 +344,9 @@ public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -388,10 +355,9 @@ public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -401,14 +367,14 @@ public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; } + writer.checkMetadataSizeAndMayFlush(); } writer.endChunkGroup(); } @@ -430,9 +396,9 @@ public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { Map>>>> originData = new HashMap<>(); long originTestChunkSize = TEST_CHUNK_SIZE; TEST_CHUNK_SIZE = 10; - try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { for (int i = 0; i < 2; ++i) { - String deviceId = deviceDictInOrder.get(i); + String deviceId = sortedDeviceId.get(i); writer.startChunkGroup(deviceId); for (int j = 0; j < 5; ++j) { ChunkWriterImpl chunkWriter; @@ -442,10 +408,9 @@ public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -454,10 +419,9 @@ public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -466,10 +430,9 @@ public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = 
generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -478,10 +441,9 @@ public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -491,14 +453,14 @@ public void testWriteCompleteFileWithEnormousNormalChunk() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originData .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; } + writer.checkMetadataSizeAndMayFlush(); } writer.endChunkGroup(); } @@ -522,9 +484,9 @@ public void testWriteCompleteFileWithEnormousSeriesNum() throws IOException { Map>>>> originTimes = new HashMap<>(); long originTestChunkSize = TEST_CHUNK_SIZE; TEST_CHUNK_SIZE = 1; - try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { for (int i = 0; i < 2; ++i) { - String deviceId = deviceDictInOrder.get(i); + String deviceId = sortedDeviceId.get(i); writer.startChunkGroup(deviceId); for (int j = 0; j < 1024; ++j) { ChunkWriterImpl chunkWriter; @@ -534,10 +496,9 @@ public void testWriteCompleteFileWithEnormousSeriesNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -546,10 +507,9 @@ public void testWriteCompleteFileWithEnormousSeriesNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -558,10 +518,9 @@ public void testWriteCompleteFileWithEnormousSeriesNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new 
ArrayList<>()) .add(valList); } break; @@ -570,10 +529,9 @@ public void testWriteCompleteFileWithEnormousSeriesNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -583,14 +541,14 @@ public void testWriteCompleteFileWithEnormousSeriesNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; } + writer.checkMetadataSizeAndMayFlush(); } writer.endChunkGroup(); } @@ -614,9 +572,9 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { Map>>>> originTimes = new HashMap<>(); long originTestChunkSize = TEST_CHUNK_SIZE; TEST_CHUNK_SIZE = 10; - try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { for (int i = 0; i < 1024; ++i) { - String deviceId = deviceDictInOrder.get(i); + String deviceId = sortedDeviceId.get(i); writer.startChunkGroup(deviceId); for (int j = 0; j < 5; ++j) { ChunkWriterImpl chunkWriter; @@ -626,10 +584,9 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -638,10 +595,9 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -650,10 +606,9 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -662,10 +617,9 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); 
originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; @@ -675,14 +629,14 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { List> valList = new ArrayList<>(); chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); chunkWriter.writeToFileWriter(writer); - writer.checkMetadataSizeAndMayFlush(); originTimes .computeIfAbsent(deviceId, x -> new HashMap<>()) - .computeIfAbsent(measurementDictInOrder.get(j), x -> new ArrayList<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) .add(valList); } break; } + writer.checkMetadataSizeAndMayFlush(); } writer.endChunkGroup(); } @@ -705,9 +659,9 @@ public void testWriteCompleteFileWithEnormousDeviceNum() throws IOException { @Test public void testWriteCompleteFileWithAlignedSeriesWithOneChunk() throws IOException { Map>>>> originData = new HashMap<>(); - try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { for (int i = 0; i < 10; ++i) { - String deviceId = deviceDictInOrder.get(i); + String deviceId = sortedDeviceId.get(i); writer.startChunkGroup(deviceId); List>> valList = new ArrayList<>(); AlignedChunkWriterImpl chunkWriter = generateVectorData(0L, valList, 6); @@ -738,9 +692,9 @@ public void testWriteCompleteFileWithAlignedSeriesWithOneChunk() throws IOExcept public void testWriteCompleteFileWithAlignedSeriesWithMultiChunks() throws IOException { Map>>>> originData = new HashMap<>(); int chunkNum = 512, seriesNum = 6; - try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { for (int i = 0; i < 1; ++i) { - String deviceId = deviceDictInOrder.get(i); + String deviceId = sortedDeviceId.get(i); for (int k = 0; k < chunkNum; ++k) { writer.startChunkGroup(deviceId); List>> valList = new ArrayList<>(); @@ -777,9 +731,9 @@ public void testWriteCompleteFileWithAlignedSeriesWithManyComponents() throws IO long originTestPointNum = TEST_CHUNK_SIZE; TEST_CHUNK_SIZE = 10; try { - try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { for (int i = 0; i < 10; ++i) { - String deviceId = deviceDictInOrder.get(i); + String deviceId = sortedDeviceId.get(i); for (int k = 0; k < chunkNum; ++k) { writer.startChunkGroup(deviceId); List>> valList = new ArrayList<>(); @@ -815,9 +769,9 @@ public void testWriteCompleteFileWithLotsAlignedSeries() throws IOException { TEST_CHUNK_SIZE = 10; int deviceNum = 1024; try { - try (MemoryControlTsFileIOWriter writer = new MemoryControlTsFileIOWriter(testFile, 1024)) { + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { for (int i = 0; i < deviceNum; ++i) { - String deviceId = deviceDictInOrder.get(i); + String deviceId = sortedDeviceId.get(i); for (int k = 0; k < chunkNum; ++k) { writer.startChunkGroup(deviceId); List>> valList = new ArrayList<>(); @@ -845,12 +799,163 @@ public void testWriteCompleteFileWithLotsAlignedSeries() throws IOException { TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData); } + @Test + public void testWritingAlignedSeriesByColumnWithMultiComponents() throws IOException { + Map>>>> originValue 
= new HashMap<>(); + TEST_CHUNK_SIZE = 10; + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { + for (int i = 0; i < 5; i++) { + String deviceId = sortedDeviceId.get(i); + writer.startChunkGroup(deviceId); + TSEncoding timeEncoding = + TSEncoding.valueOf(TSFileDescriptor.getInstance().getConfig().getTimeEncoder()); + TSDataType timeType = TSFileDescriptor.getInstance().getConfig().getTimeSeriesDataType(); + Encoder encoder = TSEncodingBuilder.getEncodingBuilder(timeEncoding).getEncoder(timeType); + for (int chunkIdx = 0; chunkIdx < 10; ++chunkIdx) { + TimeChunkWriter timeChunkWriter = + new TimeChunkWriter("", CompressionType.SNAPPY, TSEncoding.PLAIN, encoder); + for (long j = TEST_CHUNK_SIZE * chunkIdx; j < TEST_CHUNK_SIZE * (chunkIdx + 1); ++j) { + timeChunkWriter.write(j); + } + timeChunkWriter.writeToFileWriter(writer); + } + writer.sortAndFlushChunkMetadata(); + Assert.assertTrue(writer.hasChunkMetadataInDisk); + for (int k = 0; k < 1024; ++k) { + TSEncodingBuilder builder = TSEncodingBuilder.getEncodingBuilder(TSEncoding.PLAIN); + builder.initFromProps(null); + for (int chunkIdx = 0; chunkIdx < 10; ++chunkIdx) { + ValueChunkWriter chunkWriter = + new ValueChunkWriter( + sortedSeriesId.get(k), + CompressionType.SNAPPY, + TSDataType.DOUBLE, + TSEncoding.PLAIN, + builder.getEncoder(TSDataType.DOUBLE)); + Random random = new Random(); + List> valueList = new ArrayList<>(); + for (long j = TEST_CHUNK_SIZE * chunkIdx; j < TEST_CHUNK_SIZE * (chunkIdx + 1); ++j) { + double val = random.nextDouble(); + chunkWriter.write(j, val, false); + valueList.add(new Pair<>((long) j, new TsPrimitiveType.TsDouble(val))); + } + chunkWriter.writeToFileWriter(writer); + originValue + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(k), x -> new ArrayList<>()) + .add(valueList); + } + writer.sortAndFlushChunkMetadata(); + } + writer.endChunkGroup(); + } + writer.endFile(); + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originValue); + } + + @Test + public void testWritingCompleteMixedFiles() throws IOException { + Map>>>> originData = new HashMap<>(); + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { + for (int i = 0; i < 5; ++i) { + String deviceId = sortedDeviceId.get(i); + for (int k = 0; k < 10; ++k) { + writer.startChunkGroup(deviceId); + List>> valList = new ArrayList<>(); + AlignedChunkWriterImpl chunkWriter = generateVectorData(k * TEST_CHUNK_SIZE, valList, 6); + for (int j = 1; j <= 6; ++j) { + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent("s" + j, x -> new ArrayList<>()) + .add(valList.get(j - 1)); + } + + chunkWriter.writeToFileWriter(writer); + writer.endChunkGroup(); + } + writer.checkMetadataSizeAndMayFlush(); + } + for (int i = 5; i < 10; ++i) { + String deviceId = sortedDeviceId.get(i); + writer.startChunkGroup(deviceId); + for (int j = 0; j < 5; ++j) { + ChunkWriterImpl chunkWriter; + switch (j) { + case 0: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + case 1: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateBooleanData(j, 
(long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + case 2: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + case 3: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + case 4: + default: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + } + writer.checkMetadataSizeAndMayFlush(); + } + writer.endChunkGroup(); + } + writer.endFile(); + Assert.assertTrue(writer.hasChunkMetadataInDisk); + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData); + } + /** The following tests is for writing mixed of normal series and aligned series */ private ChunkWriterImpl generateIntData( int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl( - new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.INT64)); + new ChunkWriterImpl(new MeasurementSchema(sortedSeriesId.get(idx), TSDataType.INT64)); Random random = new Random(); for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { long val = random.nextLong(); @@ -863,8 +968,7 @@ private ChunkWriterImpl generateIntData( private ChunkWriterImpl generateFloatData( int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl( - new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.FLOAT)); + new ChunkWriterImpl(new MeasurementSchema(sortedSeriesId.get(idx), TSDataType.FLOAT)); Random random = new Random(); for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { float val = random.nextFloat(); @@ -877,8 +981,7 @@ private ChunkWriterImpl generateFloatData( private ChunkWriterImpl generateDoubleData( int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl( - new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.DOUBLE)); + new ChunkWriterImpl(new MeasurementSchema(sortedSeriesId.get(idx), TSDataType.DOUBLE)); Random random = new Random(); for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { double val = random.nextDouble(); @@ -891,8 +994,7 @@ private ChunkWriterImpl generateDoubleData( private ChunkWriterImpl generateBooleanData( int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl( - new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.BOOLEAN)); + new ChunkWriterImpl(new MeasurementSchema(sortedSeriesId.get(idx), TSDataType.BOOLEAN)); Random random = new Random(); for 
(long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { boolean val = random.nextBoolean(); @@ -958,8 +1060,7 @@ private AlignedChunkWriterImpl generateVectorData( private ChunkWriterImpl generateTextData( int idx, long startTime, List> record) { ChunkWriterImpl chunkWriter = - new ChunkWriterImpl( - new MeasurementSchema(measurementDictInOrder.get(idx), TSDataType.TEXT)); + new ChunkWriterImpl(new MeasurementSchema(sortedSeriesId.get(idx), TSDataType.TEXT)); Random random = new Random(); for (long i = startTime; i < startTime + TEST_CHUNK_SIZE; ++i) { Binary val = new Binary(String.valueOf(random.nextDouble())); From 50884208918701339d7f9693d5d1059e5ed1bd0e Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Fri, 16 Sep 2022 16:02:27 +0800 Subject: [PATCH 23/31] remove MemoryControlTsFileIOWriter --- .../utils/SingleSeriesCompactionExecutor.java | 3 +- .../writer/MemoryControlTsFileIOWriter.java | 60 ------------------- .../TsFileIOWriterMemoryControlTest.java | 6 +- 3 files changed, 3 insertions(+), 66 deletions(-) delete mode 100644 tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java index e336733a66a1f..b6d587086da13 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java @@ -37,7 +37,6 @@ import org.apache.iotdb.tsfile.write.chunk.ChunkWriterImpl; import org.apache.iotdb.tsfile.write.schema.IMeasurementSchema; import org.apache.iotdb.tsfile.write.schema.MeasurementSchema; -import org.apache.iotdb.tsfile.write.writer.MemoryControlTsFileIOWriter; import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; import com.google.common.util.concurrent.RateLimiter; @@ -78,7 +77,7 @@ public SingleSeriesCompactionExecutor( PartialPath series, IMeasurementSchema measurementSchema, LinkedList>> readerAndChunkMetadataList, - MemoryControlTsFileIOWriter fileWriter, + TsFileIOWriter fileWriter, TsFileResource targetResource) { this.device = series.getDevice(); this.series = series; diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java deleted file mode 100644 index 986d4b37db5ba..0000000000000 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/MemoryControlTsFileIOWriter.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.iotdb.tsfile.write.writer; - -import org.apache.iotdb.tsfile.read.common.Path; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; - -/** - * This writer control the total size of chunk metadata to avoid OOM when writing massive - * timeseries. This writer can only be used in the scenarios where the chunk is written in - * order. The order means lexicographical order and time order. The lexicographical order - * requires that, if the writer is going to write a series S, all data of the all series - * smaller than S in lexicographical order has been written to the writer. The time order - * requires that, for a single series S, if the writer is going to write a chunk C of - * it, all chunks of S whose start time is smaller than C should have been written to - * the writer. If you do not comply with the above requirements, metadata index tree may be - * generated incorrectly. As a result, the file cannot be queried correctly. - */ -public class MemoryControlTsFileIOWriter extends TsFileIOWriter { - private static final Logger LOG = LoggerFactory.getLogger(MemoryControlTsFileIOWriter.class); - protected long maxMetadataSize; - protected long currentChunkMetadataSize = 0L; - protected File chunkMetadataTempFile; - protected LocalTsFileOutput tempOutput; - protected volatile boolean hasChunkMetadataInDisk = false; - protected String currentSeries = null; - // record the total num of path in order to make bloom filter - protected int pathCount = 0; - Path lastSerializePath = null; - - public static final String CHUNK_METADATA_TEMP_FILE_SUFFIX = ".cmt"; - - public MemoryControlTsFileIOWriter(File file, long maxMetadataSize) throws IOException { - super(file); - this.maxMetadataSize = maxMetadataSize; - this.chunkMetadataTempFile = new File(file.getAbsoluteFile() + CHUNK_METADATA_TEMP_FILE_SUFFIX); - } -} diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java index 5eb9316ecc730..63cc03ff50cc3 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java @@ -82,11 +82,9 @@ public void tearDown() throws IOException { if (testFile.exists()) { FileUtils.delete(testFile); } - if (new File(testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_SUFFIX) - .exists()) { + if (new File(testFile.getPath() + TsFileIOWriter.CHUNK_METADATA_TEMP_FILE_SUFFIX).exists()) { FileUtils.delete( - new File( - testFile.getPath() + MemoryControlTsFileIOWriter.CHUNK_METADATA_TEMP_FILE_SUFFIX)); + new File(testFile.getPath() + TsFileIOWriter.CHUNK_METADATA_TEMP_FILE_SUFFIX)); } if (emptyFile.exists()) { FileUtils.delete(emptyFile); From cbd9061c867ad984e2f8a32acd5bc282f008acfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E6=B2=9B=E8=BE=B0?= <45144903+choubenson@users.noreply.github.com> Date: Fri, 16 Sep 2022 16:16:50 +0800 Subject: [PATCH 24/31] add ci (#7353) --- .../TsFileIOWriterMemoryControlTest.java | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java index 5eb9316ecc730..e5490aafe4c45 100644 --- 
a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java @@ -951,6 +951,112 @@ public void testWritingCompleteMixedFiles() throws IOException { TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData); } + @Test + public void testWritingAlignedSeriesByColumn() throws IOException { + Map>>>> originValue = new HashMap<>(); + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { + for (int i = 0; i < 5; i++) { + String deviceId = sortedDeviceId.get(i); + writer.startChunkGroup(deviceId); + TSEncoding timeEncoding = + TSEncoding.valueOf(TSFileDescriptor.getInstance().getConfig().getTimeEncoder()); + TSDataType timeType = TSFileDescriptor.getInstance().getConfig().getTimeSeriesDataType(); + Encoder encoder = TSEncodingBuilder.getEncodingBuilder(timeEncoding).getEncoder(timeType); + TimeChunkWriter timeChunkWriter = + new TimeChunkWriter("", CompressionType.SNAPPY, TSEncoding.PLAIN, encoder); + for (int j = 0; j < TEST_CHUNK_SIZE; ++j) { + timeChunkWriter.write(j); + } + timeChunkWriter.writeToFileWriter(writer); + writer.sortAndFlushChunkMetadata(); + Assert.assertTrue(writer.hasChunkMetadataInDisk); + for (int k = 0; k < 5; ++k) { + TSEncodingBuilder builder = TSEncodingBuilder.getEncodingBuilder(TSEncoding.PLAIN); + builder.initFromProps(null); + ValueChunkWriter chunkWriter = + new ValueChunkWriter( + sortedSeriesId.get(k), + CompressionType.SNAPPY, + TSDataType.DOUBLE, + TSEncoding.PLAIN, + builder.getEncoder(TSDataType.DOUBLE)); + Random random = new Random(); + List> valueList = new ArrayList<>(); + for (int j = 0; j < TEST_CHUNK_SIZE; ++j) { + double val = random.nextDouble(); + chunkWriter.write(j, val, false); + valueList.add(new Pair<>((long) j, new TsPrimitiveType.TsDouble(val))); + } + chunkWriter.writeToFileWriter(writer); + originValue + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(k), x -> new ArrayList<>()) + .add(valueList); + writer.sortAndFlushChunkMetadata(); + } + writer.endChunkGroup(); + } + writer.endFile(); + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originValue); + } + + @Test + public void testWritingAlignedSeriesByColumnWithMultiChunks() throws IOException { + Map>>>> originValue = new HashMap<>(); + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { + for (int i = 0; i < 5; i++) { + String deviceId = sortedDeviceId.get(i); + writer.startChunkGroup(deviceId); + TSEncoding timeEncoding = + TSEncoding.valueOf(TSFileDescriptor.getInstance().getConfig().getTimeEncoder()); + TSDataType timeType = TSFileDescriptor.getInstance().getConfig().getTimeSeriesDataType(); + Encoder encoder = TSEncodingBuilder.getEncodingBuilder(timeEncoding).getEncoder(timeType); + for (int chunkIdx = 0; chunkIdx < 10; ++chunkIdx) { + TimeChunkWriter timeChunkWriter = + new TimeChunkWriter("", CompressionType.SNAPPY, TSEncoding.PLAIN, encoder); + for (long j = TEST_CHUNK_SIZE * chunkIdx; j < TEST_CHUNK_SIZE * (chunkIdx + 1); ++j) { + timeChunkWriter.write(j); + } + timeChunkWriter.writeToFileWriter(writer); + } + writer.sortAndFlushChunkMetadata(); + Assert.assertTrue(writer.hasChunkMetadataInDisk); + for (int k = 0; k < 5; ++k) { + TSEncodingBuilder builder = TSEncodingBuilder.getEncodingBuilder(TSEncoding.PLAIN); + 
builder.initFromProps(null); + for (int chunkIdx = 0; chunkIdx < 10; ++chunkIdx) { + ValueChunkWriter chunkWriter = + new ValueChunkWriter( + sortedSeriesId.get(k), + CompressionType.SNAPPY, + TSDataType.DOUBLE, + TSEncoding.PLAIN, + builder.getEncoder(TSDataType.DOUBLE)); + Random random = new Random(); + List<Pair<Long, TsPrimitiveType>> valueList = new ArrayList<>(); + for (long j = TEST_CHUNK_SIZE * chunkIdx; j < TEST_CHUNK_SIZE * (chunkIdx + 1); ++j) { + double val = random.nextDouble(); + chunkWriter.write(j, val, false); + valueList.add(new Pair<>((long) j, new TsPrimitiveType.TsDouble(val))); + } + chunkWriter.writeToFileWriter(writer); + originValue + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(k), x -> new ArrayList<>()) + .add(valueList); + } + writer.sortAndFlushChunkMetadata(); + } + writer.endChunkGroup(); + } + writer.endFile(); + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originValue); + } + /** The following tests is for writing mixed of normal series and aligned series */ private ChunkWriterImpl generateIntData( int idx, long startTime, List<Pair<Long, TsPrimitiveType>> record) { From 941ee35e5944125e14da72bea72ff856461bfc8a Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Fri, 16 Sep 2022 17:25:24 +0800 Subject: [PATCH 25/31] adapt chunk metadata size control for writing --- .../resources/conf/iotdb-datanode.properties | 8 ++ .../org/apache/iotdb/db/conf/IoTDBConfig.java | 21 +++++ .../apache/iotdb/db/conf/IoTDBDescriptor.java | 11 +++ .../utils/SingleSeriesCompactionExecutor.java | 4 +- .../impl/ReadChunkCompactionPerformer.java | 10 ++- .../writer/CrossSpaceCompactionWriter.java | 12 +-- .../writer/InnerSpaceCompactionWriter.java | 10 ++- .../db/engine/flush/MemTableFlushTask.java | 23 +++-- .../engine/storagegroup/TsFileProcessor.java | 9 +- .../engine/storagegroup/TsFileResource.java | 3 + .../writer/RestorableTsFileIOWriter.java | 11 +++ .../tsfile/write/writer/TsFileIOWriter.java | 8 +- .../TsFileIOWriterMemoryControlTest.java | 87 +++++++++++++++++++ 13 files changed, 190 insertions(+), 27 deletions(-) diff --git a/server/src/assembly/resources/conf/iotdb-datanode.properties b/server/src/assembly/resources/conf/iotdb-datanode.properties index 99afaf642dc1b..ef0fbce7acb82 100644 --- a/server/src/assembly/resources/conf/iotdb-datanode.properties +++ b/server/src/assembly/resources/conf/iotdb-datanode.properties @@ -451,6 +451,10 @@ timestamp_precision=ms # Datatype: int # primitive_array_size=32 +# the proportion of write memory that chunk metadata may occupy in a single file writer when flushing a memtable +# Datatype: double +# chunk_metadata_size_proportion_in_write=0.1 + # Ratio of write memory for invoking flush disk, 0.4 by default # If you have extremely high write load (like batch=1000), it can be set lower than the default value like 0.2 # Datatype: double @@ -567,6 +571,10 @@ timestamp_precision=ms # BALANCE: alternate two compaction types # compaction_priority=BALANCE +# the proportion of per-task compaction memory that chunk metadata may occupy when compacting +# Datatype: double +# chunk_metadata_size_proportion_in_compaction=0.05 + # The target tsfile size in compaction # Datatype: long, Unit: byte # target_compaction_file_size=1073741824 diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java index d76734b8a778b..fcf7ef99fd8d1 100644 ---
a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java +++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBConfig.java @@ -157,6 +157,8 @@ public class IoTDBConfig { /** The proportion of write memory for memtable */ private double writeProportion = 0.8; + private double chunkMetadataSizeProportionInWrite = 0.1; + /** The proportion of write memory for compaction */ private double compactionProportion = 0.2; @@ -434,6 +436,8 @@ public class IoTDBConfig { */ private CompactionPriority compactionPriority = CompactionPriority.BALANCE; + private double chunkMetadataSizeProportionInCompaction = 0.05; + /** The target tsfile size in compaction, 1 GB by default */ private long targetCompactionFileSize = 1073741824L; @@ -3141,6 +3145,23 @@ public void setThrottleThreshold(long throttleThreshold) { this.throttleThreshold = throttleThreshold; } + public double getChunkMetadataSizeProportionInWrite() { + return chunkMetadataSizeProportionInWrite; + } + + public void setChunkMetadataSizeProportionInWrite(double chunkMetadataSizeProportionInWrite) { + this.chunkMetadataSizeProportionInWrite = chunkMetadataSizeProportionInWrite; + } + + public double getChunkMetadataSizeProportionInCompaction() { + return chunkMetadataSizeProportionInCompaction; + } + + public void setChunkMetadataSizeProportionInCompaction( + double chunkMetadataSizeProportionInCompaction) { + this.chunkMetadataSizeProportionInCompaction = chunkMetadataSizeProportionInCompaction; + } + public String getConfigMessage() { String configMessage = ""; String configContent; diff --git a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java index 9cd2402c7f863..4437b4edb6bbe 100644 --- a/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java +++ b/server/src/main/java/org/apache/iotdb/db/conf/IoTDBDescriptor.java @@ -604,6 +604,11 @@ public void loadProperties(Properties properties) { properties.getProperty( "concurrent_compaction_thread", Integer.toString(conf.getConcurrentCompactionThread())))); + conf.setChunkMetadataSizeProportionInCompaction( + Double.parseDouble( + properties.getProperty( + "chunk_metadata_size_proportion_in_compaction", + Double.toString(conf.getChunkMetadataSizeProportionInCompaction())))); conf.setTargetCompactionFileSize( Long.parseLong( properties.getProperty( @@ -1431,6 +1436,12 @@ public void loadHotModifiedProps(Properties properties) throws QueryProcessExcep // update tsfile-format config loadTsFileProps(properties); + conf.setChunkMetadataSizeProportionInWrite( + Double.parseDouble( + properties.getProperty( + "chunk_metadata_size_proportion_in_write", + Double.toString(conf.getChunkMetadataSizeProportionInWrite())))); + // update max_deduplicated_path_num conf.setMaxQueryDeduplicatedPathNum( Integer.parseInt( diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java index b6d587086da13..d1d4a366e7f2e 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/inner/utils/SingleSeriesCompactionExecutor.java @@ -151,6 +151,7 @@ public void execute() throws IOException { } else if (pointCountInChunkWriter != 0L) { flushChunkWriter(); } + fileWriter.checkMetadataSizeAndMayFlush(); 
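For orientation, the budget arithmetic that the surrounding write-path and compaction hunks implement, as a sketch with assumed example values (the two proportions are the defaults from the properties file above; the memtable threshold, compaction memory pool, and thread count here are illustrative assumptions, not values set by this patch):

    // Illustrative sketch only; the real values come from IoTDBConfig at runtime.
    long memtableSizeThreshold = 1024L * 1024 * 1024; // assumed 1 GiB memtable threshold
    double writeProportion = 0.1;                     // chunk_metadata_size_proportion_in_write
    // budget handed to each RestorableTsFileIOWriter on the write path (~107 MB here)
    long writeBudget = (long) (memtableSizeThreshold * writeProportion);

    long compactionMemory = 2L * 1024 * 1024 * 1024;  // assumed memory for all compaction tasks
    int compactionThreads = 10;                       // assumed concurrent_compaction_thread
    double compactionProportion = 0.05;               // chunk_metadata_size_proportion_in_compaction
    // budget handed to each TsFileIOWriter by a compaction performer; cross-space
    // compaction divides this once more by the number of target files
    long compactionBudget = (long) (compactionMemory / compactionThreads * compactionProportion);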
targetResource.updateStartTime(device, minStartTimestamp); targetResource.updateEndTime(device, maxEndTimestamp); } @@ -310,7 +311,6 @@ private void flushChunkToFileWriter( false, getChunkSize(chunk)); fileWriter.writeChunk(chunk, chunkMetadata); - fileWriter.checkMetadataSizeAndMayFlush(); } private void flushChunkWriterIfLargeEnough() throws IOException { @@ -324,7 +324,6 @@ private void flushChunkWriterIfLargeEnough() throws IOException { false, chunkWriter.estimateMaxSeriesMemSize()); chunkWriter.writeToFileWriter(fileWriter); - fileWriter.checkMetadataSizeAndMayFlush(); pointCountInChunkWriter = 0L; } } @@ -347,7 +346,6 @@ private void flushChunkWriter() throws IOException { false, chunkWriter.estimateMaxSeriesMemSize()); chunkWriter.writeToFileWriter(fileWriter); - fileWriter.checkMetadataSizeAndMayFlush(); pointCountInChunkWriter = 0L; } } diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java index 779a7c820d323..ac0fa1ddc46f8 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/performer/impl/ReadChunkCompactionPerformer.java @@ -66,10 +66,12 @@ public void perform() throws IOException, MetadataException, InterruptedException, StorageEngineException { // size for file writer is 5% of per compaction task memory budget long sizeForFileWriter = - SystemInfo.getInstance().getMemorySizeForCompaction() - / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread() - * 5 - / 100L; + (long) + (SystemInfo.getInstance().getMemorySizeForCompaction() + / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread() + * IoTDBDescriptor.getInstance() + .getConfig() + .getChunkMetadataSizeProportionInCompaction()); try (MultiTsFileDeviceIterator deviceIterator = new MultiTsFileDeviceIterator(seqFiles); TsFileIOWriter writer = new TsFileIOWriter(targetResource.getTsFile(), true, sizeForFileWriter)) { diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java index c9b945964eca5..d192c0f6d71df 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/CrossSpaceCompactionWriter.java @@ -65,11 +65,13 @@ public CrossSpaceCompactionWriter( isEmptyFile = new boolean[seqFileResources.size()]; isDeviceExistedInTargetFiles = new boolean[targetResources.size()]; long memorySizeForEachWriter = - SystemInfo.getInstance().getMemorySizeForCompaction() - / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread() - * 5 - / 100L - / targetResources.size(); + (long) + (SystemInfo.getInstance().getMemorySizeForCompaction() + / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread() + * IoTDBDescriptor.getInstance() + .getConfig() + .getChunkMetadataSizeProportionInCompaction() + / targetResources.size()); for (int i = 0; i < targetResources.size(); i++) { this.fileWriterList.add( new TsFileIOWriter(targetResources.get(i).getTsFile(), true, memorySizeForEachWriter)); diff --git a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java 
b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java index 72de8a0e831ea..2c3c2e58ad07b 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/compaction/writer/InnerSpaceCompactionWriter.java @@ -38,10 +38,12 @@ public class InnerSpaceCompactionWriter extends AbstractCompactionWriter { public InnerSpaceCompactionWriter(TsFileResource targetFileResource) throws IOException { long sizeForFileWriter = - SystemInfo.getInstance().getMemorySizeForCompaction() - / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread() - * 5 - / 100L; + (long) + (SystemInfo.getInstance().getMemorySizeForCompaction() + / IoTDBDescriptor.getInstance().getConfig().getConcurrentCompactionThread() + * IoTDBDescriptor.getInstance() + .getConfig() + .getChunkMetadataSizeProportionInCompaction()); this.fileWriter = new TsFileIOWriter(targetFileResource.getTsFile(), true, sizeForFileWriter); isEmptyFile = true; resource = targetFileResource; diff --git a/server/src/main/java/org/apache/iotdb/db/engine/flush/MemTableFlushTask.java b/server/src/main/java/org/apache/iotdb/db/engine/flush/MemTableFlushTask.java index a3ca350fea248..6c6ab965b6fd1 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/flush/MemTableFlushTask.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/flush/MemTableFlushTask.java @@ -38,6 +38,9 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; @@ -114,14 +117,19 @@ public void syncFlushMemTable() throws ExecutionException, InterruptedException long sortTime = 0; // for map do not use get(key) to iterate - for (Map.Entry memTableEntry : - memTable.getMemTableMap().entrySet()) { - encodingTaskQueue.put(new StartFlushGroupIOTask(memTableEntry.getKey().toStringID())); - - final Map value = memTableEntry.getValue().getMemChunkMap(); - for (Map.Entry iWritableMemChunkEntry : value.entrySet()) { + Map memTableMap = memTable.getMemTableMap(); + List deviceIDList = new ArrayList<>(memTableMap.keySet()); + // sort the IDeviceID in lexicographical order + deviceIDList.sort(Comparator.comparing(IDeviceID::toStringID)); + for (IDeviceID deviceID : deviceIDList) { + encodingTaskQueue.put(new StartFlushGroupIOTask(deviceID.toStringID())); + + final Map value = memTableMap.get(deviceID).getMemChunkMap(); + List seriesInOrder = new ArrayList<>(value.keySet()); + seriesInOrder.sort((String::compareTo)); + for (String seriesId : seriesInOrder) { long startTime = System.currentTimeMillis(); - IWritableMemChunk series = iWritableMemChunkEntry.getValue(); + IWritableMemChunk series = value.get(seriesId); /* * sort task (first task of flush pipeline) */ @@ -274,6 +282,7 @@ public void run() { this.writer.endChunkGroup(); } else { ((IChunkWriter) ioMessage).writeToFileWriter(this.writer); + writer.checkMetadataSizeAndMayFlush(); } } catch (IOException e) { LOGGER.error( diff --git a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileProcessor.java b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileProcessor.java index b8964b517a4e5..2eea52ace917c 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileProcessor.java +++ 
b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileProcessor.java @@ -179,7 +179,14 @@ public class TsFileProcessor { this.storageGroupName = storageGroupName; this.tsFileResource = new TsFileResource(tsfile, this); this.storageGroupInfo = storageGroupInfo; - this.writer = new RestorableTsFileIOWriter(tsfile); + this.writer = + new RestorableTsFileIOWriter( + tsfile, + (long) + (IoTDBDescriptor.getInstance().getConfig().getMemtableSizeThreshold() + * IoTDBDescriptor.getInstance() + .getConfig() + .getChunkMetadataSizeProportionInWrite())); this.updateLatestFlushTimeCallback = updateLatestFlushTimeCallback; this.sequence = sequence; this.walNode = WALManager.getInstance().applyForWALNode(storageGroupName); diff --git a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java index 79f7b597c6b9f..6addc3934b6cb 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java @@ -44,6 +44,7 @@ import org.apache.iotdb.tsfile.read.filter.basic.Filter; import org.apache.iotdb.tsfile.utils.FilePathUtils; import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; +import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -509,6 +510,8 @@ public void removeModFile() throws IOException { public boolean remove() { try { fsFactory.deleteIfExists(file); + fsFactory.deleteIfExists( + new File(file.getAbsolutePath() + TsFileIOWriter.CHUNK_METADATA_TEMP_FILE_SUFFIX)); } catch (IOException e) { LOGGER.error("TsFile {} cannot be deleted: {}", file, e.getMessage()); return false; diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/RestorableTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/RestorableTsFileIOWriter.java index 78253124b89a9..d885fe036ee8c 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/RestorableTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/RestorableTsFileIOWriter.java @@ -78,6 +78,17 @@ public RestorableTsFileIOWriter(File file) throws IOException { this(file, true); } + /** + * @param file a given tsfile path you want to (continue to) write + * @throws IOException if write failed, or the file is broken but autoRepair==false. + */ + public RestorableTsFileIOWriter(File file, long maxMetadataSize) throws IOException { + this(file, true); + this.maxMetadataSize = maxMetadataSize; + this.enableMemoryControl = true; + this.checkMetadataSizeAndMayFlush(); + } + public RestorableTsFileIOWriter(File file, boolean truncate) throws IOException { if (logger.isDebugEnabled()) { logger.debug("{} is opened.", file.getName()); diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 53735fe8d2095..36a116c86f8f9 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -630,9 +630,10 @@ public void setMaxPlanIndex(long maxPlanIndex) { /** * Check if the size of chunk metadata in memory is greater than the given threshold. If so, the - * chunk metadata will be written to a temp files. Notice! 
If you are writing a aligned device, - * you should make sure all data of current writing device has been written before this method is - * called. For not aligned series, there is no such limitation. + * chunk metadata will be written to a temp file. Notice! If you are writing an aligned device + * row by row, you should make sure that all data of the current device has been written before + * this method is called. For writing non-aligned series, or an aligned device column by column, + * you should make sure that all data of one series has been written before you call this function. * * @throws IOException */ @@ -690,6 +691,7 @@ private void writeChunkMetadataToTempFile( // mark the end position of last device endPosInCMTForDevice.add(tempOutput.getPosition()); // serialize the device + // for each device, we only serialize it once, in order to save IO ReadWriteIOUtils.write(seriesPath.getDevice(), tempOutput.wrapAsStream()); } if (!seriesPath.equals(lastSerializePath) && iChunkMetadataList.size() > 0) { diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java index 63cc03ff50cc3..4a5d17165d2c7 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java @@ -383,6 +383,93 @@ public void testWriteCompleteFileWithMultipleNormalChunk() throws IOException { TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData); } + /** + * Write a file with 10 devices and 5 series in each device. For each series, we write 10 chunks + * for it. We maintain some chunk metadata in memory when calling endFile().
+ * + * @throws IOException + */ + @Test + public void testWriteCompleteFileWithMetadataRemainsInMemoryWhenEndFile() throws IOException { + Map>>>> originData = new HashMap<>(); + try (TsFileIOWriter writer = new TsFileIOWriter(testFile, true, 1024)) { + for (int i = 0; i < 10; ++i) { + String deviceId = sortedDeviceId.get(i); + writer.startChunkGroup(deviceId); + for (int j = 0; j < 5; ++j) { + ChunkWriterImpl chunkWriter; + switch (j) { + case 0: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateIntData(j, (long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + case 1: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateBooleanData(j, (long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + case 2: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateFloatData(j, (long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + case 3: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateDoubleData(j, (long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + case 4: + default: + for (int k = 0; k < 10; ++k) { + List> valList = new ArrayList<>(); + chunkWriter = generateTextData(j, (long) TEST_CHUNK_SIZE * k, valList); + chunkWriter.writeToFileWriter(writer); + originData + .computeIfAbsent(deviceId, x -> new HashMap<>()) + .computeIfAbsent(sortedSeriesId.get(j), x -> new ArrayList<>()) + .add(valList); + } + break; + } + if (i < 9) { + writer.checkMetadataSizeAndMayFlush(); + } + } + writer.endChunkGroup(); + } + Assert.assertTrue(writer.hasChunkMetadataInDisk); + Assert.assertFalse(writer.chunkMetadataList.isEmpty()); + writer.endFile(); + } + TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); + TsFileIntegrityCheckingTool.checkIntegrityByQuery(testFile.getPath(), originData); + } + /** * Write a file with 2 devices and 5 series in each device. For each series, we write 1024 chunks * for it. 
This test makes sure that each chunk From 88e67bb95903c8ce9310c6a8693869cb381574b4 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Mon, 19 Sep 2022 10:50:43 +0800 Subject: [PATCH 26/31] check memory path count and add some comments --- .../file/AbstractTsFileRecoverPerformer.java | 18 +++++++++- .../file/metadata/AlignedChunkMetadata.java | 4 --- .../tsfile/file/metadata/ChunkMetadata.java | 35 ------------------- .../tsfile/file/metadata/IChunkMetadata.java | 2 -- .../tsfile/write/writer/TsFileIOWriter.java | 7 ++++ .../writer/tsmiterator/DiskTSMIterator.java | 8 +++-- .../write/writer/tsmiterator/TSMIterator.java | 7 ++-- 7 files changed, 34 insertions(+), 47 deletions(-) diff --git a/server/src/main/java/org/apache/iotdb/db/wal/recover/file/AbstractTsFileRecoverPerformer.java b/server/src/main/java/org/apache/iotdb/db/wal/recover/file/AbstractTsFileRecoverPerformer.java index f2c3934ccf1f5..e506d66c3a50f 100644 --- a/server/src/main/java/org/apache/iotdb/db/wal/recover/file/AbstractTsFileRecoverPerformer.java +++ b/server/src/main/java/org/apache/iotdb/db/wal/recover/file/AbstractTsFileRecoverPerformer.java @@ -18,13 +18,16 @@ */ package org.apache.iotdb.db.wal.recover.file; +import org.apache.iotdb.db.conf.IoTDBDescriptor; import org.apache.iotdb.db.engine.storagegroup.TsFileResource; import org.apache.iotdb.db.exception.DataRegionException; import org.apache.iotdb.db.utils.FileLoaderUtils; import org.apache.iotdb.tsfile.exception.NotCompatibleTsFileException; import org.apache.iotdb.tsfile.read.TsFileSequenceReader; import org.apache.iotdb.tsfile.write.writer.RestorableTsFileIOWriter; +import org.apache.iotdb.tsfile.write.writer.TsFileIOWriter; +import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,6 +58,12 @@ public AbstractTsFileRecoverPerformer(TsFileResource tsFileResource) { */ protected void recoverWithWriter() throws DataRegionException, IOException { File tsFile = tsFileResource.getTsFile(); + File chunkMetadataTempFile = + new File(tsFile.getAbsolutePath() + TsFileIOWriter.CHUNK_METADATA_TEMP_FILE_SUFFIX); + if (chunkMetadataTempFile.exists()) { + // delete chunk metadata temp file + FileUtils.delete(chunkMetadataTempFile); + } if (!tsFile.exists()) { logger.error("TsFile {} is missing, will skip its recovery.", tsFile); return; @@ -68,7 +77,14 @@ protected void recoverWithWriter() throws DataRegionException, IOException { // try to remove corrupted part of the TsFile try { - writer = new RestorableTsFileIOWriter(tsFile); + writer = + new RestorableTsFileIOWriter( + tsFile, + (long) + (IoTDBDescriptor.getInstance().getConfig().getMemtableSizeThreshold() + * IoTDBDescriptor.getInstance() + .getConfig() + .getChunkMetadataSizeProportionInWrite())); } catch (NotCompatibleTsFileException e) { boolean result = tsFile.delete(); logger.warn( diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java index e855ea4bad712..a69e61b9b8050 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/AlignedChunkMetadata.java @@ -183,10 +183,6 @@ public int serializeTo(OutputStream outputStream, boolean serializeStatistic) { throw new UnsupportedOperationException("VectorChunkMetadata doesn't support serial method"); } - public int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath) { - throw new
UnsupportedOperationException("VectorChunkMetadata doesn't support serial method"); - } - @Override public byte getMask() { return 0; diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java index a9bd99d739eb5..9ee1f7f566869 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/ChunkMetadata.java @@ -22,7 +22,6 @@ import org.apache.iotdb.tsfile.file.metadata.statistics.Statistics; import org.apache.iotdb.tsfile.read.common.TimeRange; import org.apache.iotdb.tsfile.read.controller.IChunkLoader; -import org.apache.iotdb.tsfile.utils.Binary; import org.apache.iotdb.tsfile.utils.FilePathUtils; import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.RamUsageEstimator; @@ -162,40 +161,6 @@ public int serializeTo(OutputStream outputStream, boolean serializeStatistic) th return byteLen; } - /** - * Serialize the chunk metadata with full path, data type and statistic - * - * @param outputStream OutputStream - * @param seriesFullPath the full path of the chunk metadata - * @return length - * @throws IOException - */ - public int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath) - throws IOException { - int byteLen = 0; - byteLen += ReadWriteIOUtils.write(new Binary(seriesFullPath), outputStream); - byteLen += ReadWriteIOUtils.write(tsDataType, outputStream); - byteLen += this.serializeTo(outputStream, true); - return byteLen; - } - - /** - * Deserialize with full info, the result is store in param chunkMetadata - * - * @param buffer ByteBuffer - * @param chunkMetadata ChunkMetadata to store the result - * @return the full path of the measurement - * @throws IOException - */ - public static String deserializeWithFullInfo(ByteBuffer buffer, ChunkMetadata chunkMetadata) - throws IOException { - String fullPath = ReadWriteIOUtils.readBinary(buffer).toString(); - chunkMetadata.tsDataType = TSDataType.deserialize(ReadWriteIOUtils.readByte(buffer)); - chunkMetadata.offsetOfChunkHeader = ReadWriteIOUtils.readLong(buffer); - chunkMetadata.statistics = Statistics.deserialize(buffer, chunkMetadata.tsDataType); - return fullPath; - } - /** * deserialize from ByteBuffer. 
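
serializeWithFullInfo and deserializeWithFullInfo can be dropped because the temp file no longer repeats the full path with every chunk: the device is written once per device, the measurement header once per series, and then the raw chunk metadata, using the "chunkSize | chunkBuffer" structure noted elsewhere in this series. A hedged sketch of one such record; the ReadWriteIOUtils calls appear in this series, while the class and parameter names here are illustrative:

    import java.io.IOException;
    import java.util.List;

    import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata;
    import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
    import org.apache.iotdb.tsfile.utils.PublicBAOS;
    import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
    import org.apache.iotdb.tsfile.write.writer.TsFileOutput;

    public class CmtRecordSketch {
      // One series record in the .cmt temp file: [deviceId] measurementId dataType size buffer
      static int writeSeriesRecord(
          String device,
          String measurement,
          TSDataType dataType,
          List<IChunkMetadata> chunkMetadataList,
          TsFileOutput tempOutput)
          throws IOException {
        ReadWriteIOUtils.write(device, tempOutput.wrapAsStream()); // once per device
        ReadWriteIOUtils.writeVar(measurement, tempOutput.wrapAsStream()); // once per series
        ReadWriteIOUtils.write(dataType, tempOutput.wrapAsStream());
        PublicBAOS buffer = new PublicBAOS();
        int size = 0;
        for (IChunkMetadata chunkMetadata : chunkMetadataList) {
          size += chunkMetadata.serializeTo(buffer, true); // statistics included in the temp file
        }
        ReadWriteIOUtils.write(size, tempOutput.wrapAsStream());
        buffer.writeTo(tempOutput.wrapAsStream());
        return size;
      }
    }
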
* diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/IChunkMetadata.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/IChunkMetadata.java index 9d8c7a8b804db..1cc819fd52ddf 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/IChunkMetadata.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/file/metadata/IChunkMetadata.java @@ -73,7 +73,5 @@ public interface IChunkMetadata { int serializeTo(OutputStream outputStream, boolean serializeStatistic) throws IOException; - int serializeWithFullInfo(OutputStream outputStream, String seriesFullPath) throws IOException; - byte getMask(); } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 3ff69bf8de2e2..64dd9260c7ecc 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -320,6 +320,7 @@ protected Map> groupChunkMetadataListBySeries() { */ @SuppressWarnings("squid:S3776") // Suppress high Cognitive Complexity warning public void endFile() throws IOException { + checkInMemoryPathCount(); readChunkMetadataAndConstructIndexTree(); long footerIndex = out.getPosition(); @@ -338,6 +339,12 @@ public void endFile() throws IOException { canWrite = false; } + private void checkInMemoryPathCount() { + for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { + pathCount += chunkGroupMetadata.getChunkMetadataList().size(); + } + } + private void readChunkMetadataAndConstructIndexTree() throws IOException { if (tempOutput != null) { tempOutput.close(); diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java index 64dcb11a99e63..ad623c0f65e0c 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java @@ -24,7 +24,6 @@ import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; -import org.apache.iotdb.tsfile.read.common.Path; import org.apache.iotdb.tsfile.read.reader.LocalTsFileInput; import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils; @@ -39,6 +38,11 @@ import java.util.LinkedList; import java.util.List; +/** + * This class reads ChunkMetadata iteratively from disk (the .cmt file) and memory (the list of + * ChunkGroupMetadata), and constructs them as TimeseriesMetadata. It reads ChunkMetadata on disk + * first, and after all ChunkMetadata on disk has been read, it reads ChunkMetadata in memory. + */ public class DiskTSMIterator extends TSMIterator { private static final Logger LOG = LoggerFactory.getLogger(DiskTSMIterator.class); @@ -115,7 +119,7 @@ private Pair getTimeSerisMetadataFromFile() throws I updateCurrentPos(); return new Pair<>( currentDevice + "." 
+ measurementUid, - constructOneTimeseriesMetadata(new Path(currentDevice, measurementUid), chunkMetadataList)); + constructOneTimeseriesMetadata(measurementUid, chunkMetadataList)); } private void updateCurrentPos() throws IOException { diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java index 030c7e19392d0..0fb62edd9b09d 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java @@ -76,7 +76,8 @@ public Pair next() { try { return new Pair<>( nextEntry.getKey().getFullPath(), - constructOneTimeseriesMetadata(nextEntry.getKey(), nextEntry.getValue())); + constructOneTimeseriesMetadata( + nextEntry.getKey().getMeasurement(), nextEntry.getValue())); } catch (IOException e) { LOG.error("Meets IOException when getting next TimeseriesMetadata", e); return null; @@ -96,7 +97,7 @@ protected void groupChunkMetadataListBySeries(List chunkGrou } protected TimeseriesMetadata constructOneTimeseriesMetadata( - Path path, List chunkMetadataList) throws IOException { + String measurementId, List chunkMetadataList) throws IOException { // create TimeseriesMetaData PublicBAOS publicBAOS = new PublicBAOS(); TSDataType dataType = chunkMetadataList.get(chunkMetadataList.size() - 1).getDataType(); @@ -118,7 +119,7 @@ protected TimeseriesMetadata constructOneTimeseriesMetadata( (byte) ((serializeStatistic ? (byte) 1 : (byte) 0) | chunkMetadataList.get(0).getMask()), chunkMetadataListLength, - path.getMeasurement(), + measurementId, dataType, seriesStatistics, publicBAOS); From 7ceeb2c53de49465f0461b7fc4f7a69ee7c0c7b7 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Mon, 19 Sep 2022 17:40:25 +0800 Subject: [PATCH 27/31] fix order bug in TSMIterator --- .../iotdb/tsfile/TsFileSequenceRead.java | 2 +- .../tsfile/write/writer/TsFileIOWriter.java | 74 ++----------------- .../write/writer/tsmiterator/TSMIterator.java | 42 ++++++++--- .../TsFileIOWriterMemoryControlTest.java | 2 +- 4 files changed, 38 insertions(+), 82 deletions(-) diff --git a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java index aa946f67b7d5e..aafb358212c70 100644 --- a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java +++ b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java @@ -53,7 +53,7 @@ public class TsFileSequenceRead { "squid:S106" }) // Suppress high Cognitive Complexity and Standard outputs warning public static void main(String[] args) throws IOException { - String filename = "test.tsfile"; + String filename = "C:\\Users\\MARKLAU\\Desktop\\13-13-0-0.tsfile"; if (args.length >= 1) { filename = args[0]; } diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 64dd9260c7ecc..f9bbd6203d23d 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -26,7 +26,6 @@ import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetadata; import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; -import 
org.apache.iotdb.tsfile.file.metadata.MetadataIndexConstructor; import org.apache.iotdb.tsfile.file.metadata.MetadataIndexEntry; import org.apache.iotdb.tsfile.file.metadata.MetadataIndexNode; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; @@ -57,7 +56,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -427,10 +425,12 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { } addCurrentIndexNodeToQueue(currentIndexNode, measurementMetadataIndexQueue, out); - deviceMetadataIndexMap.put( - prevDevice, - generateRootNode( - measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); + if (prevDevice != null) { + deviceMetadataIndexMap.put( + prevDevice, + generateRootNode( + measurementMetadataIndexQueue, out, MetadataIndexNodeType.INTERNAL_MEASUREMENT)); + } MetadataIndexNode metadataIndex = checkAndBuildLevelIndex(deviceMetadataIndexMap, out); @@ -445,68 +445,6 @@ private void readChunkMetadataAndConstructIndexTree() throws IOException { ReadWriteIOUtils.write(size, out.wrapAsStream()); } - /** - * Flush TsFileMetadata, including ChunkMetadataList and TimeseriesMetaData - * - * @param chunkMetadataListMap chunkMetadata that Path.mask == 0 - * @return MetadataIndexEntry list in TsFileMetadata - */ - private MetadataIndexNode flushMetadataIndex(Map> chunkMetadataListMap) - throws IOException { - - // convert ChunkMetadataList to this field - deviceTimeseriesMetadataMap = new LinkedHashMap<>(); - // create device -> TimeseriesMetaDataList Map - for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { - // for ordinary path - TimeseriesMetadata timeseriesMetadata = - constructOneTimeseriesMetadata(entry.getKey(), entry.getValue()); - deviceTimeseriesMetadataMap - .computeIfAbsent(entry.getKey().getDevice(), k -> new ArrayList<>()) - .add(timeseriesMetadata); - } - - // construct TsFileMetadata and return - return MetadataIndexConstructor.constructMetadataIndex(deviceTimeseriesMetadataMap, out); - } - - /** - * Flush one chunkMetadata - * - * @param path Path of chunk - * @param chunkMetadataList List of chunkMetadata about path(previous param) - * @return the constructed TimeseriesMetadata - */ - protected TimeseriesMetadata constructOneTimeseriesMetadata( - Path path, List chunkMetadataList) throws IOException { - // create TimeseriesMetaData - PublicBAOS publicBAOS = new PublicBAOS(); - TSDataType dataType = chunkMetadataList.get(chunkMetadataList.size() - 1).getDataType(); - Statistics seriesStatistics = Statistics.getStatsByType(dataType); - - int chunkMetadataListLength = 0; - boolean serializeStatistic = (chunkMetadataList.size() > 1); - // flush chunkMetadataList one by one - for (IChunkMetadata chunkMetadata : chunkMetadataList) { - if (!chunkMetadata.getDataType().equals(dataType)) { - continue; - } - chunkMetadataListLength += chunkMetadata.serializeTo(publicBAOS, serializeStatistic); - seriesStatistics.mergeStatistics(chunkMetadata.getStatistics()); - } - - TimeseriesMetadata timeseriesMetadata = - new TimeseriesMetadata( - (byte) - ((serializeStatistic ? (byte) 1 : (byte) 0) | chunkMetadataList.get(0).getMask()), - chunkMetadataListLength, - path.getMeasurement(), - dataType, - seriesStatistics, - publicBAOS); - return timeseriesMetadata; - } - /** * get the length of normal OutputStream. 
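
The prevDevice null check added above covers the degenerate case of a writer that never started a chunk group: prevDevice stays null, and emitting a device entry for it would index a device that does not exist (or throw, depending on the map implementation). This reading of the change is a hedged interpretation; a tiny self-contained illustration of the guard pattern:

    import java.util.HashMap;
    import java.util.Map;

    public class EmptyFileGuardSketch {
      public static void main(String[] args) {
        Map<String, String> deviceMetadataIndexMap = new HashMap<>();
        String prevDevice = null; // no chunk group was ever started
        if (prevDevice != null) { // the guard: skip the root-node entry entirely
          deviceMetadataIndexMap.put(prevDevice, "measurementIndexRoot");
        }
        // without the guard: {null=measurementIndexRoot}, an entry for a nonexistent device
        System.out.println(deviceMetadataIndexMap); // prints {}
      }
    }
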
* diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java index 0fb62edd9b09d..85d5940c97f62 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java @@ -19,7 +19,6 @@ package org.apache.iotdb.tsfile.write.writer.tsmiterator; import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetadata; -import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; @@ -35,6 +34,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -48,7 +48,8 @@ public class TSMIterator implements Iterator> { private static Logger LOG = LoggerFactory.getLogger(TSMIterator.class); protected Map> chunkMetadataListMap = new TreeMap<>(); - protected Iterator>> iterator; + protected List>> sortedChunkMetadataList = new ArrayList<>(); + protected Iterator>> iterator; protected TSMIterator(List chunkGroupMetadataList) { this.groupChunkMetadataListBySeries(chunkGroupMetadataList); @@ -72,12 +73,11 @@ public boolean hasNext() { @Override public Pair next() { - Map.Entry> nextEntry = iterator.next(); + Pair> nextPair = iterator.next(); try { return new Pair<>( - nextEntry.getKey().getFullPath(), - constructOneTimeseriesMetadata( - nextEntry.getKey().getMeasurement(), nextEntry.getValue())); + nextPair.left.getFullPath(), + constructOneTimeseriesMetadata(nextPair.left.getMeasurement(), nextPair.right)); } catch (IOException e) { LOG.error("Meets IOException when getting next TimeseriesMetadata", e); return null; @@ -85,15 +85,33 @@ public Pair next() { } protected void groupChunkMetadataListBySeries(List chunkGroupMetadataList) { - // group ChunkMetadata by series + Map> chunkMetadataListBySeries = new TreeMap<>(); for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { - List chunkMetadatas = chunkGroupMetadata.getChunkMetadataList(); - for (IChunkMetadata chunkMetadata : chunkMetadatas) { - Path series = new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()); - chunkMetadataListMap.computeIfAbsent(series, k -> new ArrayList<>()).add(chunkMetadata); + for (IChunkMetadata chunkMetadata : chunkGroupMetadata.getChunkMetadataList()) { + chunkMetadataListBySeries + .computeIfAbsent( + new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()), + x -> new ArrayList<>()) + .add(chunkMetadata); } } - this.iterator = chunkMetadataListMap.entrySet().iterator(); + // group ChunkMetadata by device + Map>> deviceChunkMetadataListMap = new LinkedHashMap<>(); + for (Map.Entry> entry : chunkMetadataListBySeries.entrySet()) { + deviceChunkMetadataListMap + .computeIfAbsent(entry.getKey().getDevice(), x -> new TreeMap<>()) + .put(entry.getKey(), entry.getValue()); + } + for (Map.Entry>> entry : + deviceChunkMetadataListMap.entrySet()) { + Map> pathChunkMetadataMapInOneDevice = entry.getValue(); + for (Map.Entry> pathAndChunkMetadataList : + pathChunkMetadataMapInOneDevice.entrySet()) { + sortedChunkMetadataList.add( + new Pair<>(pathAndChunkMetadataList.getKey(), pathAndChunkMetadataList.getValue())); + } + 
} + this.iterator = sortedChunkMetadataList.iterator(); } protected TimeseriesMetadata constructOneTimeseriesMetadata( diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java index a57ddd2de0562..e3f12835b08a9 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java @@ -463,7 +463,7 @@ public void testWriteCompleteFileWithMetadataRemainsInMemoryWhenEndFile() throws writer.endChunkGroup(); } Assert.assertTrue(writer.hasChunkMetadataInDisk); - Assert.assertFalse(writer.chunkMetadataList.isEmpty()); + Assert.assertFalse(writer.chunkGroupMetadataList.isEmpty()); writer.endFile(); } TsFileIntegrityCheckingTool.checkIntegrityBySequenceRead(testFile.getPath()); From 8be3011341266b277e39e30e2ab00f3649f65085 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Mon, 19 Sep 2022 20:06:30 +0800 Subject: [PATCH 28/31] fix flush order bug --- .../tsfile/write/writer/TsFileIOWriter.java | 30 ++------- .../write/writer/tsmiterator/TSMIterator.java | 63 +++++++++---------- .../TsFileIOWriterMemoryControlTest.java | 2 - 3 files changed, 34 insertions(+), 61 deletions(-) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index f9bbd6203d23d..3593a028d1883 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -290,27 +290,6 @@ public void endCurrentChunk() { currentChunkMetadata = null; } - protected Map> groupChunkMetadataListBySeries() { - // group ChunkMetadata by series - Map> chunkMetadataListMap = new TreeMap<>(); - - for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { - List chunkMetadatas = chunkGroupMetadata.getChunkMetadataList(); - for (IChunkMetadata chunkMetadata : chunkMetadatas) { - Path series = new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()); - chunkMetadataListMap.computeIfAbsent(series, k -> new ArrayList<>()).add(chunkMetadata); - } - } - - if (chunkMetadataList != null && chunkMetadataList.size() > 0) { - for (ChunkMetadata chunkMetadata : chunkMetadataList) { - Path series = new Path(currentChunkGroupDeviceId, chunkMetadata.getMeasurementUid()); - chunkMetadataListMap.computeIfAbsent(series, k -> new ArrayList<>()).add(chunkMetadata); - } - } - return chunkMetadataListMap; - } - /** * write {@linkplain TsFileMetadata TSFileMetaData} to output stream and close it. 
* @@ -615,20 +594,21 @@ public void checkMetadataSizeAndMayFlush() throws IOException { */ protected void sortAndFlushChunkMetadata() throws IOException { // group by series - Map> chunkMetadataListMap = groupChunkMetadataListBySeries(); + List>> sortedChunkMetadatList = + TSMIterator.sortChunkMetadata(chunkGroupMetadataList); if (tempOutput == null) { tempOutput = new LocalTsFileOutput(new FileOutputStream(chunkMetadataTempFile)); } hasChunkMetadataInDisk = true; // the file structure in temp file will be // chunkSize | chunkBuffer - for (Map.Entry> entry : chunkMetadataListMap.entrySet()) { - Path seriesPath = entry.getKey(); + for (Pair> pair : sortedChunkMetadatList) { + Path seriesPath = pair.left; if (!seriesPath.equals(lastSerializePath)) { // record the count of path to construct bloom filter later pathCount++; } - List iChunkMetadataList = entry.getValue(); + List iChunkMetadataList = pair.right; writeChunkMetadataToTempFile(iChunkMetadataList, seriesPath, tempOutput); lastSerializePath = seriesPath; logger.debug("Flushing {}", seriesPath); diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java index 85d5940c97f62..bf66714090404 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java @@ -34,7 +34,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -47,12 +46,12 @@ */ public class TSMIterator implements Iterator> { private static Logger LOG = LoggerFactory.getLogger(TSMIterator.class); - protected Map> chunkMetadataListMap = new TreeMap<>(); - protected List>> sortedChunkMetadataList = new ArrayList<>(); + protected List>> sortedChunkMetadataList; protected Iterator>> iterator; protected TSMIterator(List chunkGroupMetadataList) { - this.groupChunkMetadataListBySeries(chunkGroupMetadataList); + this.sortedChunkMetadataList = sortChunkMetadata(chunkGroupMetadataList); + this.iterator = sortedChunkMetadataList.iterator(); } public static TSMIterator getTSMIteratorInMemory( @@ -84,36 +83,6 @@ public Pair next() { } } - protected void groupChunkMetadataListBySeries(List chunkGroupMetadataList) { - Map> chunkMetadataListBySeries = new TreeMap<>(); - for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { - for (IChunkMetadata chunkMetadata : chunkGroupMetadata.getChunkMetadataList()) { - chunkMetadataListBySeries - .computeIfAbsent( - new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()), - x -> new ArrayList<>()) - .add(chunkMetadata); - } - } - // group ChunkMetadata by device - Map>> deviceChunkMetadataListMap = new LinkedHashMap<>(); - for (Map.Entry> entry : chunkMetadataListBySeries.entrySet()) { - deviceChunkMetadataListMap - .computeIfAbsent(entry.getKey().getDevice(), x -> new TreeMap<>()) - .put(entry.getKey(), entry.getValue()); - } - for (Map.Entry>> entry : - deviceChunkMetadataListMap.entrySet()) { - Map> pathChunkMetadataMapInOneDevice = entry.getValue(); - for (Map.Entry> pathAndChunkMetadataList : - pathChunkMetadataMapInOneDevice.entrySet()) { - sortedChunkMetadataList.add( - new Pair<>(pathAndChunkMetadataList.getKey(), pathAndChunkMetadataList.getValue())); - } - } - this.iterator = sortedChunkMetadataList.iterator(); - } 
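
The replacement helper, sortChunkMetadata (added just below), keeps this device-major ordering as a reusable static method: an outer TreeMap orders devices and an inner TreeMap orders series within each device, so all series of one device are flushed contiguously. A self-contained sketch of the resulting iteration order, with invented device and series names:

    import java.util.TreeMap;

    public class FlushOrderSketch {
      public static void main(String[] args) {
        // outer TreeMap: devices in order; inner TreeMap: series within one device
        TreeMap<String, TreeMap<String, String>> byDevice = new TreeMap<>();
        byDevice.computeIfAbsent("root.sg.d2", x -> new TreeMap<>()).put("s1", "chunkMetadataOfD2S1");
        byDevice.computeIfAbsent("root.sg.d1", x -> new TreeMap<>()).put("s2", "chunkMetadataOfD1S2");
        byDevice.computeIfAbsent("root.sg.d1", x -> new TreeMap<>()).put("s1", "chunkMetadataOfD1S1");
        // iteration yields d1.s1, d1.s2, then d2.s1: series of one device stay contiguous
        byDevice.forEach(
            (device, series) ->
                series.forEach((s, cm) -> System.out.println(device + "." + s + " -> " + cm)));
      }
    }
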
- protected TimeseriesMetadata constructOneTimeseriesMetadata( String measurementId, List chunkMetadataList) throws IOException { // create TimeseriesMetaData @@ -143,4 +112,30 @@ protected TimeseriesMetadata constructOneTimeseriesMetadata( publicBAOS); return timeseriesMetadata; } + + public static List>> sortChunkMetadata( + List chunkGroupMetadataList) { + Map>> chunkMetadataMap = new TreeMap<>(); + List>> sortedChunkMetadataList = new LinkedList<>(); + for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { + chunkMetadataMap.computeIfAbsent(chunkGroupMetadata.getDevice(), x -> new TreeMap<>()); + for (IChunkMetadata chunkMetadata : chunkGroupMetadata.getChunkMetadataList()) { + chunkMetadataMap + .get(chunkGroupMetadata.getDevice()) + .computeIfAbsent( + new Path(chunkGroupMetadata.getDevice(), chunkMetadata.getMeasurementUid()), + x -> new ArrayList<>()) + .add(chunkMetadata); + } + } + for (Map.Entry>> entry : chunkMetadataMap.entrySet()) { + Map> seriesChunkMetadataMap = entry.getValue(); + for (Map.Entry> seriesChunkMetadataEntry : + seriesChunkMetadataMap.entrySet()) { + sortedChunkMetadataList.add( + new Pair<>(seriesChunkMetadataEntry.getKey(), seriesChunkMetadataEntry.getValue())); + } + } + return sortedChunkMetadataList; + } } diff --git a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java index e3f12835b08a9..44e4af3678819 100644 --- a/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java +++ b/tsfile/src/test/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriterMemoryControlTest.java @@ -28,7 +28,6 @@ import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding; -import org.apache.iotdb.tsfile.read.common.Path; import org.apache.iotdb.tsfile.utils.Binary; import org.apache.iotdb.tsfile.utils.Pair; import org.apache.iotdb.tsfile.utils.TsPrimitiveType; @@ -156,7 +155,6 @@ public void testSerializeAndDeserializeAlignedChunkMetadata() throws IOException originChunkMetadataList.addAll(writer.chunkMetadataList); writer.endChunkGroup(); } - Map> originChunkMetadata = writer.groupChunkMetadataListBySeries(); writer.sortAndFlushChunkMetadata(); writer.tempOutput.flush(); From 23ed0b918c4750b71acac1f2a0f9f61c4d33192b Mon Sep 17 00:00:00 2001 From: LiuXuxin Date: Mon, 19 Sep 2022 22:34:28 +0800 Subject: [PATCH 29/31] fix ci --- .../db/tools/TsFileSplitByPartitionTool.java | 4 +-- .../tsfile/write/writer/TsFileIOWriter.java | 36 +++++++++++++++---- .../write/writer/tsmiterator/TSMIterator.java | 19 ++++++++-- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/apache/iotdb/db/tools/TsFileSplitByPartitionTool.java b/server/src/main/java/org/apache/iotdb/db/tools/TsFileSplitByPartitionTool.java index b573ed354d271..ddfa8789b7d5d 100644 --- a/server/src/main/java/org/apache/iotdb/db/tools/TsFileSplitByPartitionTool.java +++ b/server/src/main/java/org/apache/iotdb/db/tools/TsFileSplitByPartitionTool.java @@ -492,10 +492,10 @@ protected boolean fileCheck() throws IOException { protected TsFileResource endFileAndGenerateResource(TsFileIOWriter tsFileIOWriter) throws IOException { - tsFileIOWriter.endFile(); - TsFileResource tsFileResource = new TsFileResource(tsFileIOWriter.getFile()); Map> deviceTimeseriesMetadataMap = 
tsFileIOWriter.getDeviceTimeseriesMetadataMap(); + tsFileIOWriter.endFile(); + TsFileResource tsFileResource = new TsFileResource(tsFileIOWriter.getFile()); for (Entry> entry : deviceTimeseriesMetadataMap.entrySet()) { String device = entry.getKey(); for (TimeseriesMetadata timeseriesMetaData : entry.getValue()) { diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 3593a028d1883..3aa65a8c934ac 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -546,6 +546,30 @@ public TsFileOutput getIOWriterOut() { * @return DeviceTimeseriesMetadataMap */ public Map> getDeviceTimeseriesMetadataMap() { + Map> deviceTimeseriesMetadataMap = new TreeMap<>(); + Map>> chunkMetadataMap = new TreeMap<>(); + for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { + for (ChunkMetadata chunkMetadata : chunkGroupMetadata.getChunkMetadataList()) { + chunkMetadataMap + .computeIfAbsent(chunkGroupMetadata.getDevice(), x -> new TreeMap<>()) + .computeIfAbsent(chunkMetadata.getMeasurementUid(), x -> new ArrayList<>()) + .add(chunkMetadata); + } + } + for (String device : chunkMetadataMap.keySet()) { + Map> seriesToChunkMetadataMap = chunkMetadataMap.get(device); + for (Map.Entry> entry : seriesToChunkMetadataMap.entrySet()) { + try { + deviceTimeseriesMetadataMap + .computeIfAbsent(device, x -> new ArrayList<>()) + .add(TSMIterator.constructOneTimeseriesMetadata(entry.getKey(), entry.getValue())); + } catch (IOException e) { + logger.error("Failed to get device timeseries metadata map", e); + return null; + } + } + } + return deviceTimeseriesMetadataMap; } @@ -594,22 +618,23 @@ public void checkMetadataSizeAndMayFlush() throws IOException { */ protected void sortAndFlushChunkMetadata() throws IOException { // group by series - List>> sortedChunkMetadatList = - TSMIterator.sortChunkMetadata(chunkGroupMetadataList); + List>> sortedChunkMetadataList = + TSMIterator.sortChunkMetadata( + chunkGroupMetadataList, currentChunkGroupDeviceId, chunkMetadataList); if (tempOutput == null) { tempOutput = new LocalTsFileOutput(new FileOutputStream(chunkMetadataTempFile)); } hasChunkMetadataInDisk = true; // the file structure in temp file will be // chunkSize | chunkBuffer - for (Pair> pair : sortedChunkMetadatList) { + for (Pair> pair : sortedChunkMetadataList) { Path seriesPath = pair.left; if (!seriesPath.equals(lastSerializePath)) { // record the count of path to construct bloom filter later pathCount++; } List iChunkMetadataList = pair.right; - writeChunkMetadataToTempFile(iChunkMetadataList, seriesPath, tempOutput); + writeChunkMetadataToTempFile(iChunkMetadataList, seriesPath); lastSerializePath = seriesPath; logger.debug("Flushing {}", seriesPath); } @@ -621,8 +646,7 @@ protected void sortAndFlushChunkMetadata() throws IOException { } private void writeChunkMetadataToTempFile( - List iChunkMetadataList, Path seriesPath, LocalTsFileOutput output) - throws IOException { + List iChunkMetadataList, Path seriesPath) throws IOException { // [DeviceId] measurementId datatype size chunkMetadataBuffer if (lastSerializePath == null || !seriesPath.getDevice().equals(lastSerializePath.getDevice())) { diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java 
b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java index bf66714090404..fae1b63b8c448 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java @@ -19,6 +19,7 @@ package org.apache.iotdb.tsfile.write.writer.tsmiterator; import org.apache.iotdb.tsfile.file.metadata.ChunkGroupMetadata; +import org.apache.iotdb.tsfile.file.metadata.ChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.IChunkMetadata; import org.apache.iotdb.tsfile.file.metadata.TimeseriesMetadata; import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType; @@ -50,7 +51,7 @@ public class TSMIterator implements Iterator> { protected Iterator>> iterator; protected TSMIterator(List chunkGroupMetadataList) { - this.sortedChunkMetadataList = sortChunkMetadata(chunkGroupMetadataList); + this.sortedChunkMetadataList = sortChunkMetadata(chunkGroupMetadataList, null, null); this.iterator = sortedChunkMetadataList.iterator(); } @@ -83,7 +84,7 @@ public Pair next() { } } - protected TimeseriesMetadata constructOneTimeseriesMetadata( + public static TimeseriesMetadata constructOneTimeseriesMetadata( String measurementId, List chunkMetadataList) throws IOException { // create TimeseriesMetaData PublicBAOS publicBAOS = new PublicBAOS(); @@ -114,7 +115,9 @@ protected TimeseriesMetadata constructOneTimeseriesMetadata( } public static List>> sortChunkMetadata( - List chunkGroupMetadataList) { + List chunkGroupMetadataList, + String currentDevice, + List chunkMetadataList) { Map>> chunkMetadataMap = new TreeMap<>(); List>> sortedChunkMetadataList = new LinkedList<>(); for (ChunkGroupMetadata chunkGroupMetadata : chunkGroupMetadataList) { @@ -128,6 +131,16 @@ public static List>> sortChunkMetadata( .add(chunkMetadata); } } + if (currentDevice != null) { + for (IChunkMetadata chunkMetadata : chunkMetadataList) { + chunkMetadataMap + .computeIfAbsent(currentDevice, x -> new TreeMap<>()) + .computeIfAbsent( + new Path(currentDevice, chunkMetadata.getMeasurementUid()), x -> new ArrayList<>()) + .add(chunkMetadata); + } + } + for (Map.Entry>> entry : chunkMetadataMap.entrySet()) { Map> seriesChunkMetadataMap = entry.getValue(); for (Map.Entry> seriesChunkMetadataEntry : From f0048582a719a6eb2e7a1d55e89caca2dc544f14 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Tue, 20 Sep 2022 10:33:57 +0800 Subject: [PATCH 30/31] fix ci --- .../iotdb/tsfile/write/writer/RestorableTsFileIOWriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/RestorableTsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/RestorableTsFileIOWriter.java index d885fe036ee8c..391426cc34fe1 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/RestorableTsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/RestorableTsFileIOWriter.java @@ -86,6 +86,7 @@ public RestorableTsFileIOWriter(File file, long maxMetadataSize) throws IOExcept this(file, true); this.maxMetadataSize = maxMetadataSize; this.enableMemoryControl = true; + this.chunkMetadataTempFile = new File(file.getAbsolutePath() + CHUNK_METADATA_TEMP_FILE_SUFFIX); this.checkMetadataSizeAndMayFlush(); } From 5cccdab7b05d6e6f7fb295e199a805ec0d03fe01 Mon Sep 17 00:00:00 2001 From: Liu Xuxin Date: Tue, 20 Sep 2022 16:48:39 +0800 Subject: [PATCH 31/31] adjust according to review --- .../iotdb/tsfile/TsFileSequenceRead.java 
| 2 +- .../engine/storagegroup/TsFileResource.java | 5 ++++- .../tsfile/write/writer/TsFileIOWriter.java | 14 +++++++------- .../writer/tsmiterator/DiskTSMIterator.java | 16 ++++++++-------- .../write/writer/tsmiterator/TSMIterator.java | 19 ++++++------------- 5 files changed, 26 insertions(+), 30 deletions(-) diff --git a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java index aafb358212c70..aa946f67b7d5e 100644 --- a/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java +++ b/example/tsfile/src/main/java/org/apache/iotdb/tsfile/TsFileSequenceRead.java @@ -53,7 +53,7 @@ public class TsFileSequenceRead { "squid:S106" }) // Suppress high Cognitive Complexity and Standard outputs warning public static void main(String[] args) throws IOException { - String filename = "C:\\Users\\MARKLAU\\Desktop\\13-13-0-0.tsfile"; + String filename = "test.tsfile"; if (args.length >= 1) { filename = args[0]; } diff --git a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java index 6addc3934b6cb..95d35d115d958 100644 --- a/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java +++ b/server/src/main/java/org/apache/iotdb/db/engine/storagegroup/TsFileResource.java @@ -506,7 +506,10 @@ public void removeModFile() throws IOException { modFile = null; } - /** Remove the data file, its resource file, and its modification file physically. */ + /** + * Remove the data file, its resource file, its chunk metadata temp file, and its modification + * file physically. + */ public boolean remove() { try { fsFactory.deleteIfExists(file); diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java index 3aa65a8c934ac..851f03c192536 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/TsFileIOWriter.java @@ -114,7 +114,7 @@ public class TsFileIOWriter implements AutoCloseable { // record the total num of path in order to make bloom filter protected int pathCount = 0; protected boolean enableMemoryControl = false; - Path lastSerializePath = null; + private Path lastSerializePath = null; protected LinkedList endPosInCMTForDevice = new LinkedList<>(); public static final String CHUNK_METADATA_TEMP_FILE_SUFFIX = ".cmt"; @@ -625,16 +625,15 @@ protected void sortAndFlushChunkMetadata() throws IOException { tempOutput = new LocalTsFileOutput(new FileOutputStream(chunkMetadataTempFile)); } hasChunkMetadataInDisk = true; - // the file structure in temp file will be - // chunkSize | chunkBuffer for (Pair> pair : sortedChunkMetadataList) { Path seriesPath = pair.left; - if (!seriesPath.equals(lastSerializePath)) { + boolean isNewPath = !seriesPath.equals(lastSerializePath); + if (isNewPath) { // record the count of path to construct bloom filter later pathCount++; } List iChunkMetadataList = pair.right; - writeChunkMetadataToTempFile(iChunkMetadataList, seriesPath); + writeChunkMetadataToTempFile(iChunkMetadataList, seriesPath, isNewPath); lastSerializePath = seriesPath; logger.debug("Flushing {}", seriesPath); } @@ -646,7 +645,8 @@ protected void sortAndFlushChunkMetadata() throws IOException { } private void writeChunkMetadataToTempFile( - List iChunkMetadataList, 
Path seriesPath) throws IOException { + List iChunkMetadataList, Path seriesPath, boolean isNewPath) + throws IOException { // [DeviceId] measurementId datatype size chunkMetadataBuffer if (lastSerializePath == null || !seriesPath.getDevice().equals(lastSerializePath.getDevice())) { @@ -656,7 +656,7 @@ private void writeChunkMetadataToTempFile( // for each device, we only serialize it once, in order to save io ReadWriteIOUtils.write(seriesPath.getDevice(), tempOutput.wrapAsStream()); } - if (!seriesPath.equals(lastSerializePath) && iChunkMetadataList.size() > 0) { + if (isNewPath && iChunkMetadataList.size() > 0) { // serialize the public info of this measurement ReadWriteIOUtils.writeVar(seriesPath.getMeasurement(), tempOutput.wrapAsStream()); ReadWriteIOUtils.write(iChunkMetadataList.get(0).getDataType(), tempOutput.wrapAsStream()); diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java index ad623c0f65e0c..fd02f1438a68a 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/DiskTSMIterator.java @@ -47,14 +47,14 @@ public class DiskTSMIterator extends TSMIterator { private static final Logger LOG = LoggerFactory.getLogger(DiskTSMIterator.class); - LinkedList endPosForEachDevice; - File cmtFile; - LocalTsFileInput input; - long fileLength = 0; - long currentPos = 0; - long nextEndPosForDevice = 0; - String currentDevice; - boolean remainsInFile = true; + private LinkedList endPosForEachDevice; + private File cmtFile; + private LocalTsFileInput input; + private long fileLength = 0; + private long currentPos = 0; + private long nextEndPosForDevice = 0; + private String currentDevice; + private boolean remainsInFile = true; protected DiskTSMIterator( File cmtFile, diff --git a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java index fae1b63b8c448..f11242f296240 100644 --- a/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java +++ b/tsfile/src/main/java/org/apache/iotdb/tsfile/write/writer/tsmiterator/TSMIterator.java @@ -45,8 +45,8 @@ * source from memory or disk. Static method getTSMIteratorInMemory returns a TSMIterator that reads * from memory, and static method getTSMIteratorInDisk returns a TSMIterator that reads from disk. 
*/ -public class TSMIterator implements Iterator> { - private static Logger LOG = LoggerFactory.getLogger(TSMIterator.class); +public class TSMIterator { + private static final Logger LOG = LoggerFactory.getLogger(TSMIterator.class); protected List>> sortedChunkMetadataList; protected Iterator>> iterator; @@ -66,22 +66,15 @@ public static TSMIterator getTSMIteratorInDisk( return new DiskTSMIterator(cmtFile, chunkGroupMetadataList, serializePos); } - @Override public boolean hasNext() { return iterator.hasNext(); } - @Override - public Pair next() { + public Pair next() throws IOException { Pair> nextPair = iterator.next(); - try { - return new Pair<>( - nextPair.left.getFullPath(), - constructOneTimeseriesMetadata(nextPair.left.getMeasurement(), nextPair.right)); - } catch (IOException e) { - LOG.error("Meets IOException when getting next TimeseriesMetadata", e); - return null; - } + return new Pair<>( + nextPair.left.getFullPath(), + constructOneTimeseriesMetadata(nextPair.left.getMeasurement(), nextPair.right)); } public static TimeseriesMetadata constructOneTimeseriesMetadata(