dataTear

Switch to：English document
知识库
dataTear

dataTear 将数据拆分成数据碎片来进行管理；在这种格式下，可以实现高效读取，避免不必要的数据读取操作。
Maven 依赖

Maven存储库 url: https://s01.oss.sonatype.org/content/repositories/snapshots

      <repositories>
          <repository>
              <id>a</id>
              <name>sonatype</name>
              <url>https://s01.oss.sonatype.org/content/repositories/snapshots</url>
          </repository>
      </repositories>

      <dependencies>
          <dependency>
              <groupId>io.github.BeardedManZhao</groupId>
              <artifactId>dataTear</artifactId>
              <version>1.4-SNAPSHOT</version>
          </dependency>
      </dependencies>

使用示例

下面是针对dataTear文件读写的一个示例：DTMaster 是数据输出组件，Reader 是数据读取组件，这两个组件的超接口是同一个（RW），灵活性较强。参数既可以采用链式设置，也可以分步设置。具体使用方式请参阅下面 main 函数中的代码与注释。
完整API的调用示例

此处的API调用相对完整，使用到的功能是比较全面的，您可以按照下面的API调用去进行集成开发。

/**
 * Full API walkthrough: converts a delimited text file into dataTear fragments
 * with a {@code DTMaster}, then reads them back through a {@code DTRead},
 * printing the elapsed time of each phase to standard error.
 */
public class DataTearTest {
    public static void main(String[] args) throws IOException {
        BasicConfigurator.configure();
        long writeStart = System.currentTimeMillis();
        // Instantiate the Master through RW.
        DTMaster dtMaster = new DTMaster(s -> RW.getDT_UDF_Stream(DT_builtIn_UDF.LOCAL_GZIP).writeStream(s)) // An external data component can be plugged in here, or one can be fetched from the algorithm library via the RW interface
                .ReadFormat(DataSourceFormat.built_in).WriterFormat(DataOutputFormat.UDT) // Data input and output modes
                .setUseSynchronization(true) // Write synchronously: wait for the output to finish before ending
                .setIn_FilePath("D:\\互联网信息.txt") // Path of the file to read
                .setOUT_FilePath("C:\\Users\\4\\Desktop\\out") // Directory the DataTear data is written to
                .setSplitrex(",") // Column separator of the input data
                .setOutSplit(",") // Column separator of the output data
                .setPrimaryNum(0) // Index of the primary-key column; that column's data becomes part of the NameManager
                .setFragmentationNum(2); // Number of data fragments to produce
        // Run the component.
        runRW(dtMaster);

        System.err.println("ok !  写数据耗时：" + (System.currentTimeMillis() - writeStart) + "毫秒");

        /* ---- Boundary between the two data components: writer above, reader below ---- */

        long readStart = System.currentTimeMillis();
        // Instantiate the Reader through RW.
        Reader dtRead = new DTRead(s -> RW.getDT_UDF_Stream(DT_builtIn_UDF.LOCAL_GZIP).readStream(s)) // An external data component can be plugged in here, or one can be fetched from the algorithm library via the RW interface
                .setPrimaryCharacteristic(data -> true) // Primary-key predicate: fragments holding a matching primary key are read
                .setUseMultithreading(true) // NOTE(review): the original comment said "synchronous reading", but the setter name suggests multithreaded reading - confirm
                .setMaxOutTimeMS(10000) // Maximum read timeout in milliseconds; reading stops immediately once exceeded
                .setIn_FilePath("C:\\Users\\4\\Desktop\\out\\NameManager.NDT"); // Path of the NameManager to read
        // Run the component.
        runRW(dtRead);

        System.err.println("ok !  读数据耗时：" + (System.currentTimeMillis() - readStart) + "毫秒");
        System.err.println("源文件：" + dtRead.getSrcFile() + "\t创建时间：" + new Date(dtRead.getCreateDateMS()).toLocaleString());
        System.err.println("数据行数：" + dtRead.getDataString().split("\n").length);
    }

    /**
     * Runs one RW component through its open / operate / close life cycle.
     *
     * <p>Once {@code openStream()} has succeeded, {@code closeStream()} is always
     * invoked, even when {@code op_Data()} returns {@code false} or throws, so an
     * opened stream is not left behind by a failed data operation.
     *
     * @param rw the component to run
     * @return {@code true} only if opening, the data operation and closing all
     *         reported success
     * @throws IOException if any of the three phases throws it
     */
    public static boolean runRW(RW rw) throws IOException {
        if (!rw.openStream()) {
            // Nothing was opened, so there is nothing to close.
            return false;
        }
        boolean succeeded = false;
        try {
            succeeded = rw.op_Data();
        } finally {
            // Close unconditionally; a failed close also turns the result into false.
            boolean closed = rw.closeStream();
            succeeded = succeeded && closed;
        }
        return succeeded;
    }
}

最简单的API示例

/**
 * Minimal dataTear example: converts a whitespace-delimited text file with a
 * {@code DTMaster}, reads the result back with a {@code DTRead}, and prints the
 * data that was read to standard output.
 */
public class DataTear {
    public static void main(String[] args) throws IOException {
        BasicConfigurator.configure();

        // Output side: configure the writer and run it.
        DTMaster writer = new DTMaster(null)
                .WriterFormat(DataOutputFormat.built_in) // Careful: with no UDF set here, data is automatically written in "LOCAL_TEXT" mode
                .setPrimaryNum(0)
                .setIn_FilePath("C:\\Users\\4\\Desktop\\mathematicalModeling\\Attached documents\\test.txt") // File to convert
                .setOUT_FilePath("C:\\Users\\4\\Desktop\\mathematicalModeling\\out") // Where the NM and the other converted files are stored
                .setSplitrex("\\s+");
        runLifecycle(writer);

        // Input side: configure the reader and run it.
        Reader ndtReader = new DTRead(path -> RW.getDT_UDF_Stream(DT_builtIn_UDF.LOCAL_TEXT).readStream(path))
                .setPrimaryCharacteristic(key -> true)
                .setIn_FilePath("C:\\Users\\4\\Desktop\\mathematicalModeling\\out\\NameManager.NDT"); // NM path of the data to read
        runLifecycle(ndtReader);

        System.out.println(ndtReader.getDataString());
    }

    /**
     * Opens the component's stream, performs its data operation and closes the
     * stream, in that order. The boolean results of the three calls are ignored.
     *
     * @param component the writer or reader to run
     * @throws IOException if any of the three calls throws it
     */
    private static void runLifecycle(RW component) throws IOException {
        component.openStream();
        component.op_Data();
        component.closeStream();
    }
}