diff --git a/Cargo.lock b/Cargo.lock index 5310170..9ab9e1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1582,7 +1582,7 @@ dependencies = [ [[package]] name = "tataki" -version = "0.2.2" +version = "0.3.0" dependencies = [ "anyhow", "assert_cmd", diff --git a/Cargo.toml b/Cargo.toml index e3a4ddd..e8b73bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tataki" authors = ["Tazro Ohta (tazro.ohta@chiba-u.jp)"] -version = "0.2.2" +version = "0.3.0" edition = "2021" repository = "https://github.com/sapporo-wes/tataki" license = "apache-2.0" diff --git a/Dockerfile b/Dockerfile index d3826af..0ed70c7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM debian:bookworm-slim LABEL org.opencontainers.image.authors="Tazro Ohta (tazro.ohta@chiba-u.jp)" LABEL org.opencontainers.image.url="https://github.com/sapporo-wes/tataki" -LABEL org.opencontainers.image.version="v0.2.2" +LABEL org.opencontainers.image.version="v0.3.0" LABEL org.opencontainers.image.licenses="Apache2.0" LABEL org.opencontainers.image.description="CLI tool designed primarily for detecting file formats in the bio-science field" diff --git a/README.md b/README.md index ceb4d33..ed0a6e0 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ docker run --rm -v /var/run/docker.sock:/var/run/docker.sock -v /tmp:/tmp -v $PW ## Quick Start -Determining the file format of a local file: +Determine the file format of a local file: ```shell $ tataki path/to/unknown/file.txt -q @@ -50,11 +50,22 @@ File Path,Edam ID,Label path/to/unknown/file.txt,http://edamontology.org/format_2572,BAM ``` -Output result in YAML format: +Determine the file format of a remote file, and output the result in YAML format: ```shell -$ tataki path/to/unknown/file.txt -q -f yaml -path/to/unknown/file.txt: +$ tataki https://path/to/unknown/file.txt -q -f yaml +https://path/to/unknown/file.txt: + label: BAM + id: http://edamontology.org/format_2572 +``` + +Read all records from the input file: + +This may
take a while depending on the file size. + +```shell +$ tataki https://path/to/unknown/file.txt -q --tidy +https://path/to/unknown/file.txt: label: BAM id: http://edamontology.org/format_2572 ``` @@ -77,21 +88,39 @@ Arguments: [FILE|URL]... Path to the file Options: - -o, --output Path to the output file [default: stdout] - -f [default: csv] [possible values: yaml, tsv, csv, json] - --cache-dir Specify the directory in which to create a temporary directory. If this option is not provided, a temporary directory will be created in the default system temporary directory (/tmp) - -c, --conf Specify the tataki configuration file. If this option is not provided, the default configuration will be used. The option `--dry-run` shows the default configuration file - --dry-run Output the configuration file in yaml format and exit the program. If `--conf` option is not provided, the default configuration file will be shown - -v, --verbose Show verbose log messages - -q, --quiet Suppress all log messages - -h, --help Print help - -V, --version Print version - -Version: v0.2.2 + -o, --output Path to the output file [default: stdout] + -f [default: csv] [possible values: yaml, tsv, csv, json] + -C, --cache-dir Specify the directory in which to create a temporary directory. If this option is not provided, a temporary directory will be created in the default system temporary directory (/tmp) + -c, --conf Specify the tataki configuration file. If this option is not provided, the default configuration will be used. The option `--dry-run` shows the default configuration file + -t, --tidy Attempt to read the whole lines from the input files + -n, --num-records Number of records to read from the input file. Conflicts with `--tidy` option [default: 100000] + --dry-run Output the configuration file in yaml format and exit the program.
If `--conf` option is not provided, the default configuration file will be shown + -v, --verbose Show verbose log messages + -q, --quiet Suppress all log messages + -h, --help Print help + -V, --version Print version + +Version: v0.3.0 ``` ## Detailed Usage +### Changing the number of records to read + +By default, Tataki reads the first 100,000 records of the input file. You can change this number by using the `-n|--num-records=` option. + +```shell +tataki -n 1000 +``` + +#### Avoiding misidentification + +By using the `-t|--tidy` option, Tataki attempts to read the whole lines from the input files. This option helps when the file is truncated or its end is corrupted. + +```shell +tataki -t +``` + ### Determining Formats in Your Preferred Order Using the `-c|--conf=` option allows you to change the order or set the file formats to use for determination.