From de3757d922f62ee6afb1abd95c8f4953df0ff6e5 Mon Sep 17 00:00:00 2001
From: Titusz Pan
{
"<function_name>": {
diff --git a/options/options/index.html b/options/options/index.html
index fe1a2e1..7864d43 100755
--- a/options/options/index.html
+++ b/options/options/index.html
@@ -1400,8 +1400,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1430,8 +1430,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1459,8 +1459,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1489,8 +1489,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1519,8 +1519,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1549,8 +1549,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1579,8 +1579,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1609,8 +1609,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1639,8 +1639,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1679,8 +1679,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1709,8 +1709,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1739,8 +1739,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1769,8 +1769,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1799,8 +1799,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1829,8 +1829,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1859,8 +1859,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1889,8 +1889,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
@@ -1919,8 +1919,8 @@
- class-attribute
instance-attribute
+ class-attribute
#
diff --git a/search/search_index.json b/search/search_index.json
index 5c2642e..682b25c 100755
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"ISCC - Codec & Algorithms","text":"iscc-core
is the reference implementation of the core algorithms of the ISCC (International Standard Content Code)
"},{"location":"#what-is-the-iscc","title":"What is the ISCC","text":"The ISCC is a similarity preserving fingerprint and identifier for digital media assets.
ISCCs are generated algorithmically from digital content, just like cryptographic hashes. However, instead of using a single cryptographic hash function to identify data only, the ISCC uses various algorithms to create a composite identifier that exhibits similarity-preserving properties (soft hash).
The component-based structure of the ISCC identifies content at multiple levels of abstraction. Each component is self-describing, modular, and can be used separately or with others to aid in various content identification tasks. The algorithmic design supports content deduplication, database synchronization, indexing, integrity verification, timestamping, versioning, data provenance, similarity clustering, anomaly detection, usage tracking, allocation of royalties, fact-checking and general digital asset management use-cases.
"},{"location":"#what-is-iscc-core","title":"What is iscc-core
","text":"iscc-core
is a Python-based reference library of the core algorithms to create standard-compliant ISCC codes. It is also a good reference for porting ISCC to other programming languages.
Tip
This is a low level reference implementation that does not include features like mediatype detection, metadata extraction or file format specific content extraction. Please have a look at the iscc-sdk which adds those higher level features on top of the iscc-core
library.
"},{"location":"#project-status","title":"Project Status","text":"The ISCC is under development as ISO/CD 24138 - International Standard Content Code within ISO/TC 46/SC 9/WG 18.
"},{"location":"#iscc-architecture","title":"ISCC Architecture","text":""},{"location":"#iscc-maintypes","title":"ISCC MainTypes","text":"Idx Slug Bits Purpose 0 META 0000 Match on metadata similarity 1 SEMANTIC 0001 Match on semantic content similarity 2 CONTENT 0010 Match on perceptual content similarity 3 DATA 0011 Match on data similarity 4 INSTANCE 0100 Match on data identity 5 ISCC 0101 Composite of two or more components with common header"},{"location":"#installation","title":"Installation","text":"Use the package manager pip to install iscc-core
.
pip install iscc-core\n
"},{"location":"#quick-start","title":"Quick Start","text":"import json\nimport iscc_core as ic\nmeta_code = ic.gen_meta_code(name=\"ISCC Test Document!\")\nprint(f\"Meta-Code: {meta_code['iscc']}\")\nprint(f\"Structure: {ic.iscc_explain(meta_code['iscc'])}\\n\")\n# Extract text from file\nwith open(\"demo.txt\", \"rt\", encoding=\"utf-8\") as stream:\ntext = stream.read()\ntext_code = ic.gen_text_code_v0(text)\nprint(f\"Text-Code: {text_code['iscc']}\")\nprint(f\"Structure: {ic.iscc_explain(text_code['iscc'])}\\n\")\n# Process raw bytes of textfile\nwith open(\"demo.txt\", \"rb\") as stream:\ndata_code = ic.gen_data_code(stream)\nprint(f\"Data-Code: {data_code['iscc']}\")\nprint(f\"Structure: {ic.iscc_explain(data_code['iscc'])}\\n\")\nstream.seek(0)\ninstance_code = ic.gen_instance_code(stream)\nprint(f\"Instance-Code: {instance_code['iscc']}\")\nprint(f\"Structure: {ic.iscc_explain(instance_code['iscc'])}\\n\")\n# Combine ISCC-UNITs into ISCC-CODE\niscc_code = ic.gen_iscc_code(\n(meta_code[\"iscc\"], text_code[\"iscc\"], data_code[\"iscc\"], instance_code[\"iscc\"])\n)\n# Create convenience `Code` object from ISCC string\niscc_obj = ic.Code(iscc_code[\"iscc\"])\nprint(f\"ISCC-CODE: {ic.iscc_normalize(iscc_obj.code)}\")\nprint(f\"Structure: {iscc_obj.explain}\")\nprint(f\"Multiformat: {iscc_obj.mf_base32}\\n\")\n# Compare with changed ISCC-CODE:\nnew_dc, new_ic = ic.Code.rnd(mt=ic.MT.DATA), ic.Code.rnd(mt=ic.MT.INSTANCE)\nnew_iscc = ic.gen_iscc_code((meta_code[\"iscc\"], text_code[\"iscc\"], new_dc.uri, new_ic.uri))\nprint(f\"Compare ISCC-CODES:\\n{iscc_obj.uri}\\n{new_iscc['iscc']}\")\nprint(json.dumps(ic.iscc_compare(iscc_obj.code, new_iscc[\"iscc\"]), indent=2))\n
The output of this example is as follows:
Meta-Code: ISCC:AAAT4EBWK27737D2\nStructure: META-NONE-V0-64-3e103656bffdfc7a\n\nText-Code: ISCC:EAAQMBEYQF6457DP\nStructure: CONTENT-TEXT-V0-64-060498817dcefc6f\n\nData-Code: ISCC:GAA7UJMLDXHPPENG\nStructure: DATA-NONE-V0-64-fa258b1dcef791a6\n\nInstance-Code: ISCC:IAA3Y7HR2FEZCU4N\nStructure: INSTANCE-NONE-V0-64-bc7cf1d14991538d\n\nISCC-CODE: ISCC:KACT4EBWK27737D2AYCJRAL5Z36G76RFRMO4554RU26HZ4ORJGIVHDI\nStructure: ISCC-TEXT-V0-MCDI-3e103656bffdfc7a060498817dcefc6ffa258b1dcef791a6bc7cf1d14991538d\nMultiformat: bzqavabj6ca3fnp757r5ambeyqf6457dp7isywhoo66i2npd46hiutektru\n\nCompare ISCC-CODES:\nISCC:KACT4EBWK27737D2AYCJRAL5Z36G76RFRMO4554RU26HZ4ORJGIVHDI\nISCC:KACT4EBWK27737D2AYCJRAL5Z36G7Y7HA2BMECKMVRBEQXR2BJOS6NA\n{\n \"meta_dist\": 0,\n \"content_dist\": 0,\n \"data_dist\": 33,\n \"instance_match\": false\n}\n
"},{"location":"#documentation","title":"Documentation","text":"Documentation is published athttps://core.iscc.codes
"},{"location":"#development","title":"Development","text":"Requirements
- Python 3.7.2 or higher for code generation and static site building.
- Poetry for installation and dependency management.
Development Setup
git clone https://github.com/iscc/iscc-core.git\ncd iscc-core\npoetry install\n
Development Tasks
Tests, coverage, code formatting and other tasks can be run with the poe
command:
poe\n\nPoe the Poet - A task runner that works well with poetry.\nversion 0.18.1\n\nResult: No task specified.\n\nUSAGE\n poe [-h] [-v | -q] [--root PATH] [--ansi | --no-ansi] task [task arguments]\nGLOBAL OPTIONS\n -h, --help Show this help page and exit\n--version Print the version and exit\n-v, --verbose Increase command output (repeatable)\n-q, --quiet Decrease command output (repeatable)\n-d, --dry-run Print the task contents but don't actually run it\n --root PATH Specify where to find the pyproject.toml\n --ansi Force enable ANSI output\n --no-ansi Force disable ANSI output\nCONFIGURED TASKS\n gentests Generate conformance test data\n format Code style formating with black\n docs Copy README.md to /docs\n format-md Markdown formating with mdformat\n lf Convert line endings to lf\n test Run tests with coverage\n sec Security check with bandit\n all\n
Use poe all
to run all tasks before committing any changes.
"},{"location":"#maintainers","title":"Maintainers","text":"@titusz
"},{"location":"#contributing","title":"Contributing","text":"Pull requests are welcome. For significant changes, please open an issue first to discuss your plans. Please make sure to update tests as appropriate.
You may also want to join our developer chat on Telegram at https://t.me/iscc_dev.
"},{"location":"changelog/","title":"Changelog","text":""},{"location":"changelog/#105-2023-12-07","title":"[1.0.5] - 2023-12-07","text":" - Improved simhash performance
- Added native dct and wtahash support
- Added Python 3.12 support
- Updated and relaxed dependencies
"},{"location":"changelog/#104-2023-06-05","title":"[1.0.4] - 2023-06-05","text":" - Removed bases dependency
- Fixed mkdocstrings
- Updated dependencies
"},{"location":"changelog/#103-2023-03-12","title":"[1.0.3] - 2023-03-12","text":" - Fix binary wheels
"},{"location":"changelog/#102-2023-03-12","title":"[1.0.2] - 2023-03-12","text":" - Publish binary wheels
"},{"location":"changelog/#101-2023-03-11","title":"[1.0.1] - 2023-03-11","text":" - Switch to standard bitarray module
- Switch tests to latest environments
- Add Python 3.11 support to TROVE classifiers
"},{"location":"changelog/#100-2023-01-24","title":"[1.0.0] - 2023-01-24","text":" - ISO/CD 24138 v1 Release
- Updated dependencies
"},{"location":"changelog/#0214-2023-01-17","title":"[0.2.14] - 2023-01-17","text":" - Added ISO Reference documentation
- Removed non-standard conformance tests
- Fixed Meta-Code documentation
- Improved documentation CSS
"},{"location":"changelog/#0213-2023-01-16","title":"[0.2.13] - 2023-01-16","text":" - Added documentation for options
- Added python 3.11 support
- Added Markdown formatting
- Added development documentation
- Updated architecture figures
- Documentation cleanup
- Improved example code
- Optimized LF conversion
- Updated dependencies
"},{"location":"changelog/#0212-2022-11-24","title":"[0.2.12] - 2022-11-24","text":" - Fixed issue with data url compound media types
- Added ISCC version validation to
iscc_validate
- Added prefix check to
iscc_normalize
- Bundled fonts with documentation
- Updated dependencies
"},{"location":"changelog/#0211-2022-07-03","title":"[0.2.11] - 2022-07-03","text":" - Add support for gracefull build failures
"},{"location":"changelog/#0210-2022-07-03","title":"[0.2.10] - 2022-07-03","text":" - Fix pip instalation is missing setuptools
- Update mkdocs
"},{"location":"changelog/#029-2022-07-03","title":"[0.2.9] - 2022-07-03","text":" - Added iscc_compare function
- Optimized soft_hash_audio performance
- Removed Cython from build requirements
- Fixed api listing tool
- Updated codec architecture figure
- Updated dependencies
"},{"location":"changelog/#028-2022-04-21","title":"[0.2.8] - 2022-04-21","text":" - Fixed bug with subtype for semantic code
- Changed URI representation to upper case
- Changed to disallow ISCC-ID creation from ISCC-IDs
- Added line conversion tool
- Removed source wheel distribution
- Updated dependencies
"},{"location":"changelog/#027-2022-04-16","title":"[0.2.7] - 2022-04-16","text":" - Fixed bug in iscc_id_incr_v0
- Added support to accept ISCC-ID URI as input for iscc_id_incr_v0
- Added guard against custom subtype in random ISCC-CODE generation.
"},{"location":"changelog/#026-2022-04-13","title":"[0.2.6] - 2022-04-13","text":" - Added
KY
and MM
to valid prefixes - Added support to check for compiled extension modules
- Added universal wheel distribution
"},{"location":"changelog/#025-2022-04-10","title":"[0.2.5] - 2022-04-10","text":" - Fixed missing
jcs
dependency - Added SubType
NONE
to MT.ISCC to distinguish from SUM - Added support for deterministic generation of random ISCC-CODEs
- Added support for custom bit-sizes for random ISCC-CODEs
- Moved changelog into separate file
- Updated dependencies
"},{"location":"changelog/#024-2022-03-19","title":"[0.2.4] - 2022-03-19","text":" - Updated dependencies
- Added Flake.from_int and Flake.from_string
- Made Flake comparable and hashable
- Use standard hex encoded multihash for datahash and metahash
"},{"location":"changelog/#023-2022-03-06","title":"[0.2.3] - 2022-03-06","text":" - Update to iscc-schema 0.3.3
- Change image normalization instructions
- Fix issue with exporting cdc cython only functions
"},{"location":"changelog/#021-2022-03-03","title":"[0.2.1] - 2022-03-03","text":" - Cleanup and update dependencies
- Fix bitarray api change
- Fix developer commands
"},{"location":"changelog/#020-2022-02-24","title":"[0.2.0] - 2022-02-24","text":" - Complete API refactoring
- Use Data-URL as input for Meta-Code
- Use wallet address for ISCC-ID creation
- Added new Flake-Code (distributed time/random ID)
- Replaced assertions with exceptions
- Use secure random functions
- Retired Python 3.6 support (EOL)
- Return simple
dict
objects from generator functions - Added ISCC string validation
- Added multiple helper functions
"},{"location":"changelog/#019-2021-12-17","title":"[0.1.9] - 2021-12-17","text":" - Added warning on non-standard options
- Added multiformats support
- Added uri representation
- Removed redundant cdc_avg_chunk_size option
- Updated codec format documentation
"},{"location":"changelog/#018-2021-12-12","title":"[0.1.8] - 2021-12-12","text":" - Added conformance tests for all top level functions
- Added conformance tests to source dir
- Added conformance module with
selftest
function - Changed gen_image_code to accept normalized pixels instead of stream
- Changed opts to core_opts
- Removed image pre-processing and Pillow dependency
- Fixed readability of conformance tests
- Fixed soft_hash_video_v0 to accept non-tuple sequences
- Updated example code
"},{"location":"changelog/#017-2021-12-09","title":"[0.1.7] - 2021-12-09","text":" - Add dotenv for enviroment based configuration
- Cleanup package toplevel imports
- Return schema objects for iscc_code and iscc_id
- Exclude unset and none values from result dicts
- Add support for multiple code combinations for ISCC-CODE
- Add support for ISCC-ID based on singular Instance-Code
- Add initial conformance test system
"},{"location":"changelog/#016-2021-11-29","title":"[0.1.6] - 2021-11-29","text":" - Show counter for ISCC-ID in Code.explain
"},{"location":"changelog/#015-2021-11-28","title":"[0.1.5] - 2021-11-28","text":" - Fix documentation
- Change metahash creation logic
- Refactor models
- Add Content-Code-Mixed
- Add ISCC-ID
- Refactor
compose
to gen_iscc_code
- Refactor
models
to schema
"},{"location":"changelog/#014-2021-11-17","title":"[0.1.4] - 2021-11-17","text":" - Simplified options
- Optimize video WTA-hash for use with 64-bit granular features
"},{"location":"changelog/#013-2021-11-15","title":"[0.1.3] - 2021-11-15","text":" - Try to compile Cython/C accelerator modules when installing via pip
- Simplify soft_hash api return values
- Add .code() method to InstanceHasher, DataHasher
- Remove granular fingerprint calculation
- Add more top-level imports
"},{"location":"changelog/#012-2021-11-14","title":"[0.1.2] - 2021-11-14","text":" - Export more functions to toplevel
- Return schema driven objects from ISCC code generators.
"},{"location":"changelog/#011-2021-11-14","title":"[0.1.1] - 2021-11-14","text":" - Fix packaging problems
"},{"location":"changelog/#010-2021-11-13","title":"[0.1.0] - 2021-11-13","text":" - Initial release
"},{"location":"conformance/","title":"ISCC - Conformance Testing","text":"An application that claims ISCC conformance MUST pass all core functions from the ISCC conformance test suite. The test suite is available as JSON data on Github. Test data is structured as follows:
{\n\"<function_name>\": {\n\"<test_name>\": {\n\"inputs\": [\"<value1>\", \"<value2>\"],\n\"outputs\": [\"<value1>\", \"<value2>\"]\n}\n}\n}\n
Inputs that are expected to be raw bytes or byte-streams
are embedded as HEX encoded strings in JSON and prefixed with stream:
or bytes
to support automated decoding during implementation testing.
Example
Byte-stream outputs in JSON test data:
\"gen_data_code_v0\": {\n\"test_0000_two_bytes_64\": {\n\"inputs\": [\n\"stream:ff00\",\n64\n],\n\"outputs\": {\n\"iscc\": \"GAAXL2XYM5BQIAZ3\"\n}\n},\n...\n
"},{"location":"conformance/#iscc_core.conformance.conformance_testdata","title":"conformance_testdata()
","text":"Yield tuples of test data.
Returns:
Type Description Generator[Tuple[str, Callable, List[Any], List[Any]]]
Tuple with testdata (test_name, func_obj, inputs, outputs)
"},{"location":"conformance/#iscc_core.conformance.conformance_selftest","title":"conformance_selftest()
","text":"Run conformance tests.
Returns:
Type Description bool
whether all tests passed
"},{"location":"constants/","title":"ISCC - Types and Constants","text":""},{"location":"constants/#iscc_core.constants.MT","title":"MT
","text":""},{"location":"constants/#iscc_core.constants.MT--mt-maintypes","title":"MT - MainTypes","text":"Uint Symbol Bits Purpose 0 META 0000 Match on metadata similarity 1 SEMANTIC 0001 Match on semantic content similarity 2 CONTENT 0010 Match on perceptual content similarity 3 DATA 0011 Match on data similarity 4 INSTANCE 0100 Match on data identity 5 ISCC 0101 Composite of two or more ISCC-UNITs with common header"},{"location":"constants/#iscc_core.constants.ST","title":"ST
","text":""},{"location":"constants/#iscc_core.constants.ST--st-subtypes","title":"ST - SubTypes","text":"Uint Symbol Bits Purpose 0 NONE 0000 For MainTypes that do not specify SubTypes"},{"location":"constants/#iscc_core.constants.ST_CC","title":"ST_CC
","text":""},{"location":"constants/#iscc_core.constants.ST_CC--st_cc","title":"ST_CC","text":"SubTypes for MT.CONTENT
Uint Symbol Bits Purpose 0 TEXT 0000 Match on syntactic text similarity 1 IMAGE 0001 Match on perceptual image similarity 2 AUDIO 0010 Match on audio chroma similarity 3 VIDEO 0011 Match on perceptual similarity 4 MIXED 0100 Match on similarity of content codes"},{"location":"constants/#iscc_core.constants.ST_ISCC","title":"ST_ISCC
","text":""},{"location":"constants/#iscc_core.constants.ST_ISCC--st_iscc","title":"ST_ISCC","text":"SubTypes for MT.ISCC
Uint Symbol Bits Purpose 0 TEXT 0000 Composite ISCC including Text-Code 1 IMAGE 0001 Composite ISCC including Image-Code 2 AUDIO 0010 Composite ISCC including Audio-Code 3 VIDEO 0011 Composite ISCC including Video-Code 4 MIXED 0100 Composite ISCC including Mixed-Code 5 SUM 0101 Composite ISCC including only Data- and Instance-Code 6 NONE 0110 Composite ISCC including Meta, Data and Instance-Code"},{"location":"constants/#iscc_core.constants.VS","title":"VS
","text":""},{"location":"constants/#iscc_core.constants.VS--vs-version","title":"VS - Version","text":"Code Version
Uint Symbol Bits Purpose 0 V0 0000 Initial Version of Code without breaking changes"},{"location":"constants/#iscc_core.constants.LN","title":"LN
","text":""},{"location":"constants/#iscc_core.constants.LN--ln-length","title":"LN - Length","text":"Valid lengths for hash-digests.
- L32 = 32
- L64 = 64
- L72 = 72
- L80 = 80
- L96 = 96
- L128 = 128
- L160 = 160
- L192 = 192
- L224 = 224
- L256 = 256
"},{"location":"constants/#iscc_core.constants.MULTIBASE","title":"MULTIBASE
","text":"Supported Multibase encodings.
- base16 -> f
- base32 -> b
- base32hex -> v
- base58btc -> z
- base64url -> u
"},{"location":"iscc_code/","title":"ISCC-CODE","text":"A multi-component identifier for digital media assets.
An ISCC-CODE can be generated from the concatenation of the digests of the following five ISCC-UNITs together with a single common header:
- Meta-Code - Encodes metadata similarity
- Semantic-Code - Encodes semantic content similarity (to be developed)
- Content-Code - Encodes syntactic/perceptual similarity
- Data-Code - Encodes raw bitstream similarity
- Instance-Code - Data checksum
The following sequences of ISCC-UNITs are possible:
- Data, Instance
- Content, Data, Instance
- Semantic, Data, Instance
- Content, Semantic, Data, Instance
- Meta, Data, Instance
- Meta, Content, Data, Instance
- Meta, Semantic, Data, Instance
- Meta, Semantic, Content, Data, Instance
"},{"location":"iscc_code/#iscc_core.iscc_code.gen_iscc_code_v0","title":"gen_iscc_code_v0(codes)
","text":"Combine multiple ISCC-UNITS to an ISCC-CODE with a common header using algorithm v0.
Parameters:
Name Type Description Default codes
Sequence[str]
A valid sequence of singular ISCC-UNITS.
required Returns:
Type Description dict
An ISCC object with ISCC-CODE
Source code in iscc_core\\iscc_code.py
def gen_iscc_code_v0(codes):\n# type: (Sequence[str]) -> dict\n\"\"\"\n Combine multiple ISCC-UNITS to an ISCC-CODE with a common header using\n algorithm v0.\n :param Sequence[str] codes: A valid sequence of singluar ISCC-UNITS.\n :return: An ISCC object with ISCC-CODE\n :rtype: dict\n \"\"\"\ncodes = [ic.iscc_clean(code) for code in codes]\n# Check basic constraints\nif len(codes) < 2:\nraise ValueError(\"Minimum two ISCC units required to generate valid ISCC-CODE\")\nfor code in codes:\nif len(code) < 16:\nraise ValueError(f\"Cannot build ISCC-CODE from units shorter than 64-bits: {code}\")\n# Decode units and sort by MainType\ndecoded = sorted(\n[ic.decode_header(ic.decode_base32(code)) for code in codes], key=itemgetter(0)\n)\nmain_types = tuple(d[0] for d in decoded)\nif main_types[-2:] != (ic.MT.DATA, ic.MT.INSTANCE):\nraise ValueError(f\"ISCC-CODE requires at least MT.DATA and MT.INSTANCE units.\")\n# Determine SubType (generic mediatype)\nsub_types = [t[1] for t in decoded if t[0] in {ic.MT.SEMANTIC, ic.MT.CONTENT}]\nif len(set(sub_types)) > 1:\nraise ValueError(f\"Semantic-Code and Content-Code must be of same SubType\")\nst = sub_types.pop() if sub_types else ic.ST_ISCC.SUM if len(codes) == 2 else ic.ST_ISCC.NONE\n# Encode unit combination\nencoded_length = ic.encode_units(main_types[:-2])\n# Collect and truncate unit digests to 64-bit\ndigest = b\"\".join([t[-1][:8] for t in decoded])\nheader = ic.encode_header(ic.MT.ISCC, st, ic.VS.V0, encoded_length)\ncode = ic.encode_base32(header + digest)\niscc = \"ISCC:\" + code\nreturn dict(iscc=iscc)\n
"},{"location":"iscc_id/","title":"ISCC-ID","text":"A decentralized, owned, and short identifier for digital assets.
The ISCC-ID is generated from a similarity-hash of the units of an ISCC-CODE together with a blockchain wallet address. Its SubType designates the blockchain from which the ISCC-ID was minted. The similarity-hash is always at least 64-bits and optionally suffixed with a uvarint
encoded uniqueness counter
. The uniqueness counter
is added and incremented only if the mint collides with a pre-existing ISCC-ID minted from the same blockchain from a different ISCC-CODE or from an identical ISCC-CODE registered by a different signatory.
"},{"location":"iscc_id/#iscc_core.iscc_id.gen_iscc_id","title":"gen_iscc_id(iscc_code, chain_id, wallet, uc = 0)
","text":"Generate ISCC-ID from ISCC-CODE with the latest standard algorithm.
Parameters:
Name Type Description Default iscc_code
str
The ISCC-CODE from which to mint the ISCC-ID.
required chain_id
int
Chain-ID of blockchain from which the ISCC-ID is minted.
required wallet
str
The wallet address that signs the ISCC declaration
required uc
int
Uniqueness counter of ISCC-ID.
0
Returns:
Type Description dict
ISCC object with an ISCC-ID
"},{"location":"iscc_id/#iscc_core.iscc_id.gen_iscc_id_v0","title":"gen_iscc_id_v0(iscc_code, chain_id, wallet, uc = 0)
","text":"Generate an ISCC-ID from an ISCC-CODE with uniqueness counter 'uc' with algorithm v0.
Parameters:
Name Type Description Default iscc_code
str
The ISCC-CODE from which to mint the ISCC-ID.
required chain_id
int
Chain-ID of blockchain from which the ISCC-ID is minted.
required wallet
str
The wallet address that signs the ISCC declaration
required uc
int
Uniqueness counter of ISCC-ID.
0
Returns:
Type Description dict
ISCC object with an ISCC-ID
"},{"location":"iscc_id/#iscc_core.iscc_id.soft_hash_iscc_id_v0","title":"soft_hash_iscc_id_v0(iscc_code, wallet, uc = 0)
","text":"Calculate ISCC-ID hash digest from ISCC-CODE with algorithm v0.
Accepts an ISCC-CODE or any sequence of ISCC-UNITs.
Parameters:
Name Type Description Default iscc_code
str
ISCC-CODE
required wallet
str
The wallet address that signs the ISCC declaration
required uc
int
Uniqueness counter for ISCC-ID.
0
Returns:
Type Description bytes
Digest for ISCC-ID without header but including uniqueness counter.
"},{"location":"iscc_id/#iscc_core.iscc_id.iscc_id_incr","title":"iscc_id_incr(iscc_id)
","text":"Increment uniqueness counter of an ISCC-ID with latest standard algorithm.
Parameters:
Name Type Description Default iscc_id
str
Base32-encoded ISCC-ID.
required Returns:
Type Description str
Base32-encoded ISCC-ID with counter incremented by one.
"},{"location":"iscc_id/#iscc_core.iscc_id.iscc_id_incr_v0","title":"iscc_id_incr_v0(iscc_id)
","text":"Increment uniqueness counter of an ISCC-ID with algorithm v0.
Parameters:
Name Type Description Default iscc_id
str
Base32-encoded ISCC-ID.
required Returns:
Type Description str
Base32-encoded ISCC-ID with counter incremented by one (without \"ISCC:\" prefix).
"},{"location":"iscc_id/#iscc_core.iscc_id.alg_simhash_from_iscc_id","title":"alg_simhash_from_iscc_id(iscc_id, wallet)
","text":"Extract similarity preserving hex-encoded hash digest from ISCC-ID
We need to un-xor the ISCC-ID hash digest with the wallet address hash to obtain the similarity preserving bytestring.
"},{"location":"iso-reference/","title":"ISCC - ISO Reference","text":"The following functions are the reference implementations of ISO 24138:
"},{"location":"iso-reference/#iso-24138-51-meta-code","title":"ISO 24138 / 5.1 Meta-Code","text":"gen_meta_code_v0(name, description=None, meta=None, bits=ic.core_opts.meta_bits)
# Create an ISCC Meta-Code with the algorithm version 0.
Parameters:
Name Type Description Default name
str
Name or title of the work manifested by the digital asset
required description
Optional[str]
Optional description for disambiguation
None
meta
Optional[Union[dict,str]]
Dict or Data-URL string with extended metadata
None
bits
int
Bit-length of resulting Meta-Code (multiple of 64)
ic.core_opts.meta_bits
Returns:
Type Description dict
ISCC object with possible fields: iscc, name, description, metadata, metahash
Source code in iscc_core\\code_meta.py
def gen_meta_code_v0(name, description=None, meta=None, bits=ic.core_opts.meta_bits):\n# type: (str, Optional[str], Optional[ic.Meta], int) -> dict\n\"\"\"\n Create an ISCC Meta-Code with the algorithm version 0.\n :param str name: Name or title of the work manifested by the digital asset\n :param Optional[str] description: Optional description for disambiguation\n :param Optional[Union[dict,str] meta: Dict or Data-URL string with extended metadata\n :param int bits: Bit-length of resulting Meta-Code (multiple of 64)\n :return: ISCC object with possible fields: iscc, name, description, metadata, metahash\n :rtype: dict\n \"\"\"\n# 1. Normalize `name`\nname = \"\" if name is None else name\nname = text_clean(name)\nname = text_remove_newlines(name)\nname = text_trim(name, ic.core_opts.meta_trim_name)\nif not name:\nraise ValueError(\"Meta-Code requires non-empty name element (after normalization)\")\n# 2. Normalize `description`\ndescription = \"\" if description is None else description\ndescription = text_clean(description)\ndescription = text_trim(description, ic.core_opts.meta_trim_description)\n# Calculate meta_code, metahash, and output metadata values for the different input cases\nif meta:\nif isinstance(meta, str):\n# Data-URL expected\ndurl = meta\npayload = DataURL.from_url(durl).data\nmeta_code_digest = soft_hash_meta_v0(name, payload)\nmetahash = ic.multi_hash_blake3(payload)\nmetadata_value = durl\nelif isinstance(meta, dict):\npayload = jcs.canonicalize(meta)\nmeta_code_digest = soft_hash_meta_v0(name, payload)\nmetahash = ic.multi_hash_blake3(payload)\nmedia_type = \"application/ld+json\" if \"@context\" in meta else \"application/json\"\ndurl_obj = DataURL.from_data(media_type, base64_encode=True, data=payload)\nmetadata_value = durl_obj.url\nelse:\nraise TypeError(f\"metadata must be Data-URL string or dict not {type(meta)}\")\nelse:\npayload = \" \".join((name, description)).strip().encode(\"utf-8\")\nmeta_code_digest = soft_hash_meta_v0(name, 
description)\nmetahash = ic.multi_hash_blake3(payload)\nmetadata_value = None\nmeta_code = ic.encode_component(\nmtype=ic.MT.META,\nstype=ic.ST.NONE,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=meta_code_digest,\n)\niscc = \"ISCC:\" + meta_code\n# Build result\nresult = {\"iscc\": iscc}\nif name:\nresult[\"name\"] = name\nif description:\nresult[\"description\"] = description\nif metadata_value:\nresult[\"meta\"] = metadata_value\nresult[\"metahash\"] = metahash\nreturn result\n
"},{"location":"iso-reference/#iso-24138-53-text-code","title":"ISO 24138 / 5.3 Text-Code","text":"gen_text_code_v0(text, bits=ic.core_opts.text_bits)
# Create an ISCC Text-Code with algorithm v0.
Note
Any markup (like HTML tags or markdown) should be removed from the plain-text before passing it to this function.
Parameters:
Name Type Description Default text
str
Text for Text-Code creation
required bits
int
Bit-length of ISCC Code Hash (default 64)
ic.core_opts.text_bits
Returns:
Type Description dict
ISCC schema instance with Text-Code and an additional property characters
Source code in iscc_core\\code_content_text.py
def gen_text_code_v0(text, bits=ic.core_opts.text_bits):\n# type: (str, int) -> dict\n\"\"\"\n Create an ISCC Text-Code with algorithm v0.\n !!! note\n Any markup (like HTML tags or markdown) should be removed from the plain-text\n before passing it to this function.\n :param str text: Text for Text-Code creation\n :param int bits: Bit-length of ISCC Code Hash (default 64)\n :return: ISCC schema instance with Text-Code and an aditional property `characters`\n :rtype: dict\n \"\"\"\ntext = text_collapse(text)\ncharacters = len(text)\ndigest = soft_hash_text_v0(text)\ntext_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.TEXT,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + text_code\nreturn dict(iscc=iscc, characters=characters)\n
"},{"location":"iso-reference/#iso-24138-54-image-code","title":"ISO 24138 / 5.4 Image-Code","text":"gen_image_code_v0(pixels, bits=ic.core_opts.image_bits)
# Create an ISCC Content-Code Image with algorithm v0.
Parameters:
Name Type Description Default pixels
Sequence[int]
Normalized image pixels (32x32 flattened gray values)
required bits
int
Bit-length of ISCC Content-Code Image (default 64).
ic.core_opts.image_bits
Returns:
Type Description ISCC
ISCC object with Content-Code Image.
Source code in iscc_core\\code_content_image.py
def gen_image_code_v0(pixels, bits=ic.core_opts.image_bits):\n# type: (Sequence[int], int) -> dict\n\"\"\"\n Create an ISCC Content-Code Image with algorithm v0.\n :param Sequence[int] pixels: Normalized image pixels (32x32 flattened gray values)\n :param int bits: Bit-length of ISCC Content-Code Image (default 64).\n :return: ISCC object with Content-Code Image.\n :rtype: ISCC\n \"\"\"\ndigest = soft_hash_image_v0(pixels, bits=bits)\nimage_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.IMAGE,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + image_code\nreturn {\"iscc\": iscc}\n
"},{"location":"iso-reference/#iso-24138-55-audio-code","title":"ISO 24138 / 5.5 Audio-Code","text":"gen_audio_code_v0(cv, bits=ic.core_opts.audio_bits)
# Create an ISCC Content-Code Audio with algorithm v0.
Parameters:
Name Type Description Default cv
Iterable[int]
Chromaprint vector
required bits
int
Bit-length of resulting Content-Code Audio (multiple of 64)
ic.core_opts.audio_bits
Returns:
Type Description dict
ISCC object with Content-Code Audio
Source code in iscc_core\\code_content_audio.py
def gen_audio_code_v0(cv, bits=ic.core_opts.audio_bits):\n# type: (Iterable[int], int) -> dict\n\"\"\"\n Create an ISCC Content-Code Audio with algorithm v0.\n :param Iterable[int] cv: Chromaprint vector\n :param int bits: Bit-length resulting Content-Code Audio (multiple of 64)\n :return: ISCC object with Content-Code Audio\n :rtype: dict\n \"\"\"\ndigest = soft_hash_audio_v0(cv, bits=bits)\naudio_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.AUDIO,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + audio_code\nreturn {\"iscc\": iscc}\n
"},{"location":"iso-reference/#iso-24138-56-video-code","title":"ISO 24138 / 5.6 Video-Code","text":"gen_video_code_v0(frame_sigs, bits=ic.core_opts.video_bits)
# Create an ISCC Video-Code with algorithm v0.
Parameters:
Name Type Description Default frame_sigs
ic.FrameSig
Sequence of MP7 frame signatures
required bits
int
Bit-length of resulting Video-Code (multiple of 64)
ic.core_opts.video_bits
Returns:
Type Description dict
ISCC object with Video-Code
Source code in iscc_core\\code_content_video.py
def gen_video_code_v0(frame_sigs, bits=ic.core_opts.video_bits):\n# type: (Sequence[ic.FrameSig], int) -> dict\n\"\"\"\n Create an ISCC Video-Code with algorithm v0.\n :param ic.FrameSig frame_sigs: Sequence of MP7 frame signatures\n :param int bits: Bit-length resulting Video-Code (multiple of 64)\n :return: ISCC object with Video-Code\n :rtype: dict\n \"\"\"\ndigest = soft_hash_video_v0(frame_sigs, bits=bits)\nvideo_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.VIDEO,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + video_code\nreturn dict(iscc=iscc)\n
"},{"location":"iso-reference/#iso-24138-57-mixed-code","title":"ISO 24138 / 5.7 Mixed-Code","text":"gen_mixed_code_v0(codes, bits=ic.core_opts.mixed_bits)
# Create an ISCC Content-Code-Mixed with algorithm v0.
If the provided codes are of mixed length they are stripped to bits
length for calculation.
Parameters:
Name Type Description Default codes
Iterable[str]
a list of Content-Codes.
required bits
int
Target bit-length of generated Content-Code-Mixed.
ic.core_opts.mixed_bits
Returns:
Type Description dict
ISCC object with Content-Code Mixed.
Source code in iscc_core\\code_content_mixed.py
def gen_mixed_code_v0(codes, bits=ic.core_opts.mixed_bits):\n# type: (Sequence[str], int) -> dict\n\"\"\"\n Create an ISCC Content-Code-Mixed with algorithm v0.\n If the provided codes are of mixed length they are stripped to `bits` length for\n calculation.\n :param Iterable[str] codes: a list of Content-Codes.\n :param int bits: Target bit-length of generated Content-Code-Mixed.\n :return: ISCC object with Content-Code Mixed.\n :rtype: dict\n \"\"\"\ndigests = [ic.decode_base32(ic.iscc_clean(code)) for code in codes]\ndigest = soft_hash_codes_v0(digests, bits=bits)\nmixed_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.MIXED,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + mixed_code\nreturn dict(iscc=iscc, parts=list(codes))\n
"},{"location":"iso-reference/#iso-24138-58-data-code","title":"ISO 24138 / 5.8 Data-Code","text":"gen_data_code_v0(stream, bits=ic.core_opts.data_bits)
# Create an ISCC Data-Code with algorithm v0.
Parameters:
Name Type Description Default stream
Stream
Input data stream.
required bits
int
Bit-length of ISCC Data-Code (default 64).
ic.core_opts.data_bits
Returns:
Type Description dict
ISCC object with Data-Code
Source code in iscc_core\\code_data.py
def gen_data_code_v0(stream, bits=ic.core_opts.data_bits):\n# type: (ic.Stream, int) -> dict\n\"\"\"\n Create an ISCC Data-Code with algorithm v0.\n :param Stream stream: Input data stream.\n :param int bits: Bit-length of ISCC Data-Code (default 64).\n :return: ISCC object with Data-Code\n :rtype: dict\n \"\"\"\nhasher = DataHasherV0()\ndata = stream.read(ic.core_opts.io_read_size)\nwhile data:\nhasher.push(data)\ndata = stream.read(ic.core_opts.io_read_size)\ndata_code = hasher.code(bits=bits)\niscc = \"ISCC:\" + data_code\nreturn dict(iscc=iscc)\n
"},{"location":"iso-reference/#iso-24138-59-instance-code","title":"ISO 24138 / 5.9 Instance-Code","text":"gen_instance_code_v0(stream, bits=ic.core_opts.instance_bits)
# Create an ISCC Instance-Code with algorithm v0.
Parameters:
Name Type Description Default stream
Stream
Binary data stream for Instance-Code generation
required bits
int
Bit-length of resulting Instance-Code (multiple of 64)
ic.core_opts.instance_bits
Returns:
Type Description dict
ISCC object with Instance-Code and properties: datahash, filesize
Source code in iscc_core\\code_instance.py
def gen_instance_code_v0(stream, bits=ic.core_opts.instance_bits):\n# type: (ic.Stream, int) -> dict\n\"\"\"\n Create an ISCC Instance-Code with algorithm v0.\n :param Stream stream: Binary data stream for Instance-Code generation\n :param int bits: Bit-length of resulting Instance-Code (multiple of 64)\n :return: ISCC object with Instance-Code and properties: datahash, filesize\n :rtype: dict\n \"\"\"\nhasher = InstanceHasherV0()\ndata = stream.read(ic.core_opts.io_read_size)\nwhile data:\nhasher.push(data)\ndata = stream.read(ic.core_opts.io_read_size)\ninstance_code = hasher.code(bits=bits)\niscc = \"ISCC:\" + instance_code\ninstance_code_obj = dict(\niscc=iscc,\ndatahash=hasher.multihash(),\nfilesize=hasher.filesize,\n)\nreturn instance_code_obj\n
"},{"location":"iso-reference/#iso-24138-60-iscc-code","title":"ISO 24138 / 6.0 ISCC-CODE","text":"gen_iscc_code_v0(codes)
# Combine multiple ISCC-UNITS to an ISCC-CODE with a common header using algorithm v0.
Parameters:
Name Type Description Default codes
Sequence[str]
A valid sequence of singular ISCC-UNITS.
required Returns:
Type Description dict
An ISCC object with ISCC-CODE
Source code in iscc_core\\iscc_code.py
def gen_iscc_code_v0(codes):\n# type: (Sequence[str]) -> dict\n\"\"\"\n Combine multiple ISCC-UNITS to an ISCC-CODE with a common header using\n algorithm v0.\n :param Sequence[str] codes: A valid sequence of singluar ISCC-UNITS.\n :return: An ISCC object with ISCC-CODE\n :rtype: dict\n \"\"\"\ncodes = [ic.iscc_clean(code) for code in codes]\n# Check basic constraints\nif len(codes) < 2:\nraise ValueError(\"Minimum two ISCC units required to generate valid ISCC-CODE\")\nfor code in codes:\nif len(code) < 16:\nraise ValueError(f\"Cannot build ISCC-CODE from units shorter than 64-bits: {code}\")\n# Decode units and sort by MainType\ndecoded = sorted(\n[ic.decode_header(ic.decode_base32(code)) for code in codes], key=itemgetter(0)\n)\nmain_types = tuple(d[0] for d in decoded)\nif main_types[-2:] != (ic.MT.DATA, ic.MT.INSTANCE):\nraise ValueError(f\"ISCC-CODE requires at least MT.DATA and MT.INSTANCE units.\")\n# Determine SubType (generic mediatype)\nsub_types = [t[1] for t in decoded if t[0] in {ic.MT.SEMANTIC, ic.MT.CONTENT}]\nif len(set(sub_types)) > 1:\nraise ValueError(f\"Semantic-Code and Content-Code must be of same SubType\")\nst = sub_types.pop() if sub_types else ic.ST_ISCC.SUM if len(codes) == 2 else ic.ST_ISCC.NONE\n# Encode unit combination\nencoded_length = ic.encode_units(main_types[:-2])\n# Collect and truncate unit digests to 64-bit\ndigest = b\"\".join([t[-1][:8] for t in decoded])\nheader = ic.encode_header(ic.MT.ISCC, st, ic.VS.V0, encoded_length)\ncode = ic.encode_base32(header + digest)\niscc = \"ISCC:\" + code\nreturn dict(iscc=iscc)\n
"},{"location":"algorithms/cdc/","title":"ISCC - Content Defined Chunking","text":"Compatible with fastcdc
"},{"location":"algorithms/cdc/#iscc_core.cdc.alg_cdc_chunks","title":"alg_cdc_chunks(data, utf32, avg_chunk_size = ic.core_opts.data_avg_chunk_size)
","text":"A generator that yields data-dependent chunks for data
.
Usage Example:
for chunk in alg_cdc_chunks(data, utf32=False):\nhash(chunk)\n
Parameters:
Name Type Description Default data
bytes
Raw data for variable sized chunking.
required utf32
bool
If true assume we are chunking text that is utf32 encoded.
required avg_chunk_size
int
Target chunk size in number of bytes.
ic.core_opts.data_avg_chunk_size
Returns:
Type Description Generator[bytes]
A generator that yields data chunks of variable sizes.
Source code in iscc_core\\cdc.py
def alg_cdc_chunks(data, utf32, avg_chunk_size=ic.core_opts.data_avg_chunk_size):\n# type: (Data, bool, int) -> Generator[bytes, None, None]\n\"\"\"\n A generator that yields data-dependent chunks for `data`.\n Usage Example:\n ```python\n for chunk in cdc_data_chunks(data):\n hash(chunk)\n ```\n :param bytes data: Raw data for variable sized chunking.\n :param bool utf32: If true assume we are chunking text that is utf32 encoded.\n :param int avg_chunk_size: Target chunk size in number of bytes.\n :return: A generator that yields data chunks of variable sizes.\n :rtype: Generator[bytes]\n \"\"\"\nstream = io.BytesIO(data)\nbuffer = stream.read(ic.core_opts.io_read_size)\nif not buffer:\nyield b\"\"\nmi, ma, cs, mask_s, mask_l = alg_cdc_params(avg_chunk_size)\nbuffer = memoryview(buffer)\nwhile buffer:\nif len(buffer) <= ma:\nbuffer = memoryview(bytes(buffer) + stream.read(ic.core_opts.io_read_size))\ncut_point = alg_cdc_offset(buffer, mi, ma, cs, mask_s, mask_l)\n# Make sure cut points are at 4-byte aligned for utf32 encoded text\nif utf32:\ncut_point -= cut_point % 4\nyield bytes(buffer[:cut_point])\nbuffer = buffer[cut_point:]\n
"},{"location":"algorithms/cdc/#iscc_core.cdc.alg_cdc_offset","title":"alg_cdc_offset(buffer, mi, ma, cs, mask_s, mask_l)
","text":"Find breakpoint offset for a given buffer.
Parameters:
Name Type Description Default buffer
Data
The data to be chunked.
required mi
int
Minimum chunk size.
required ma
int
Maximum chunk size.
required cs
int
Center size.
required mask_s
int
Small mask.
required mask_l
int
Large mask.
required Returns:
Type Description int
Offset of dynamic cutpoint in number of bytes.
Source code in iscc_core\\cdc.py
def alg_cdc_offset(buffer, mi, ma, cs, mask_s, mask_l):\n# type: (ic.Data, int, int, int, int, int) -> int\n\"\"\"\n Find breakpoint offset for a given buffer.\n :param Data buffer: The data to be chunked.\n :param int mi: Minimum chunk size.\n :param int ma: Maximung chunk size.\n :param int cs: Center size.\n :param int mask_s: Small mask.\n :param int mask_l: Large mask.\n :return: Offset of dynamic cutpoint in number of bytes.\n :rtype: int\n \"\"\"\npattern = 0\ni = mi\nsize = len(buffer)\nbarrier = min(cs, size)\nwhile i < barrier:\npattern = (pattern >> 1) + ic.core_opts.cdc_gear[buffer[i]]\nif not pattern & mask_s:\nreturn i + 1\ni += 1\nbarrier = min(ma, size)\nwhile i < barrier:\npattern = (pattern >> 1) + ic.core_opts.cdc_gear[buffer[i]]\nif not pattern & mask_l:\nreturn i + 1\ni += 1\nreturn i\n
"},{"location":"algorithms/cdc/#iscc_core.cdc.alg_cdc_params","title":"alg_cdc_params(avg_size: int) -> tuple
","text":"Calculate CDC parameters
Parameters:
Name Type Description Default avg_size
int
Target average size of chunks in number of bytes.
required Returns:
Type Description tuple
Tuple of (min_size, max_size, center_size, mask_s, mask_l).
Source code in iscc_core\\cdc.py
def alg_cdc_params(avg_size: int) -> tuple:\n\"\"\"\n Calculate CDC parameters\n :param int avg_size: Target average size of chunks in number of bytes.\n :returns: Tuple of (min_size, max_size, center_size, mask_s, mask_l).\n \"\"\"\nceil_div = lambda x, y: (x + y - 1) // y\nmask = lambda b: 2**b - 1\nmin_size = avg_size // 4\nmax_size = avg_size * 8\noffset = min_size + ceil_div(min_size, 2)\ncenter_size = avg_size - offset\nbits = round(log2(avg_size))\nmask_s = mask(bits + 1)\nmask_l = mask(bits - 1)\nreturn min_size, max_size, center_size, mask_s, mask_l\n
"},{"location":"algorithms/dct/","title":"ISCC - Discrete Cosine Transform","text":""},{"location":"algorithms/dct/#iscc_core.dct.alg_dct","title":"alg_dct(v)
","text":"Discrete cosine transform.
See: nayuki.io.
Parameters:
Name Type Description Default v
Sequence[float]
Input vector for DCT calculation.
required Returns:
Type Description List
DCT Transformed vector.
Source code in iscc_core\\dct.py
def alg_dct(v):\n# type: (Sequence[float]) -> List\n\"\"\"\n Discrete cosine transform.\n See: [nayuki.io](https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms).\n :param Sequence[float] v: Input vector for DCT calculation.\n :return: DCT Transformed vector.\n :rtype: List\n \"\"\"\nn = len(v)\nif n == 1:\nreturn list(v)\nelif n == 0 or n % 2 != 0:\nraise ValueError()\nelse:\nhalf = n // 2\nalpha = [(v[i] + v[-(i + 1)]) for i in range(half)]\nbeta = [\n(v[i] - v[-(i + 1)]) / (math.cos((i + 0.5) * math.pi / n) * 2.0) for i in range(half)\n]\nalpha = alg_dct(alpha)\nbeta = alg_dct(beta)\nresult = []\nfor i in range(half - 1):\nresult.append(alpha[i])\nresult.append(beta[i] + beta[i + 1])\nresult.append(alpha[-1])\nresult.append(beta[-1])\nreturn result\n
"},{"location":"algorithms/minhash/","title":"ISCC - Minhash","text":""},{"location":"algorithms/minhash/#iscc_core.minhash.alg_minhash","title":"alg_minhash(features)
","text":"Calculate a 64 dimensional minhash integer vector.
Parameters:
Name Type Description Default features
List[int]
List of integer features
required Returns:
Type Description List[int]
Minhash vector
Source code in iscc_core\\minhash.py
def alg_minhash(features):\n# type: (List[int]) -> List[int]\n\"\"\"\n Calculate a 64 dimensional minhash integer vector.\n :param List[int] features: List of integer features\n :return: Minhash vector\n :rtype: List[int]\n \"\"\"\nreturn [\nmin([(((a * f + b) & MAXI64) % MPRIME) & MAXH for f in features]) for a, b in zip(MPA, MPB)\n]\n
"},{"location":"algorithms/minhash/#iscc_core.minhash.alg_minhash_64","title":"alg_minhash_64(features)
","text":"Create 64-bit minimum hash digest.
Parameters:
Name Type Description Default features
List[int]
List of integer features
required Returns:
Type Description bytes
64-bit binary from the least significant bits of the minhash values
Source code in iscc_core\\minhash.py
def alg_minhash_64(features):\n# type: (List[int]) -> bytes\n\"\"\"\n Create 64-bit minimum hash digest.\n :param List[int] features: List of integer features\n :return: 64-bit binary from the least significant bits of the minhash values\n :rtype: bytes\n \"\"\"\nreturn alg_minhash_compress(alg_minhash(features), 1)\n
"},{"location":"algorithms/minhash/#iscc_core.minhash.alg_minhash_256","title":"alg_minhash_256(features)
","text":"Create 256-bit minimum hash digest.
Parameters:
Name Type Description Default features
List[int]
List of integer features
required Returns:
Type Description bytes
256-bit binary from the least significant bits of the minhash values
Source code in iscc_core\\minhash.py
def alg_minhash_256(features):\n# type: (List[int]) -> bytes\n\"\"\"\n Create 256-bit minimum hash digest.\n :param List[int] features: List of integer features\n :return: 256-bit binary from the least significant bits of the minhash values\n :rtype: bytes\n \"\"\"\nreturn alg_minhash_compress(alg_minhash(features), 4)\n
"},{"location":"algorithms/minhash/#iscc_core.minhash.alg_minhash_compress","title":"alg_minhash_compress(mhash, lsb = 4)
","text":"Compress minhash vector to byte hash-digest.
Concatenates lsb
number of least-significant bits from each integer in mhash
. For example an mhash
with 64 integers and lsb=4
will produce a 256-bit summary of the minhash vector.
Parameters:
Name Type Description Default mhash
List[int]
List of minhash integer features
required lsb
int
Number of the least significant bits to retain
4
Returns:
Type Description bytes
Binary digest built from the lsb least significant bits of each minhash value (256-bit for 64 integers and lsb=4)
Source code in iscc_core\\minhash.py
def alg_minhash_compress(mhash, lsb=4):\n# type: (List[int], int) -> bytes\n\"\"\"\n Compress minhash vector to byte hash-digest.\n Concatenates `lsb` number of least-significant bits from each integer in `mhash`.\n For example an `mhash` with 64 integers and `lsb=4` will produce a 256-bit summary\n of the minhash vector.\n :param List[int] mhash: List of minhash integer features\n :param int lsb: Number of the least significant bits to retain\n :return: 256-bit binary from the least significant bits of the minhash values\n :rtype: bytes\n \"\"\"\nbits: str = \"\"\nfor bitpos in range(lsb):\nfor h in mhash:\nbits += str(h >> bitpos & 1)\nreturn int(bits, 2).to_bytes((len(bits) + 7) // 8, \"big\")\n
"},{"location":"algorithms/simhash/","title":"ISCC - Simhash","text":""},{"location":"algorithms/simhash/#iscc_core.simhash.alg_simhash","title":"alg_simhash(hash_digests)
","text":"Creates a similarity preserving hash from a sequence of equal sized hash digests.
Parameters:
Name Type Description Default hash_digests
list
A sequence of equally sized byte-hashes.
required Returns:
Type Description bytes
Similarity byte-hash
Source code in iscc_core\\simhash.py
def alg_simhash(hash_digests):\n# type: (list[bytes]) -> bytes\n\"\"\"\n Creates a similarity preserving hash from a sequence of equal sized hash digests.\n :param list hash_digests: A sequence of equaly sized byte-hashes.\n :returns: Similarity byte-hash\n :rtype: bytes\n \"\"\"\nn_bytes = len(hash_digests[0])\nn_bits = n_bytes * 8\nvector = [0] * n_bits\nfor digest in hash_digests:\nh = bitarray()\nh.frombytes(digest)\nfor i in range(n_bits):\nvector[i] += h[i]\nminfeatures = len(hash_digests) / 2\nshash = 0\nfor i in range(n_bits):\nif vector[i] >= minfeatures:\nshash |= 1 << (n_bits - 1 - i)\nreturn shash.to_bytes(n_bytes, \"big\")\n
"},{"location":"algorithms/wtahash/","title":"ISCC - Winner Takes All Hash","text":""},{"location":"algorithms/wtahash/#iscc_core.wtahash.alg_wtahash","title":"alg_wtahash(vec: Sequence[float], bits: Sequence[float]) -> bytes
","text":"Calculate WTA Hash for vector with 380 values (MP7 frame signature).
Source code in iscc_core\\wtahash.py
def alg_wtahash(vec: Sequence[float], bits) -> bytes:\n\"\"\"Calculate WTA Hash for vector with 380 values (MP7 frame signature).\"\"\"\nh = []\nfor perm in WTA_VIDEO_ID_PERMUTATIONS:\nv = vec[perm[0]], vec[perm[1]]\nh.append(v.index(max(v)))\nif len(h) == bits:\nbreak\nh = bitarray(h).tobytes()\nreturn h\n
"},{"location":"codec/","title":"ISCC - Codec","text":"This module implements encoding, decoding and transcoding functions of ISCC
"},{"location":"codec/#codec-overview","title":"Codec Overview","text":""},{"location":"codec/#codec-functions","title":"Codec Functions","text":""},{"location":"codec/#iscc_core.codec.encode_component","title":"encode_component(mtype, stype, version, bit_length, digest)
","text":"Encode an ISCC unit inlcuding header and body with standard base32 encoding.
Note
The length
value must be the length in number of bits for the component. If digest
has more bits than specified by length
it will be truncated.
Parameters:
Name Type Description Default mtype
MainType
Maintype of unit (0-6)
required stype
SubType
SubType of unit depending on MainType (0-5)
required version
Version
Version of unit algorithm (0).
required bit_length
length
Length of unit, in number of bits (multiple of 32)
required digest
bytes
The hash digest of the unit.
required Returns:
Type Description str
Base32 encoded ISCC-UNIT.
Source code in iscc_core\\codec.py
def encode_component(mtype, stype, version, bit_length, digest):\n# type: (MainType, SubType, Version, Length, bytes) -> str\n\"\"\"\n Encode an ISCC unit inlcuding header and body with standard base32 encoding.\n !!! note\n The `length` value must be the **length in number of bits** for the component.\n If `digest` has more bits than specified by `length` it wil be truncated.\n :param MainType mtype: Maintype of unit (0-6)\n :param SubType stype: SubType of unit depending on MainType (0-5)\n :param Version version: Version of unit algorithm (0).\n :param length bit_length: Length of unit, in number of bits (multiple of 32)\n :param bytes digest: The hash digest of the unit.\n :return: Base32 encoded ISCC-UNIT.\n :rtype: str\n \"\"\"\nif mtype in (MT.META, MT.SEMANTIC, MT.CONTENT, MT.DATA, MT.INSTANCE, MT.ID, MT.FLAKE):\nencoded_length = encode_length(mtype, bit_length)\nelif mtype == MT.ISCC:\nraise ValueError(f\"{mtype} is not a unit\")\nelse:\nraise ValueError(f\"Illegal MainType {mtype}\")\nnbytes = bit_length // 8\nheader = encode_header(mtype, stype, version, encoded_length)\nbody = digest[:nbytes]\ncomponent_code = encode_base32(header + body)\nreturn component_code\n
"},{"location":"codec/#iscc_core.codec.encode_header","title":"encode_header(mtype, stype, version = 0, length = 1)
","text":"Encodes header values with nibble-sized (4-bit) variable-length encoding. The result is minimum 2 and maximum 8 bytes long. If the final count of nibbles is uneven it is padded with 4-bit 0000
at the end.
Warning
The length value must be encoded beforehand because its semantics depend on the MainType (see encode_length
function).
Parameters:
Name Type Description Default mtype
MainType
MainType of unit.
required stype
SubType
SubType of unit.
required version
Version
Version of component algorithm.
0
length
Length
length value of unit (1 means 64-bits for standard units)
1
Returns:
Type Description bytes
Varnibble stream encoded ISCC header as bytes.
Source code in iscc_core\\codec.py
def encode_header(mtype, stype, version=0, length=1):\n# type: (MainType, SubType, Version, Length) -> bytes\n\"\"\"\n Encodes header values with nibble-sized (4-bit) variable-length encoding.\n The result is minimum 2 and maximum 8 bytes long. If the final count of nibbles\n is uneven it is padded with 4-bit `0000` at the end.\n !!! warning\n The length value must be encoded beforhand because its semantics depend on\n the MainType (see `encode_length` function).\n :param MainType mtype: MainType of unit.\n :param SubType stype: SubType of unit.\n :param Version version: Version of component algorithm.\n :param Length length: length value of unit (1 means 64-bits for standard units)\n :return: Varnibble stream encoded ISCC header as bytes.\n :rtype: bytes\n \"\"\"\n# TODO verify that all header params and there combination is valid\nheader = bitarray()\nfor n in (mtype, stype, version, length):\nheader += encode_varnibble(n)\n# Append zero-padding if required (right side, least significant bits).\nheader.fill()\nreturn header.tobytes()\n
"},{"location":"codec/#iscc_core.codec.decode_header","title":"decode_header(data)
","text":"Decodes varnibble encoded header and returns it together with tail data
.
Tail data is included to enable decoding of sequential ISCCs. The returned tail data must be truncated to decode_length(r[0], r[3]) bits to recover the actual hash-bytes.
Parameters:
Name Type Description Default data
bytes
ISCC bytes
required Returns:
Type Description IsccTuple
(MainType, SubType, Version, length, TailData)
Source code in iscc_core\\codec.py
def decode_header(data):\n# type: (bytes) -> IsccTuple\n\"\"\"\n Decodes varnibble encoded header and returns it together with `tail data`.\n Tail data is included to enable decoding of sequential ISCCs. The returned tail\n data must be truncated to decode_length(r[0], r[3]) bits to recover the actual\n hash-bytes.\n :param bytes data: ISCC bytes\n :return: (MainType, SubType, Version, length, TailData)\n :rtype: IsccTuple\n \"\"\"\nresult = []\nba = bitarray()\nba.frombytes(data)\ndata = ba\nfor _ in range(4):\nvalue, data = decode_varnibble(data)\nresult.append(value)\n# Strip 4-bit padding if required\nif len(data) % 8 and data[:4] == bitarray(\"0000\"):\ndata = data[4:]\nresult.append(data.tobytes())\nreturn tuple(result)\n
"},{"location":"codec/#iscc_core.codec.encode_varnibble","title":"encode_varnibble(n)
","text":"Writes integer to variable length sequence of 4-bit chunks.
Variable-length encoding scheme:
prefix bits nibbles data bits unsigned range 0 1 3 0 - 7 10 2 6 8 - 71 110 3 9 72 - 583 1110 4 12 584 - 4679 Parameters:
Name Type Description Default n
int
Positive integer to be encoded as varnibble (0-4679)
required Returns:
Type Description bitarray
Varnibble encoded integer
Source code in iscc_core\\codec.py
def encode_varnibble(n):\n# type: (int) -> bitarray\n\"\"\"\n Writes integer to variable length sequence of 4-bit chunks.\n Variable-length encoding scheme:\n ------------------------------------------------------\n | prefix bits | nibbles | data bits | unsigned range |\n | ----------- | ------- | --------- | -------------- |\n | 0 | 1 | 3 | 0 - 7 |\n | 10 | 2 | 6 | 8 - 71 |\n | 110 | 3 | 9 | 72 - 583 |\n | 1110 | 4 | 12 | 584 - 4679 |\n :param int n: Positive integer to be encoded as varnibble (0-4679)\n :return: Varnibble encoded integera\n :rtype: bitarray\n \"\"\"\nif 0 <= n < 8:\nreturn int2ba(n, length=4)\nelif 8 <= n < 72:\nreturn bitarray(\"10\") + int2ba(n - 8, length=6)\nelif 72 <= n < 584:\nreturn bitarray(\"110\") + int2ba(n - 72, length=9)\nelif 584 <= n < 4680:\nreturn bitarray(\"1110\") + int2ba(n - 584, length=12)\nelse:\nraise ValueError(\"Value must be between 0 and 4679\")\n
"},{"location":"codec/#iscc_core.codec.decode_varnibble","title":"decode_varnibble(b)
","text":"Reads first varnibble, returns its integer value and remaining bits.
Parameters:
Name Type Description Default b
bitarray
Array of header bits
required Returns:
Type Description Tuple[int, bitarray]
A tuple of the integer value of first varnibble and the remaining bits.
Source code in iscc_core\\codec.py
def decode_varnibble(b):\n# type: (bitarray) -> Tuple[int, bitarray]\n\"\"\"Reads first varnibble, returns its integer value and remaining bits.\n :param bitarray b: Array of header bits\n :return: A tuple of the integer value of first varnible and the remaining bits.\n :rtype: Tuple[int, bitarray]\n \"\"\"\nbits = len(b)\nif bits >= 4 and b[0] == 0:\nreturn ba2int(b[:4]), b[4:]\nif bits >= 8 and b[1] == 0:\nreturn ba2int(b[2:8]) + 8, b[8:]\nif bits >= 12 and b[2] == 0:\nreturn ba2int(b[3:12]) + 72, b[12:]\nif bits >= 16 and b[3] == 0:\nreturn ba2int(b[4:16]) + 584, b[16:]\nraise ValueError(\"Invalid bitarray\")\n
"},{"location":"codec/#iscc_core.codec.encode_units","title":"encode_units(units)
","text":"Encodes a combination of ISCC units to an integer between 0-7 to be used as length value for the final encoding of MT.ISCC
Parameters:
Name Type Description Default units
Tuple
A tuple of a MainType combination (can be empty)
required Returns:
Type Description int
Integer value to be used as length-value for header encoding
Source code in iscc_core\\codec.py
def encode_units(units):\n# type: (Tuple[MT, ...]) -> int\n\"\"\"\n Encodes a combination of ISCC units to an integer between 0-7 to be used as length\n value for the final encoding of MT.ISCC\n :param Tuple units: A tuple of a MainType combination (can be empty)\n :return: Integer value to be used as length-value for header encoding\n :rtype: int\n \"\"\"\nreturn UNITS.index(units)\n
"},{"location":"codec/#iscc_core.codec.decode_units","title":"decode_units(unit_id)
","text":"Decodes an ISCC header length value that has been encoded with a unit_id to an ordered tuple of MainTypes.
Source code in iscc_core\\codec.py
def decode_units(unit_id):\n# type: (int) -> Tuple[MT, ...]\n\"\"\"\n Decodes an ISCC header length value that has been encoded with a unit_id to an\n ordered tuple of MainTypes.\n \"\"\"\nunits = sorted(UNITS[unit_id])\nreturn tuple(MT(u) for u in units)\n
"},{"location":"codec/#iscc_core.codec.encode_length","title":"encode_length(mtype, length)
","text":"Encode length to integer value for header encoding.
The length
value has MainType-specific semantics:
For MainTypes META
, SEMANTIC
, CONTENT
, DATA
, INSTANCE
:
Length means number of bits for the body.\nLength is encoded as the multiple of 32-bit chunks (0 being 32bits)\nExamples: 32 -> 0, 64 -> 1, 96 -> 2 ...\n
For MainType ISCC
:
MainTypes `DATA` and `INSTANCE` are mandatory for ISCC-CODEs, all others are\noptional. Length means the composition of optional 64-bit units included\nin the ISCC composite.\n\nExamples:\n No optional units -> 0000 -> 0\n CONTENT -> 0001 -> 1\n SEMANTIC -> 0010 -> 2\n SEMANTIC, CONTENT -> 0011 -> 3\n META -> 0100 -> 4\n META, CONTENT -> 0101 -> 5\n ...\n
For MainType ID
:
Length means the number of bits for the body including the counter\nLength is encoded as number of bytes of the counter (64-bit body is implicit)\nExamples:\n 64 -> 0 (No counter)\n 72 -> 1 (One byte counter)\n 80 -> 2 (Two byte counter)\n ...\n
Parameters:
Name Type Description Default mtype
MainType
The MainType for which to encode the length value.
required length
Length
The length expressed according to the semantics of the type
required Returns:
Type Description int
The length value encoded as integer for use with encode_header.
Source code in iscc_core\\codec.py
def encode_length(mtype, length):\n# type: (MainType, Length) -> int\n\"\"\"\n Encode length to integer value for header encoding.\n The `length` value has MainType-specific semantics:\n For MainTypes `META`, `SEMANTIC`, `CONTENT`, `DATA`, `INSTANCE`:\n Length means number of bits for the body.\n Length is encoded as the multiple of 32-bit chunks (0 being 32bits)\n Examples: 32 -> 0, 64 -> 1, 96 -> 2 ...\n For MainType `ISCC`:\n MainTypes `DATA` and `INSTANCE` are mandatory for ISCC-CODEs, all others are\n optional. Length means the composition of optional 64-bit units included\n in the ISCC composite.\n Examples:\n No optional units -> 0000 -> 0\n CONTENT -> 0001 -> 1\n SEMANTIC -> 0010 -> 2\n SEMANTIC, CONTENT -> 0011 -> 3\n META -> 0100 -> 4\n META, CONTENT -> 0101 -> 5\n ...\n For MainType `ID`:\n Lengths means number the number of bits for the body including the counter\n Length is encoded as number of bytes of the counter (64-bit body is implicit)\n Examples:\n 64 -> 0 (No counter)\n 72 -> 1 (One byte counter)\n 80 -> 2 (Two byte counter)\n ...\n :param MainType mtype: The MainType for which to encode the length value.\n :param Length length: The length expressed according to the semantics of the type\n :return: The length value encoded as integer for use with write_header.\n :rtype: int\n \"\"\"\nerror = f\"Invalid length {length} for MainType {mtype}\"\n# standard case (length field denotes number of 32-bit chunks, 0 being 32-bits)\nif mtype in (MT.META, MT.SEMANTIC, MT.CONTENT, MT.DATA, MT.INSTANCE, MT.FLAKE):\nif length >= 32 and not length % 32:\nreturn (length // 32) - 1\nraise ValueError(error)\n# flag type encoding of included components (pass through as encoded out-of-band)\nelif mtype == MT.ISCC:\nif 0 <= length <= 7:\nreturn length\nraise ValueError(error)\n# counter byte lenght encoding\nelif mtype == MT.ID:\nif 64 <= length <= 96:\nreturn (length - 64) // 8\nraise ValueError(error)\nelse:\nraise ValueError(error)\n
"},{"location":"codec/#iscc_core.codec.decode_length","title":"decode_length(mtype, length)
","text":"Dedoce raw length value from ISCC header to length of digest in number of bits.
Decodes a raw header integer value into its semantically meaningful value (e.g. number of bits)
Source code in iscc_core\\codec.py
def decode_length(mtype, length):\n# type: (MainType, Length) -> LN\n\"\"\"\n Dedoce raw length value from ISCC header to length of digest in number of bits.\n Decodes a raw header integer value in to its semantically meaningfull value (e.g.\n number of bits)\n \"\"\"\nif mtype in (MT.META, MT.SEMANTIC, MT.CONTENT, MT.DATA, MT.INSTANCE, MT.FLAKE):\nreturn LN((length + 1) * 32)\nelif mtype == MT.ISCC:\nreturn LN(len(decode_units(length)) * 64 + 128)\nelif mtype == MT.ID:\nreturn LN(length * 8 + 64)\nelse:\nraise ValueError(f\"Invalid length {length} for MainType {mtype}\")\n
"},{"location":"codec/#iscc_core.codec.encode_base32","title":"encode_base32(data)
","text":"Standard RFC4648 base32 encoding without padding.
Source code in iscc_core\\codec.py
def encode_base32(data):\n# type: (bytes) -> str\n\"\"\"\n Standard RFC4648 base32 encoding without padding.\n \"\"\"\nreturn b32encode(data).decode(\"ascii\").rstrip(\"=\")\n
"},{"location":"codec/#iscc_core.codec.decode_base32","title":"decode_base32(code)
","text":"Standard RFC4648 base32 decoding without padding and with casefolding.
Source code in iscc_core\\codec.py
def decode_base32(code):\n# type: (str) -> bytes\n\"\"\"\n Standard RFC4648 base32 decoding without padding and with casefolding.\n \"\"\"\n# python stdlib does not support base32 without padding, so we have to re-pad.\ncl = len(code)\npad_length = math.ceil(cl / 8) * 8 - cl\nreturn bytes(b32decode(code + \"=\" * pad_length, casefold=True))\n
"},{"location":"codec/#iscc_core.codec.iscc_decompose","title":"iscc_decompose(iscc_code)
","text":"Decompose a normalized ISCC-CODE or any valid ISCC sequence into a list of ISCC-UNITS.
A valid ISCC sequence is a string concatenation of ISCC-UNITS optionally separated by a hyphen.
Source code in iscc_core\\codec.py
def iscc_decompose(iscc_code):\n# type: (str) -> List[str]\n\"\"\"\n Decompose a normalized ISCC-CODE or any valid ISCC sequence into a list of ISCC-UNITS.\n A valid ISCC sequence is a string concatenation of ISCC-UNITS optionally seperated\n by a hyphen.\n \"\"\"\niscc_code = iscc_clean(iscc_code)\ncomponents = []\nraw_code = decode_base32(iscc_code)\nwhile raw_code:\nmt, st, vs, ln, body = decode_header(raw_code)\n# standard ISCC-UNIT with tail continuation\nif mt != MT.ISCC:\nln_bits = decode_length(mt, ln)\ncode = encode_component(mt, st, vs, ln_bits, body[: ln_bits // 8])\ncomponents.append(code)\nraw_code = body[ln_bits // 8 :]\ncontinue\n# ISCC-CODE\nmain_types = decode_units(ln)\n# rebuild dynamic units (META, SEMANTIC, CONTENT)\nfor idx, mtype in enumerate(main_types):\nstype = ST.NONE if mtype == MT.META else st\ncode = encode_component(mtype, stype, vs, 64, body[idx * 8 :])\ncomponents.append(code)\n# rebuild static units (DATA, INSTANCE)\ndata_code = encode_component(MT.DATA, ST.NONE, vs, 64, body[-16:-8])\ninstance_code = encode_component(MT.INSTANCE, ST.NONE, vs, 64, body[-8:])\ncomponents.extend([data_code, instance_code])\nbreak\nreturn components\n
"},{"location":"codec/#iscc_core.codec.iscc_normalize","title":"iscc_normalize(iscc_code)
","text":"Normalize an ISCC to its canonical form.
The canonical form of an ISCC is its shortest base32 encoded representation prefixed with the string ISCC:
.
Possible valid inputs:
MEACB7X7777574L6\nISCC:MEACB7X7777574L6\nfcc010001657fe7cafe9791bb\niscc:maagztfqttvizpjr\nIscc:Maagztfqttvizpjr\n
Info
A concatenated sequence of codes will be composed into a single ISCC of MainType MT.ISCC
if possible.
Example
>>> import iscc_core\n>>> iscc_core.iscc_normalize(\"GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W\")\n'ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM'\n
Parameters:
Name Type Description Default iscc_code
str
Any valid ISCC string
required Returns:
Type Description str
Normalized ISCC
Source code in iscc_core\\codec.py
def iscc_normalize(iscc_code):\n# type: (str) -> str\n\"\"\"\n Normalize an ISCC to its canonical form.\n The canonical form of an ISCC is its shortest base32 encoded representation\n prefixed with the string `ISCC:`.\n Possible valid inputs:\n MEACB7X7777574L6\n ISCC:MEACB7X7777574L6\n fcc010001657fe7cafe9791bb\n iscc:maagztfqttvizpjr\n Iscc:Maagztfqttvizpjr\n !!! info\n A concatenated sequence of codes will be composed into a single ISCC of MainType\n `MT.ISCC` if possible.\n !!! example\n ``` py\n >>> import iscc_core\n >>> iscc_core.iscc_normalize(\"GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W\")\n 'ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM'\n ```\n :param str iscc_code: Any valid ISCC string\n :return: Normalized ISCC\n :rtype: str\n \"\"\"\nfrom iscc_core.iscc_code import gen_iscc_code_v0\ndecoders = {\nMULTIBASE.base16.value: bytes.fromhex, # f\nMULTIBASE.base32.value: decode_base32, # b\nMULTIBASE.base32hex.value: decode_base32hex, # v\nMULTIBASE.base58btc.value: base58.b58decode, # z\nMULTIBASE.base64url.value: decode_base64, # u\n}\n# Transcode to base32 if <multibase><multicodec> encoded\nmultibase_prefix = iscc_code[0]\nif multibase_prefix in decoders.keys():\ndecoder = decoders[multibase_prefix]\ndecoded = decoder(iscc_code[1:])\nif not decoded.startswith(MC_PREFIX):\nraise ValueError(f\"Malformed multiformat codec: {decoded[:2]}\")\niscc_code = encode_base32(decoded[2:])\nelse:\nprefix = iscc_code.lstrip(\"ISCC:\").lstrip(\"iscc:\")[:2].upper()\nif prefix not in PREFIXES:\nraise ValueError(f\"ISCC starts with invalid prefix {prefix}\")\ndecomposed = iscc_decompose(iscc_code)\nrecomposed = gen_iscc_code_v0(decomposed)[\"iscc\"] if len(decomposed) >= 2 else decomposed[0]\nreturn f\"ISCC:{recomposed}\" if not recomposed.startswith(\"ISCC:\") else recomposed\n
"},{"location":"codec/#alternate-encodings","title":"Alternate Encodings","text":""},{"location":"codec/#iscc_core.codec.encode_base64","title":"encode_base64(data)
","text":"Standard RFC4648 base64url encoding without padding.
Source code in iscc_core\\codec.py
def encode_base64(data):\n# type: (bytes) -> str\n\"\"\"\n Standard RFC4648 base64url encoding without padding.\n \"\"\"\ncode = urlsafe_b64encode(data).decode(\"ascii\")\nreturn code.rstrip(\"=\")\n
"},{"location":"codec/#iscc_core.codec.decode_base64","title":"decode_base64(code)
","text":"Standard RFC4648 base64url decoding without padding.
Source code in iscc_core\\codec.py
def decode_base64(code):\n# type: (str) -> bytes\n\"\"\"\n Standard RFC4648 base64url decoding without padding.\n \"\"\"\npadding = 4 - (len(code) % 4)\nstring = code + (\"=\" * padding)\nreturn urlsafe_b64decode(string)\n
"},{"location":"codec/#iscc_core.codec.encode_base32hex","title":"encode_base32hex(data)
","text":"RFC4648 Base32hex encoding without padding
see: https://tools.ietf.org/html/rfc4648#page-10
Source code in iscc_core\\codec.py
def encode_base32hex(data):\n# type: (bytes) -> str\n\"\"\"\n RFC4648 Base32hex encoding without padding\n see: https://tools.ietf.org/html/rfc4648#page-10\n \"\"\"\nb32 = encode_base32(data)\nreturn b32.translate(b32_to_hex)\n
"},{"location":"codec/#iscc_core.codec.decode_base32hex","title":"decode_base32hex(code)
","text":"RFC4648 Base32hex decoding without padding
see: https://tools.ietf.org/html/rfc4648#page-10
Source code in iscc_core\\codec.py
def decode_base32hex(code):\n# type: (str) -> bytes\n\"\"\"\n RFC4648 Base32hex decoding without padding\n see: https://tools.ietf.org/html/rfc4648#page-10\n \"\"\"\nb32 = code.translate(hex_to_b32)\nreturn decode_base32(b32)\n
"},{"location":"codec/#helper-functions","title":"Helper Functions","text":""},{"location":"codec/#iscc_core.codec.iscc_decode","title":"iscc_decode(iscc)
","text":"Decode ISCC to an IsccTuple
Parameters:
Name Type Description Default iscc
str
ISCC string
required Returns:
Type Description IsccTuple
ISCC decoded to a tuple
Source code in iscc_core\\codec.py
def iscc_decode(iscc):\n# type: (str) -> IsccTuple\n\"\"\"\n Decode ISCC to an IsccTuple\n :param str iscc: ISCC string\n :return: ISCC decoded to a tuple\n :rtype: IsccTuple\n \"\"\"\niscc = iscc_clean(iscc_normalize(iscc))\ndata = decode_base32(iscc)\nreturn decode_header(data)\n
"},{"location":"codec/#iscc_core.codec.iscc_explain","title":"iscc_explain(iscc)
","text":"Convert ISCC to a human-readable representation
Parameters:
Name Type Description Default iscc
str
ISCC string
required Returns:
Type Description str
Human-readable representation of ISCC
Source code in iscc_core\\codec.py
def iscc_explain(iscc):\n# type: (str) -> str\n\"\"\"\n Convert ISCC to a human-readable representation\n :param str iscc: ISCC string\n :return: Human-readable representation of ISCC\n :rtype: str\n \"\"\"\ntid = iscc_type_id(iscc)\nfields = iscc_decode(iscc)\nif fields[0] == MT.ID:\ncounter_bytes = fields[-1][8:]\nif counter_bytes:\ncounter = uvarint.decode(counter_bytes)\nhex_hash = fields[-1][:8].hex()\nreturn f\"{tid}-{hex_hash}-{counter.integer}\"\nhex_hash = fields[-1].hex()\nreturn f\"{tid}-{hex_hash}\"\n
"},{"location":"codec/#iscc_core.codec.iscc_type_id","title":"iscc_type_id(iscc)
","text":"Extract and convert ISCC HEADER to a readable Type-ID string.
Type-ids can be used as names in databases to index ISCC-UNITs separately.
Parameters:
Name Type Description Default iscc
str
ISCC string
required Returns:
Type Description str
Unique Type-ID string
Source code in iscc_core\\codec.py
def iscc_type_id(iscc):\n# type: (str) -> str\n\"\"\"\n Extract and convert ISCC HEADER to a readable Type-ID string.\n Type-ids can be used as names in databases to index ISCC-UNITs seperatly.\n :param str iscc: ISCC string\n :return: Unique Type-ID string\n :rtype: str\n \"\"\"\nfields = iscc_decode(iscc)\nmtype = MT(fields[0])\nstype = SUBTYPE_MAP[fields[0]](fields[1])\nif mtype == MT.ISCC:\nmtypes = decode_units(fields[3])\nlength = \"\".join([t.name[0] for t in mtypes]) + \"DI\"\nelse:\nlength = decode_length(fields[0], fields[3])\nversion = VS(fields[2])\nreturn f\"{mtype.name}-{stype.name}-{version.name}-{length}\"\n
"},{"location":"codec/#iscc_core.codec.iscc_validate","title":"iscc_validate(iscc, strict = True)
","text":"Validate that a given string is a strictly well-formed ISCC.
A strictly well-formed ISCC is:
- an ISCC-CODE or ISCC-UNIT
- encoded with base32 upper without padding
- has a valid combination of header values
- is represented in its canonical URI form
Parameters:
Name Type Description Default iscc
str
ISCC string
required strict
bool
Raise an exception if validation fails (default True)
True
Returns:
Type Description bool
True if string is valid else false. (raises ValueError in strict mode)
Source code in iscc_core\\codec.py
def iscc_validate(iscc, strict=True):\n# type: (str, bool) -> bool\n\"\"\"\n Validate that a given string is a *strictly well-formed* ISCC.\n A *strictly well-formed* ISCC is:\n - an ISCC-CODE or ISCC-UNIT\n - encoded with base32 upper without padding\n - has a valid combination of header values\n - is represented in its canonical URI form\n :param str iscc: ISCC string\n :param bool strict: Raise an exeption if validation fails (default True)\n :return: True if sting is valid else false. (raises ValueError in strict mode)\n :rtype: bool\n \"\"\"\n# Basic regex validation\nmatch = re.match(\"^ISCC:[A-Z2-7]{10,60}$\", iscc)\nif not match:\nif strict:\nraise ValueError(\"ISCC string does not match ^ISCC:[A-Z2-7]{10,60}$\")\nelse:\nreturn False\ncleaned = iscc_clean(iscc)\nprefix = cleaned[:2]\nif prefix not in PREFIXES:\nif strict:\nraise ValueError(f\"Header starts with invalid sequence {prefix}\")\nelse:\nreturn False\nm, s, v, l, t = decode_header(decode_base32(cleaned))\nif v != 0:\nif strict:\nraise ValueError(f\"Unknown version {v} in version header\")\nelse:\nreturn False\nreturn True\n
"},{"location":"codec/#iscc_core.codec.iscc_clean","title":"iscc_clean(iscc)
","text":"Cleanup ISCC string.
Removes leading scheme, dashes, leading/trailing whitespace.
Parameters:
Name Type Description Default iscc
str
Any valid ISCC string
required Returns:
Type Description str
Cleaned ISCC string.
Source code in iscc_core\\codec.py
def iscc_clean(iscc):\n# type: (str) -> str\n\"\"\"\n Cleanup ISCC string.\n Removes leading scheme, dashes, leading/trailing whitespace.\n :param str iscc: Any valid ISCC string\n :return: Cleaned ISCC string.\n :rtype: str\n \"\"\"\nsplit = [part.strip() for part in iscc.strip().split(\":\")]\nif len(split) == 1:\ncode = split[0]\n# remove dashes if not multiformat\nif code[0] not in list(MULTIBASE):\ncode = code.replace(\"-\", \"\")\nreturn code\nelif len(split) == 2:\nscheme, code = split\nif scheme.lower() != \"iscc\":\nraise ValueError(f\"Invalid scheme: {scheme}\")\nreturn code.replace(\"-\", \"\")\nelse:\nraise ValueError(f\"Malformed ISCC string: {iscc}\")\n
"},{"location":"options/options/","title":"ISCC-CORE - Configuration Options","text":"Options for the iscc-core package can be configured using environment variables. Variables are loaded as class-attributes on the CoreOptions
instance. Environment variables are named like the class-attribute but prefixed with ISCC_CORE_
and upper-cased.
Example how to access configuration options
import iscc_core as ic\n# To access ISCC_CORE_TEXT_NGRAM_SIZE setting use\ntext_ngram_size: int = ic.core_opts.text_ngram_size\n
"},{"location":"options/options/#iscc_core.options.CoreOptions","title":"CoreOptions","text":"Parameters with defaults for ISCC calculations.
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_bits","title":"meta_bits class-attribute
instance-attribute
","text":"meta_bits: int = Field(\n64,\ndescription=\"Default length of generated Meta-Code in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_trim_name","title":"meta_trim_name class-attribute
instance-attribute
","text":"meta_trim_name: int = Field(\n128, description=\"Trim `name` to this mumber of bytes\"\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_trim_description","title":"meta_trim_description class-attribute
instance-attribute
","text":"meta_trim_description: int = Field(\n4096,\ndescription=\"Trim `description` to this number of bytes\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_ngram_size_text","title":"meta_ngram_size_text class-attribute
instance-attribute
","text":"meta_ngram_size_text: int = Field(\n3,\ndescription=\"Sliding window width (characters) for metadata\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_ngram_size_bytes","title":"meta_ngram_size_bytes class-attribute
instance-attribute
","text":"meta_ngram_size_bytes: int = Field(\n4,\ndescription=\"Sliding window width (bytes) for metadata\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.text_bits","title":"text_bits class-attribute
instance-attribute
","text":"text_bits: int = Field(\n64,\ndescription=\"Default length of generated Content-Code Text in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.text_ngram_size","title":"text_ngram_size class-attribute
instance-attribute
","text":"text_ngram_size: int = Field(\n13,\ndescription=\"Number of characters per feature hash (size of sliding window)\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.text_unicode_filter","title":"text_unicode_filter class-attribute
instance-attribute
","text":"text_unicode_filter: frozenset = Field(\nfrozenset({\"C\", \"M\", \"P\"}),\ndescription=\"Unicode categories to remove during text normalization\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.text_newlines","title":"text_newlines class-attribute
instance-attribute
","text":"text_newlines: frozenset = Field(\nfrozenset(\n{\n\"\\n\",\n\"\\x0b\",\n\"\\x0c\",\n\"\\r\",\n\"\\x85\",\n\"\\u2028\",\n\"\\u2029\",\n}\n),\ndescription=\"Characters regarded as newline characters for normalization purposes\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.image_bits","title":"image_bits class-attribute
instance-attribute
","text":"image_bits: int = Field(\n64,\ndescription=\"Default length of generated Content-Code Image in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.audio_bits","title":"audio_bits class-attribute
instance-attribute
","text":"audio_bits: int = Field(\n64,\ndescription=\"Default length of generated Content-Code Audio in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.video_bits","title":"video_bits class-attribute
instance-attribute
","text":"video_bits: int = Field(\n64,\ndescription=\"Default length of generated Content-Code Video in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.data_bits","title":"data_bits class-attribute
instance-attribute
","text":"data_bits: int = Field(\n64,\ndescription=\"Default length of generated Data-Code in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.data_avg_chunk_size","title":"data_avg_chunk_size class-attribute
instance-attribute
","text":"data_avg_chunk_size: int = Field(\n1024,\ndescription=\"Target chunk size for data chunking in number of bytes.\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.instance_bits","title":"instance_bits class-attribute
instance-attribute
","text":"instance_bits: int = Field(\n64,\ndescription=\"Default length of generated Instance-Code in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.mixed_bits","title":"mixed_bits class-attribute
instance-attribute
","text":"mixed_bits: int = Field(\n64,\ndescription=\"Default length of generated Mixed-Code in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.io_read_size","title":"io_read_size class-attribute
instance-attribute
","text":"io_read_size: int = Field(\n2097152,\ndescription=\"File read buffer size in bytes for hashing operations\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.cdc_gear","title":"cdc_gear class-attribute
instance-attribute
","text":"cdc_gear: Tuple = Field(\n(\n1553318008,\n574654857,\n759734804,\n310648967,\n1393527547,\n1195718329,\n694400241,\n1154184075,\n1319583805,\n1298164590,\n122602963,\n989043992,\n1918895050,\n933636724,\n1369634190,\n1963341198,\n1565176104,\n1296753019,\n1105746212,\n1191982839,\n1195494369,\n29065008,\n1635524067,\n722221599,\n1355059059,\n564669751,\n1620421856,\n1100048288,\n1018120624,\n1087284781,\n1723604070,\n1415454125,\n737834957,\n1854265892,\n1605418437,\n1697446953,\n973791659,\n674750707,\n1669838606,\n320299026,\n1130545851,\n1725494449,\n939321396,\n748475270,\n554975894,\n1651665064,\n1695413559,\n671470969,\n992078781,\n1935142196,\n1062778243,\n1901125066,\n1935811166,\n1644847216,\n744420649,\n2068980838,\n1988851904,\n1263854878,\n1979320293,\n111370182,\n817303588,\n478553825,\n694867320,\n685227566,\n345022554,\n2095989693,\n1770739427,\n165413158,\n1322704750,\n46251975,\n710520147,\n700507188,\n2104251000,\n1350123687,\n1593227923,\n1756802846,\n1179873910,\n1629210470,\n358373501,\n807118919,\n751426983,\n172199468,\n174707988,\n1951167187,\n1328704411,\n2129871494,\n1242495143,\n1793093310,\n1721521010,\n306195915,\n1609230749,\n1992815783,\n1790818204,\n234528824,\n551692332,\n1930351755,\n110996527,\n378457918,\n638641695,\n743517326,\n368806918,\n1583529078,\n1767199029,\n182158924,\n1114175764,\n882553770,\n552467890,\n1366456705,\n934589400,\n1574008098,\n1798094820,\n1548210079,\n821697741,\n601807702,\n332526858,\n1693310695,\n136360183,\n1189114632,\n506273277,\n397438002,\n620771032,\n676183860,\n1747529440,\n909035644,\n142389739,\n1991534368,\n272707803,\n1905681287,\n1210958911,\n596176677,\n1380009185,\n1153270606,\n1150188963,\n1067903737,\n1020928348,\n978324723,\n962376754,\n1368724127,\n1133797255,\n1367747748,\n1458212849,\n537933020,\n1295159285,\n2104731913,\n1647629177,\n1691336604,\n922114202,\n170715530,\n1608833393,\n62657989,\n1140989235,\n381784875,\n928003604,\n449509021,\n1057208185,\n1239816707,\n5255
22922,\n476962140,\n102897870,\n132620570,\n419788154,\n2095057491,\n1240747817,\n1271689397,\n973007445,\n1380110056,\n1021668229,\n12064370,\n1186917580,\n1017163094,\n597085928,\n2018803520,\n1795688603,\n1722115921,\n2015264326,\n506263638,\n1002517905,\n1229603330,\n1376031959,\n763839898,\n1970623926,\n1109937345,\n524780807,\n1976131071,\n905940439,\n1313298413,\n772929676,\n1578848328,\n1108240025,\n577439381,\n1293318580,\n1512203375,\n371003697,\n308046041,\n320070446,\n1252546340,\n568098497,\n1341794814,\n1922466690,\n480833267,\n1060838440,\n969079660,\n1836468543,\n2049091118,\n2023431210,\n383830867,\n2112679659,\n231203270,\n1551220541,\n1377927987,\n275637462,\n2110145570,\n1700335604,\n738389040,\n1688841319,\n1506456297,\n1243730675,\n258043479,\n599084776,\n41093802,\n792486733,\n1897397356,\n28077829,\n1520357900,\n361516586,\n1119263216,\n209458355,\n45979201,\n363681532,\n477245280,\n2107748241,\n601938891,\n244572459,\n1689418013,\n1141711990,\n1485744349,\n1181066840,\n1950794776,\n410494836,\n1445347454,\n2137242950,\n852679640,\n1014566730,\n1999335993,\n1871390758,\n1736439305,\n231222289,\n603972436,\n783045542,\n370384393,\n184356284,\n709706295,\n1453549767,\n591603172,\n768512391,\n854125182,\n),\ndescription=\"Random gear vector\",\n)\n
"},{"location":"options/options/#iscc_core.options.conformanc_critical","title":"conformanc_critical module-attribute
","text":"conformanc_critical = {\n\"meta_trim_name\",\n\"meta_trim_description\",\n\"meta_ngram_size_text\",\n\"meta_ngram_size_bytes\",\n\"text_ngram_size\",\n\"text_unicode_filter\",\n\"text_newlines\",\n\"data_avg_chunk_size\",\n\"cdc_gear\",\n}\n
"},{"location":"options/options/#iscc_core.options.has_logged_confromance","title":"has_logged_confromance module-attribute
","text":"has_logged_confromance = False\n
"},{"location":"options/options/#iscc_core.options.conformance_check_options","title":"conformance_check_options","text":"conformance_check_options(opts)\n
Check and log if options have non-default conformance critical values
"},{"location":"options/options/#iscc_core.options.core_opts","title":"core_opts module-attribute
","text":"core_opts = CoreOptions()\n
"},{"location":"options/options/#iscc_core.options.conformant_options","title":"conformant_options module-attribute
","text":"conformant_options = conformance_check_options(core_opts)\n
"},{"location":"units/","title":"ISCC - UNITs","text":"A standard ISCC-CODE is built from multiple ISCC-UNITs. Each unit serves a different purpose.
"},{"location":"units/code_data/","title":"ISCC - Data-Code","text":"A similarity preserving hash for binary data (soft hash).
"},{"location":"units/code_data/#iscc_core.code_data.gen_data_code","title":"gen_data_code(stream, bits = ic.core_opts.data_bits)
","text":"Create a similarity preserving ISCC Data-Code with the latest standard algorithm.
Parameters:
Name Type Description Default stream
Stream
Input data stream.
required bits
int
Bit-length of ISCC Data-Code (default 64).
ic.core_opts.data_bits
Returns:
Type Description dict
ISCC Data-Code
"},{"location":"units/code_data/#iscc_core.code_data.gen_data_code_v0","title":"gen_data_code_v0(stream, bits = ic.core_opts.data_bits)
","text":"Create an ISCC Data-Code with algorithm v0.
Parameters:
Name Type Description Default stream
Stream
Input data stream.
required bits
int
Bit-length of ISCC Data-Code (default 64).
ic.core_opts.data_bits
Returns:
Type Description dict
ISCC object with Data-Code
"},{"location":"units/code_data/#iscc_core.code_data.soft_hash_data_v0","title":"soft_hash_data_v0(stream)
","text":"Create a similarity preserving Data-Hash digest
Parameters:
Name Type Description Default stream
Stream
Input data stream.
required Returns:
Type Description bytes
256-bit Data-Hash (soft-hash) digest used as body for Data-Code
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0","title":"DataHasherV0
","text":"Incremental Data-Hash generator.
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0.__init__","title":"__init__(data = None)
","text":"Create a DataHasher
Parameters:
Name Type Description Default data
Optional[Data]
initial payload for hashing.
None
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0.push","title":"push(data)
","text":"Push data to the Data-Hash generator.
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0.digest","title":"digest()
","text":"Calculate 256-bit minhash digest from feature hashes.
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0.code","title":"code(bits = ic.core_opts.data_bits)
","text":"Encode digest as an ISCC Data-Code unit.
Parameters:
Name Type Description Default bits
int
Number of bits for the ISCC Data-Code
ic.core_opts.data_bits
Returns:
Type Description str
ISCC Data-Code
"},{"location":"units/code_flake/","title":"ISCC - Flake-Code","text":"A unique, time-sorted identifier composed of a 48-bit timestamp and 16 to 208 bits of randomness.
The ISCC Flake-Code is a unique identifier for distributed ID generation. The 64-bit version can be used as an efficient surrogate key in database systems. It has guaranteed uniqueness if generated from a single process and is time sortable in integer and base32hex representation. The 128-bit version is a K-sortable, globally unique identifier for use in distributed systems and is compatible with UUID.
Example
>>> import iscc_core as ic\n>>> ic.gen_flake_code(bits=64)\n{'iscc': 'ISCC:OAAQC7YN7PG2XOR4'}\n>>> ic.gen_flake_code(bits=128)\n{'iscc': 'ISCC:OABQC7YN7RJGUUTLKDSKBXO25MA5E'}\n# Or use the convenience Flake class for easy access to different representations\n>>> flake = ic.Flake(bits=64)\n>>> flake.iscc\n'ISCC:OAAQC7YOADBZYNF7'\n>>> flake.time\n'2022-02-18T18:03:25.468'\n>>> flake.int\n107820312524764351\n>>> flake.string\n'05VGS063JGQBU'\n
"},{"location":"units/code_flake/#iscc_core.code_flake.gen_flake_code","title":"gen_flake_code(bits = ic.core_opts.flake_bits)
","text":"Create an ISCC Flake-Code with the latest standard algorithm
Parameters:
Name Type Description Default bits
int
Target bit-length of generated Flake-Code
ic.core_opts.flake_bits
Returns:
Type Description dict
ISCC object with Flake-Code
"},{"location":"units/code_flake/#iscc_core.code_flake.gen_flake_code_v0","title":"gen_flake_code_v0(bits = ic.core_opts.flake_bits)
","text":"Create an ISCC Flake-Code with algorithm v0
Parameters:
Name Type Description Default bits
int
Target bit-length of generated Flake-Code
ic.core_opts.flake_bits
Returns:
Type Description dict
ISCC object with Flake-Code
"},{"location":"units/code_flake/#iscc_core.code_flake.uid_flake_v0","title":"uid_flake_v0(ts = None, bits = ic.core_opts.flake_bits)
","text":"Generate time and randomness based Flake-Hash
Parameters:
Name Type Description Default ts
Optional[float]
Unix timestamp (defaults to current time)
None
bits
int
Bit-length of resulting Flake-Code (multiple of 32)
ic.core_opts.flake_bits
Returns:
Type Description bytes
Flake-Hash digest
"},{"location":"units/code_instance/","title":"ISCC - Instance-Code","text":"A data checksum.
"},{"location":"units/code_instance/#iscc_core.code_instance.gen_instance_code","title":"gen_instance_code(stream, bits = ic.core_opts.instance_bits)
","text":"Create an ISCC Instance-Code with the latest standard algorithm.
Parameters:
Name Type Description Default stream
Stream
Binary data stream for Instance-Code generation
required bits
int
Bit-length of resulting Instance-Code (multiple of 64)
ic.core_opts.instance_bits
Returns:
Type Description dict
ISCC object with properties: iscc, datahash, filesize
"},{"location":"units/code_instance/#iscc_core.code_instance.gen_instance_code_v0","title":"gen_instance_code_v0(stream, bits = ic.core_opts.instance_bits)
","text":"Create an ISCC Instance-Code with algorithm v0.
Parameters:
Name Type Description Default stream
Stream
Binary data stream for Instance-Code generation
required bits
int
Bit-length of resulting Instance-Code (multiple of 64)
ic.core_opts.instance_bits
Returns:
Type Description dict
ISCC object with Instance-Code and properties: datahash, filesize
"},{"location":"units/code_instance/#iscc_core.code_instance.hash_instance_v0","title":"hash_instance_v0(stream)
","text":"Create 256-bit hash digest for the Instance-Code body
Parameters:
Name Type Description Default stream
Stream
Binary data stream for hash generation.
required Returns:
Type Description bytes
256-bit Instance-Hash digest used as body of Instance-Code
"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0","title":"InstanceHasherV0
","text":"Incremental Instance-Hash generator.
"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0.push","title":"push(data)
","text":"Push data to the Instance-Hash generator.
Parameters:
Name Type Description Default data
Data
Data to be hashed
required"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0.digest","title":"digest()
","text":"Return Instance-Hash
Returns:
Type Description bytes
Instance-Hash digest
"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0.multihash","title":"multihash()
","text":"Return blake3 multihash
Returns:
Type Description str
Blake3 hash as 256-bit multihash
"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0.code","title":"code(bits = ic.core_opts.instance_bits)
","text":"Encode digest as an ISCC Instance-Code unit.
Parameters:
Name Type Description Default bits
int
Number of bits for the ISCC Instance-Code
ic.core_opts.instance_bits
Returns:
Type Description str
ISCC Instance-Code
"},{"location":"units/code_meta/","title":"ISCC - Meta-Code","text":"A similarity preserving hash for digital asset metadata.
"},{"location":"units/code_meta/#purpose","title":"Purpose","text":"The Meta-Code is the first possible (optional) unit of an ISCC-CODE. It is calculated from the metadata of a digital asset. The primary purpose of the Meta-Code is to aid the discovery of digital assets with similar metadata and the detection of metadata anomalies. As a secondary function, Meta-Code processing also creates a secure Meta-Hash for cryptographic binding purposes.
"},{"location":"units/code_meta/#inputs","title":"Inputs","text":"The metadata supplied for Meta-Code calculation is called Seed-Metadata. Seed-Metadata has 3 possible elements:
- name (required): The name or title of the work manifested by the digital asset.
- description (optional): A disambiguating textual description of the digital asset.
- meta (optional): Industry-sector or use-case specific metadata, encoded as Data-URL (RFC-2397).
Note
Due to the broad applicability of the ISCC we do not prescribe a particular schema for the meta
-element. Instead we use the Data-URL format because it can encode and self-describe any conceivable metadata in a sufficiently machine-interpretable form at any desired specificity.
Data-URL Examples:
- Metadata is \"some\" JSON:
data:application/json;base64,<data>
- Metadata is JSON-LD:
data:application/ld+json;base64,<data>
- Metadata is \"some\" XML:
data:application/xml;base64,<data>
- Metadata is MARC21 XML:
data:application/marcxml+xml;base64,<data>
- Metadata is IPTC NewsML:
data:application/vnd.iptc.g2.newsitem+xml;base64,<data>
Data-URLs are also supported by all major internet browsers.
"},{"location":"units/code_meta/#processing","title":"Processing","text":""},{"location":"units/code_meta/#meta-code","title":"Meta-Code","text":"The first 32-bits of a Meta-Code are calculated as a similarity hash from the name
field. The second 32-bits are also calculated from the name
field if no other input was supplied. If description
is supplied but no meta
, the description
will be used for the second 32-bits. If meta
is supplied it will be used in favour of description
for the second 32-bits.
flowchart LR\n B{name?}\n B -->|Yes| J[\"P1 = SH(name)\"]\n J --> D{meta?}\n D -->|Yes| F[\"Meta-Code = P1 + SH(meta)\"]\n D -->|No| G{description?}\n G -->|Yes| H[\"Meta-Code = P1 + SH(description)\"]\n G -->|No| I[\"Meta-Code = P1\"]\n B -->|No| E[Skip Meta-Code]
Note
To support automation and reproducibility, applications that generate ISCCs should prioritize metadata that is automatically extracted from the digital asset.
If embedded metadata is not available or known to be unreliable an application should rely on external metadata or explicitly ask users to supply at least the name
-field. Applications should then first embed metadata into the asset before calculating the ISCC-CODE. This ensures that the embedded metadata is bound to the asset by the Instance-Code.
If neither embedded nor external metadata is available, the application may resort to using the filename of the digital asset as the value for the name
-field. If no value can be determined for the name
-field, an application shall skip generation of a Meta-Code and create an ISCC-CODE without a Meta-Code.
"},{"location":"units/code_meta/#meta-hash","title":"Meta-Hash","text":"In addition to the Meta-Code we also create a cryptographic hash (the Meta-Hash) of the supplied Seed-Metadata. It is used to securely bind metadata to the digital asset.
flowchart LR\n N{name?}\n N -->|Yes| A{meta?}\n N -->|No| E[Skip Meta-Hash]\n A -->|Yes| H[\"Meta-Hash = H(meta)\"]\n A -->|No| B{description?}\n B -->|Yes| HND[\"Meta-Hash = H(name + description)\"]\n B -->|No| HN[\"Meta-Hash = H(name)\"]
"},{"location":"units/code_meta/#functions","title":"Functions","text":""},{"location":"units/code_meta/#iscc_core.code_meta.gen_meta_code_v0","title":"gen_meta_code_v0(name, description = None, meta = None, bits = ic.core_opts.meta_bits)
","text":"Create an ISCC Meta-Code with the algorithm version 0.
Parameters:
Name Type Description Default name
str
Name or title of the work manifested by the digital asset
required description
Optional[str]
Optional description for disambiguation
None
meta
Optional[Union[dict,str]]
Dict or Data-URL string with extended metadata
None
bits
int
Bit-length of resulting Meta-Code (multiple of 64)
ic.core_opts.meta_bits
Returns:
Type Description dict
ISCC object with possible fields: iscc, name, description, metadata, metahash
Source code in iscc_core\\code_meta.py
def gen_meta_code_v0(name, description=None, meta=None, bits=ic.core_opts.meta_bits):\n# type: (str, Optional[str], Optional[ic.Meta], int) -> dict\n\"\"\"\n Create an ISCC Meta-Code with the algorithm version 0.\n :param str name: Name or title of the work manifested by the digital asset\n :param Optional[str] description: Optional description for disambiguation\n :param Optional[Union[dict,str] meta: Dict or Data-URL string with extended metadata\n :param int bits: Bit-length of resulting Meta-Code (multiple of 64)\n :return: ISCC object with possible fields: iscc, name, description, metadata, metahash\n :rtype: dict\n \"\"\"\n# 1. Normalize `name`\nname = \"\" if name is None else name\nname = text_clean(name)\nname = text_remove_newlines(name)\nname = text_trim(name, ic.core_opts.meta_trim_name)\nif not name:\nraise ValueError(\"Meta-Code requires non-empty name element (after normalization)\")\n# 2. Normalize `description`\ndescription = \"\" if description is None else description\ndescription = text_clean(description)\ndescription = text_trim(description, ic.core_opts.meta_trim_description)\n# Calculate meta_code, metahash, and output metadata values for the different input cases\nif meta:\nif isinstance(meta, str):\n# Data-URL expected\ndurl = meta\npayload = DataURL.from_url(durl).data\nmeta_code_digest = soft_hash_meta_v0(name, payload)\nmetahash = ic.multi_hash_blake3(payload)\nmetadata_value = durl\nelif isinstance(meta, dict):\npayload = jcs.canonicalize(meta)\nmeta_code_digest = soft_hash_meta_v0(name, payload)\nmetahash = ic.multi_hash_blake3(payload)\nmedia_type = \"application/ld+json\" if \"@context\" in meta else \"application/json\"\ndurl_obj = DataURL.from_data(media_type, base64_encode=True, data=payload)\nmetadata_value = durl_obj.url\nelse:\nraise TypeError(f\"metadata must be Data-URL string or dict not {type(meta)}\")\nelse:\npayload = \" \".join((name, description)).strip().encode(\"utf-8\")\nmeta_code_digest = soft_hash_meta_v0(name, 
description)\nmetahash = ic.multi_hash_blake3(payload)\nmetadata_value = None\nmeta_code = ic.encode_component(\nmtype=ic.MT.META,\nstype=ic.ST.NONE,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=meta_code_digest,\n)\niscc = \"ISCC:\" + meta_code\n# Build result\nresult = {\"iscc\": iscc}\nif name:\nresult[\"name\"] = name\nif description:\nresult[\"description\"] = description\nif metadata_value:\nresult[\"meta\"] = metadata_value\nresult[\"metahash\"] = metahash\nreturn result\n
"},{"location":"units/code_meta/#iscc_core.code_meta.soft_hash_meta_v0","title":"soft_hash_meta_v0(name, extra = None)
","text":"Calculate similarity preserving 256-bit hash digest from asset metadata.
Textual input should be stripped of markup, normalized and trimmed before hashing. Bytes input can be any serialized metadata (JSON, XML, Image...). Metadata should be serialized in a canonical form (for example JCS for JSON)
Note
The processing algorithm depends on the type of the extra
input. If the extra
field is supplied and non-empty, we create separate hashes for name
and extra
and interleave them in 32-bit chunks:
-
If the extra
input is None
or an empty str
/bytes
object the Meta-Hash will be generated from the name
-field only.
-
If the extra
-input is a non-empty text string (str) the string is lower-cased and the processing units are utf-8 encoded characters (possibly multibyte).
-
If the extra
-input is a non-empty bytes object the processing is done bytewise.
Parameters:
Name Type Description Default name
str
Title of the work manifested in the digital asset
required extra
Union[str,bytes,None]
Additional metadata for disambiguation
None
Returns:
Type Description bytes
256-bit simhash digest for Meta-Code
Source code in iscc_core\\code_meta.py
def soft_hash_meta_v0(name, extra=None):\n# type: (str, Union[str,bytes,None]) -> bytes\n\"\"\"\n Calculate simmilarity preserving 256-bit hash digest from asset metadata.\n Textual input should be stripped of markup, normalized and trimmed before hashing.\n Bytes input can be any serialized metadata (JSON, XML, Image...). Metadata should be\n serialized in a canonical form (for example\n [JCS](https://tools.ietf.org/id/draft-rundgren-json-canonicalization-scheme-00.html) for JSON)\n !!! note\n The processing algorithm depends on the type of the `extra` input.\n If the `extra` field is supplied and non-empty, we create separate hashes for\n `name` and `extra` and interleave them in 32-bit chunks:\n - If the `extra` input is `None` or an empty `str`/`bytes` object the Meta-Hash will\n be generated from the `name`-field only.\n - If the `extra`-input is a non-empty **text** string (str) the string is\n lower-cased and the processing units are utf-8 endoded characters (possibly multibyte).\n - If the `extra`-input is a non-empty **bytes** object the processing is done bytewise.\n :param str name: Title of the work manifested in the digital asset\n :param Union[str,bytes,None] extra: Additional metadata for disambiguation\n :return: 256-bit simhash digest for Meta-Code\n :rtype: bytes\n \"\"\"\nname = ic.text_collapse(name)\nname_n_grams = ic.sliding_window(name, width=ic.core_opts.meta_ngram_size_text)\nname_hash_digests = [blake3(s.encode(\"utf-8\")).digest() for s in name_n_grams]\nsimhash_digest = ic.alg_simhash(name_hash_digests)\nif extra in {None, \"\", b\"\"}:\nreturn simhash_digest\nelse:\n# Augment with interleaved hash for extra metadata\nif isinstance(extra, bytes):\n# Raw bytes are handled per byte\nextra_n_grams = ic.sliding_window(extra, width=ic.core_opts.meta_ngram_size_bytes)\nextra_hash_digests = [blake3(ngram).digest() for ngram in extra_n_grams]\nelif isinstance(extra, str):\n# Text is collapsed and handled per character (multibyte)\nextra = 
ic.text_collapse(extra)\nextra_n_grams = ic.sliding_window(extra, width=ic.core_opts.meta_ngram_size_text)\nextra_hash_digests = [blake3(s.encode(\"utf-8\")).digest() for s in extra_n_grams]\nelse:\nraise ValueError(\"parameter `extra` must be of type str or bytes!\")\nextra_simhash_digest = ic.alg_simhash(extra_hash_digests)\n# Interleave first half of name and extra simhashes in 32-bit chunks\nchunks_simhash_digest = sliced(simhash_digest[:16], 4)\nchunks_extra_simhash_digest = sliced(extra_simhash_digest[:16], 4)\ninterleaved = interleave(chunks_simhash_digest, chunks_extra_simhash_digest)\nsimhash_digest = bytearray()\nfor chunk in interleaved:\nsimhash_digest += chunk\nsimhash_digest = bytes(simhash_digest)\nreturn simhash_digest\n
"},{"location":"units/code_meta/#iscc_core.code_meta.text_clean","title":"text_clean(text)
","text":"Clean text for display.
- Normalize with NFKC normalization.
- Remove Control Characters (except newlines)
- Reduce multiple consecutive newlines to a maximum of two newlines
- Strip leading and trailing whitespace
Source code in iscc_core\\code_meta.py
def text_clean(text):\n# type: (str) -> str\n\"\"\"\n Clean text for display.\n - Normalize with NFKC normalization.\n - Remove Control Characters (except newlines)\n - Reduce multiple consecutive newlines to a maximum of two newlines\n - Strip leading and trailing whitespace\n \"\"\"\n# Unicode normalize\ntext = unicodedata.normalize(\"NFKC\", text)\n# Remove control characters\ntext = \"\".join(\nch for ch in text if unicodedata.category(ch)[0] != \"C\" or ch in ic.core_opts.text_newlines\n)\n# Collapse more than two consecutive newlines\nchars = []\nnewline_count = 0\nfor c in text:\nif c in ic.core_opts.text_newlines:\nif newline_count < 2:\nchars.append(\"\\n\")\nnewline_count += 1\ncontinue\nelse:\nnewline_count = 0\nchars.append(c)\ntext = \"\".join(chars)\nreturn text.strip()\n
"},{"location":"units/code_meta/#iscc_core.code_meta.text_remove_newlines","title":"text_remove_newlines(text)
","text":"Remove newlines.
The name
field serves as a displayable title. We remove newlines and leading and trailing whitespace. We also collapse consecutive spaces to single spaces.
Parameters:
Name Type Description Default text
Text for newline removal
required Returns:
Type Description str
Single line of text
Source code in iscc_core\\code_meta.py
def text_remove_newlines(text):\n# type: (str) -> str\n\"\"\"\n Remove newlines.\n The `name` field serves as a displayable title. We remove newlines and leading and trailing\n whitespace. We also collapse consecutive spaces to single spaces.\n :param text: Text for newline removal\n :return: Single line of text\n :rtype: str\n \"\"\"\nreturn \" \".join(text.split())\n
"},{"location":"units/code_meta/#iscc_core.code_meta.text_trim","title":"text_trim(text, nbytes)
","text":"Trim text such that its utf-8 encoded size does not exceed nbytes
.
Source code in iscc_core\\code_meta.py
def text_trim(text, nbytes):\n# type: (str, int) -> str\n\"\"\"Trim text such that its utf-8 encoded size does not exceed `nbytes`.\"\"\"\nreturn text.encode(\"utf-8\")[:nbytes].decode(\"utf-8\", \"ignore\").strip()\n
"},{"location":"units/content/","title":"ISCC - Content-Codes","text":""},{"location":"units/content/code_content_audio/","title":"ISCC - Audio-Code","text":"A similarity preserving hash for audio content (soft hash).
Creates an ISCC object that provides an iscc
-field with an Audio-Code and a duration
-field.
The Content-Code Audio is generated from a Chromaprint fingerprint provided as a vector of 32-bit signed integers. The iscc-sdk uses fpcalc to extract Chromaprint vectors with the following command line parameters:
$ fpcalc -raw -json -signed -length 0 myaudiofile.mp3
"},{"location":"units/content/code_content_audio/#iscc_core.code_content_audio.gen_audio_code","title":"gen_audio_code(cv, bits = ic.core_opts.audio_bits)
","text":"Create an ISCC Content-Code Audio with the latest standard algorithm.
Parameters:
Name Type Description Default cv
Iterable[int]
Chromaprint vector
required bits
int
Bit-length resulting Content-Code Audio (multiple of 64)
ic.core_opts.audio_bits
Returns:
Type Description dict
ISCC object with Content-Code Audio
"},{"location":"units/content/code_content_audio/#iscc_core.code_content_audio.gen_audio_code_v0","title":"gen_audio_code_v0(cv, bits = ic.core_opts.audio_bits)
","text":"Create an ISCC Content-Code Audio with algorithm v0.
Parameters:
Name Type Description Default cv
Iterable[int]
Chromaprint vector
required bits
int
Bit-length resulting Content-Code Audio (multiple of 64)
ic.core_opts.audio_bits
Returns:
Type Description dict
ISCC object with Content-Code Audio
"},{"location":"units/content/code_content_audio/#iscc_core.code_content_audio.soft_hash_audio_v0","title":"soft_hash_audio_v0(cv, bits = ic.core_opts.audio_bits)
","text":"Create audio similarity hash from a chromaprint vector.
Parameters:
Name Type Description Default cv
Iterable[int]
Chromaprint vector
required bits
int
Bit-length resulting similarity hash (multiple of 32)
ic.core_opts.audio_bits
Returns:
Type Description bytes
Audio-Hash digest
"},{"location":"units/content/code_content_image/","title":"ISCC - Image-Code","text":"A similarity preserving perceptual hash for images.
The ISCC Content-Code Image is created by calculating a discrete cosine transform on normalized image-pixels and comparing the values from the upper left area of the dct-matrix against their median values to set the hash-bits.
Images must be normalized before using gen_image_code. Prepare images as follows:
- Transpose image according to EXIF Orientation
- Add white background to image if it has alpha transparency
- Crop empty borders of image
- Convert image to grayscale
- Resize image to 32x32
- Flatten 32x32 matrix to an array of 1024 grayscale (uint8) pixel values
"},{"location":"units/content/code_content_image/#iscc_core.code_content_image.gen_image_code","title":"gen_image_code(pixels, bits = ic.core_opts.image_bits)
","text":"Create an ISCC Content-Code Image with the latest standard algorithm.
Parameters:
Name Type Description Default pixels
Sequence[int]
Normalized image pixels (32x32 flattened gray values).
required bits
int
Bit-length of ISCC Content-Code Image (default 64).
ic.core_opts.image_bits
Returns:
Type Description ISCC
ISCC object with Content-Code Image.
"},{"location":"units/content/code_content_image/#iscc_core.code_content_image.gen_image_code_v0","title":"gen_image_code_v0(pixels, bits = ic.core_opts.image_bits)
","text":"Create an ISCC Content-Code Image with algorithm v0.
Parameters:
Name Type Description Default pixels
Sequence[int]
Normalized image pixels (32x32 flattened gray values)
required bits
int
Bit-length of ISCC Content-Code Image (default 64).
ic.core_opts.image_bits
Returns:
Type Description ISCC
ISCC object with Content-Code Image.
"},{"location":"units/content/code_content_image/#iscc_core.code_content_image.soft_hash_image_v0","title":"soft_hash_image_v0(pixels, bits = ic.core_opts.image_bits)
","text":"Calculate image hash from normalized grayscale pixel sequence of length 1024.
Parameters:
Name Type Description Default pixels
Sequence[int]
required bits
int
Bit-length of image hash (default 64).
ic.core_opts.image_bits
Returns:
Type Description bytes
Similarity preserving Image-Hash digest.
"},{"location":"units/content/code_content_mixed/","title":"ISCC - Mixed Code","text":"A similarity hash for mixed media content.
Creates an ISCC object that provides an iscc
-field with a Mixed-Code and a parts
-field that lists the input codes.
Many digital assets embed multiple assets of different mediatypes in a single file. Text documents may include images, video includes audio in most cases. The ISCC Content-Code-Mixed encodes the similarity of a collection of assets of the same or different mediatypes that may occur in a multimedia asset.
Applications that create mixed Content-Codes must be capable of extracting embedded assets and creating individual Content-Codes per asset.
"},{"location":"units/content/code_content_mixed/#iscc_core.code_content_mixed.gen_mixed_code","title":"gen_mixed_code(codes, bits = ic.core_opts.mixed_bits)
","text":"Create an ISCC Content-Code Mixed with the latest standard algorithm.
Parameters:
Name Type Description Default codes
Iterable[str]
a list of Content-Codes.
required bits
int
Target bit-length of generated Content-Code-Mixed.
ic.core_opts.mixed_bits
Returns:
Type Description dict
ISCC object with Content-Code Mixed.
"},{"location":"units/content/code_content_mixed/#iscc_core.code_content_mixed.gen_mixed_code_v0","title":"gen_mixed_code_v0(codes, bits = ic.core_opts.mixed_bits)
","text":"Create an ISCC Content-Code-Mixed with algorithm v0.
If the provided codes are of mixed length they are stripped to bits
length for calculation.
Parameters:
Name Type Description Default codes
Iterable[str]
a list of Content-Codes.
required bits
int
Target bit-length of generated Content-Code-Mixed.
ic.core_opts.mixed_bits
Returns:
Type Description dict
ISCC object with Content-Code Mixed.
"},{"location":"units/content/code_content_mixed/#iscc_core.code_content_mixed.soft_hash_codes_v0","title":"soft_hash_codes_v0(cc_digests, bits = ic.core_opts.mixed_bits)
","text":"Create a similarity hash from multiple Content-Code digests.
The similarity hash is created from the bodies of the input codes with the first byte of the code-header prepended.
All codes must be of main-type CONTENT and have a minimum length of bits
.
Parameters:
Name Type Description Default cc_digests
Sequence[bytes]
a list of Content-Code digests.
required bits
int
Target bit-length of generated Content-Code-Mixed.
ic.core_opts.mixed_bits
Returns:
Type Description bytes
Similarity preserving byte hash.
"},{"location":"units/content/code_content_text/","title":"ISCC - Text Code","text":"A similarity preserving hash for plain-text content (soft hash).
The ISCC Text-Code is generated from plain-text that has been extracted from a media asset.
Warning
Plain-text extraction from documents in various formats (especially PDF) may yield very different results depending on the extraction tools being used. The iscc-sdk uses Apache Tika to extract text from documents for Text-Code generation.
Algorithm overview
- Apply
text_collapse
function to text input - Count characters of collapsed text
- Apply
soft_hash_text_v0
to collapsed text
"},{"location":"units/content/code_content_text/#iscc_core.code_content_text.gen_text_code_v0","title":"gen_text_code_v0(text, bits = ic.core_opts.text_bits)
","text":"Create an ISCC Text-Code with algorithm v0.
Note
Any markup (like HTML tags or markdown) should be removed from the plain-text before passing it to this function.
Parameters:
Name Type Description Default text
str
Text for Text-Code creation
required bits
int
Bit-length of ISCC Code Hash (default 64)
ic.core_opts.text_bits
Returns:
Type Description dict
ISCC schema instance with Text-Code and an additional property characters
Source code in iscc_core\\code_content_text.py
def gen_text_code_v0(text, bits=ic.core_opts.text_bits):\n# type: (str, int) -> dict\n\"\"\"\n Create an ISCC Text-Code with algorithm v0.\n !!! note\n Any markup (like HTML tags or markdown) should be removed from the plain-text\n before passing it to this function.\n :param str text: Text for Text-Code creation\n :param int bits: Bit-length of ISCC Code Hash (default 64)\n :return: ISCC schema instance with Text-Code and an aditional property `characters`\n :rtype: dict\n \"\"\"\ntext = text_collapse(text)\ncharacters = len(text)\ndigest = soft_hash_text_v0(text)\ntext_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.TEXT,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + text_code\nreturn dict(iscc=iscc, characters=characters)\n
"},{"location":"units/content/code_content_text/#iscc_core.code_content_text.text_collapse","title":"text_collapse(text)
","text":"Normalize and simplify text for similarity hashing.
- Decompose with NFD normalization.
- Remove all whitespace characters and convert text to lower case
- Filter control characters, marks (diacritics), and punctuation
- Recombine with NFKC normalization.
Note
See: Unicode normalization.
Parameters:
Name Type Description Default text
str
Plain text to be collapsed.
required Returns:
Type Description str
Collapsed plain text.
Source code in iscc_core\\code_content_text.py
def text_collapse(text):\n# type: (str) -> str\n\"\"\"\n Normalize and simplify text for similarity hashing.\n - Decompose with NFD normalization.\n - Remove all whitespace characters and convert text to lower case\n - Filter control characters, marks (diacritics), and punctuation\n - Recombine with NFKC normalization.\n !!! note\n See: [Unicode normalization](https://unicode.org/reports/tr15/).\n :param str text: Plain text to be collapsed.\n :return: Collapsed plain text.\n :rtype: str\n \"\"\"\n# Decompose with NFD\ntext = unicodedata.normalize(\"NFD\", text)\n# Remove all whitespace and convert text to lower case\ntext = \"\".join(text.split()).lower()\n# Filter control characters, marks (diacritics), and punctuation\ntext = \"\".join(\nch for ch in text if unicodedata.category(ch)[0] not in ic.core_opts.text_unicode_filter\n)\n# Recombine\ntext = unicodedata.normalize(\"NFKC\", text)\nreturn text\n
"},{"location":"units/content/code_content_text/#iscc_core.code_content_text.soft_hash_text_v0","title":"soft_hash_text_v0(text)
","text":"Creates a 256-bit similarity preserving hash for text input with algorithm v0.
- Slide over text with a
text_ngram_size
wide window and create xxh32
hashes - Create a
minhash_256
from the hashes generated in the previous step.
Note
Before passing text to this function it must be:
- stripped of markup
- normalized
- stripped of whitespace
- lowercased
Parameters:
Name Type Description Default text
str
Plain text to be hashed.
required Returns:
Type Description bytes
256-bit similarity preserving byte hash.
Source code in iscc_core\\code_content_text.py
def soft_hash_text_v0(text):\n# type: (str) -> bytes\n\"\"\"\n Creates a 256-bit similarity preserving hash for text input with algorithm v0.\n - Slide over text with a\n [`text_ngram_size`][iscc_core.options.CoreOptions.text_ngram_size] wide window\n and create [`xxh32`](https://cyan4973.github.io/xxHash/) hashes\n - Create a [`minhash_256`][iscc_core.minhash.alg_minhash_256] from the hashes generated\n in the previous step.\n !!! note\n Before passing text to this function it must be:\n - stripped of markup\n - normalized\n - stripped of whitespace\n - lowercased\n :param str text: Plain text to be hashed.\n :return: 256-bit similarity preserving byte hash.\n :rtype: bytes\n \"\"\"\nngrams = ic.sliding_window(text, ic.core_opts.text_ngram_size)\nfeatures = [xxhash.xxh32_intdigest(s.encode(\"utf-8\")) for s in ngrams]\nhash_digest = ic.alg_minhash_256(features)\nreturn hash_digest\n
"},{"location":"units/content/code_content_video/","title":"ISCC - Video-Code","text":"A similarity preserving hash for video content
The Content-Code Video is generated from MPEG-7 video frame signatures. The iscc-sdk uses ffmpeg to extract frame signatures with the following command line parameters:
$ ffmpeg -i video.mpg -vf fps=fps=5,signature=format=xml:filename=sig.xml -f null -
The relevant frame signatures can be parsed from the following elements in sig.xml:
<FrameSignature>0 0 0 1 0 0 1 0 1 1 0 0 1 1 ...</FrameSignature>
Tip
It is also possible to extract the signatures in a more compact binary format. But the format requires a custom binary parser to decode the frame signatures.
"},{"location":"units/content/code_content_video/#iscc_core.code_content_video.gen_video_code","title":"gen_video_code(frame_sigs, bits = ic.core_opts.video_bits)
","text":"Create an ISCC Video-Code with the latest standard algorithm.
Parameters:
Name Type Description Default frame_sigs
ic.FrameSig
Sequence of MP7 frame signatures
required bits
int
Bit-length of resulting Video-Code (multiple of 64)
ic.core_opts.video_bits
Returns:
Type Description dict
ISCC object with Video-Code
"},{"location":"units/content/code_content_video/#iscc_core.code_content_video.gen_video_code_v0","title":"gen_video_code_v0(frame_sigs, bits = ic.core_opts.video_bits)
","text":"Create an ISCC Video-Code with algorithm v0.
Parameters:
Name Type Description Default frame_sigs
ic.FrameSig
Sequence of MP7 frame signatures
required bits
int
Bit-length resulting Video-Code (multiple of 64)
ic.core_opts.video_bits
Returns:
Type Description dict
ISCC object with Video-Code
"},{"location":"units/content/code_content_video/#iscc_core.code_content_video.soft_hash_video_v0","title":"soft_hash_video_v0(frame_sigs, bits = ic.core_opts.video_bits)
","text":"Compute video hash v0 from MP7 frame signatures.
Parameters:
Name Type Description Default frame_sigs
ic.FrameSig
2D matrix of MP7 frame signatures
required bits
int
Bit-length of resulting Video-Code (multiple of 64)
ic.core_opts.video_bits
"},{"location":"utilities/utils/","title":"ISCC - Utilities","text":""},{"location":"utilities/utils/#iscc_core.utils.json_canonical","title":"json_canonical(obj)
","text":"Canonical, deterministic serialization of ISCC metadata.
We serialize ISCC metadata in a deterministic/reproducible manner by using JCS (RFC 8785) canonicalization.
Source code in iscc_core\\utils.py
def json_canonical(obj):\n# type: (Any) -> bytes\n\"\"\"\n Canonical, deterministic serialization of ISCC metadata.\n We serialize ISCC metadata in a deterministic/reproducible manner by using\n [JCS (RFC 8785)](https://datatracker.ietf.org/doc/html/rfc8785) canonicalization.\n \"\"\"\nser = jcs.canonicalize(obj)\ndes = json.loads(ser)\nif des != obj:\nraise ValueError(f\"Not canonicalizable {obj} round-trips to {des}\")\nreturn ser\n
"},{"location":"utilities/utils/#iscc_core.utils.sliding_window","title":"sliding_window(seq, width)
","text":"Generate a sequence of equal \"width\" slices each advancing by one element.
All types that have a length and can be sliced are supported (list, tuple, str ...). The result type matches the type of the input sequence. Fragment slices smaller than the width at the end of the sequence are not produced. If the input sequence is shorter than \"width\", then one element will be returned that is shorter than the requested width.
Parameters:
Name Type Description Default seq
Sequence
Sequence of values to slide over
required width
int
Width of sliding window in number of items
required Returns:
Type Description Generator
A generator of window sized items
Source code in iscc_core\\utils.py
def sliding_window(seq, width):\n# type: (Sequence, int) -> Generator\n\"\"\"\n Generate a sequence of equal \"width\" slices each advancing by one elemnt.\n All types that have a length and can be sliced are supported (list, tuple, str ...).\n The result type matches the type of the input sequence.\n Fragment slices smaller than the width at the end of the sequence are not produced.\n If \"witdh\" is smaller than the input sequence than one element will be returned that\n is shorter than the requested width.\n :param Sequence seq: Sequence of values to slide over\n :param int width: Width of sliding window in number of items\n :returns: A generator of window sized items\n :rtype: Generator\n \"\"\"\nif width < 2:\nraise AssertionError(\"Sliding window width must be 2 or bigger.\")\nidx = range(max(len(seq) - width + 1, 1))\nreturn (seq[i : i + width] for i in idx)\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_compare","title":"iscc_compare(a, b)
","text":"Calculate separate hamming distances of compatible components of two ISCCs
Returns:
Type Description dict
A dict with keys meta_dist, semantic_dist, content_dist, data_dist, instance_match
Source code in iscc_core\\utils.py
def iscc_compare(a, b):\n# type: (str, str) -> dict\n\"\"\"\n Calculate separate hamming distances of compatible components of two ISCCs\n :return: A dict with keys meta_dist, semantic_dist, content_dist, data_dist, instance_match\n :rtype: dict\n \"\"\"\nac = [ic.Code(unit) for unit in ic.iscc_decompose(a)]\nbc = [ic.Code(unit) for unit in ic.iscc_decompose(b)]\nresult = {}\nfor ca in ac:\nfor cb in bc:\ncat = (ca.maintype, ca.subtype, ca.version)\ncbt = (cb.maintype, cb.subtype, ca.version)\nif cat == cbt:\nif ca.maintype != ic.MT.INSTANCE:\nresult[ca.maintype.name.lower() + \"_dist\"] = iscc_distance_bytes(\nca.hash_bytes, cb.hash_bytes\n)\nelse:\nresult[\"instance_match\"] = ca.hash_bytes == cb.hash_bytes\nreturn result\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_similarity","title":"iscc_similarity(a, b)
","text":"Calculate similarity of ISCC codes as a percentage value (0-100).
MainType, SubType, Version and Length of the codes must be the same.
Parameters:
Name Type Description Default a
ISCC a
required b
ISCC b
required Returns:
Type Description int
Similarity of ISCC a and b in percent (based on hamming distance)
Source code in iscc_core\\utils.py
def iscc_similarity(a, b):\n# type: (str, str) -> int\n\"\"\"\n Calculate similarity of ISCC codes as a percentage value (0-100).\n MainType, SubType, Version and Length of the codes must be the same.\n :param a: ISCC a\n :param b: ISCC b\n :return: Similarity of ISCC a and b in percent (based on hamming distance)\n :rtype: int\n \"\"\"\na, b = iscc_pair_unpack(a, b)\nhdist = iscc_distance_bytes(a, b)\nnbits = len(a) * 8\nsim = int(((nbits - hdist) / nbits) * 100)\nreturn sim\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_distance","title":"iscc_distance(a, b)
","text":"Calculate hamming distance of ISCC codes.
MainType, SubType, Version and Length of the codes must be the same.
Parameters:
Name Type Description Default a
ISCC a
required b
ISCC b
required Returns:
Type Description int
Hamming distance in number of bits.
Source code in iscc_core\\utils.py
def iscc_distance(a, b):\n# type: (str, str) -> int\n\"\"\"\n Calculate hamming distance of ISCC codes.\n MainType, SubType, Version and Length of the codes must be the same.\n :param a: ISCC a\n :param b: ISCC b\n :return: Hamming distanced in number of bits.\n :rtype: int\n \"\"\"\na, b = iscc_pair_unpack(a, b)\nreturn iscc_distance_bytes(a, b)\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_distance_bytes","title":"iscc_distance_bytes(a, b)
","text":"Calculate hamming distance for binary hash digests of equal length.
Parameters:
Name Type Description Default a
bytes
binary hash digest
required b
bytes
binary hash digest
required Returns:
Type Description int
Hamming distance in number of bits.
Source code in iscc_core\\utils.py
def iscc_distance_bytes(a, b):\n# type: (bytes, bytes) -> int\n\"\"\"\n Calculate hamming distance for binary hash digests of equal length.\n :param bytes a: binary hash digest\n :param bytes b: binary hash digest\n :return: Hamming distance in number of bits.\n :rtype: int\n \"\"\"\nif len(a) != len(b):\nraise AssertionError(f\"Hash diggest of unequal length: {len(a)} vs {len(b)}\")\nba, bb = bitarray(), bitarray()\nba.frombytes(a)\nbb.frombytes(b)\nreturn count_xor(ba, bb)\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_pair_unpack","title":"iscc_pair_unpack(a, b)
","text":"Unpack two ISCC codes and return their body hash digests if their headers match.
Headers match if their MainType, SubType, and Version are identical.
Parameters:
Name Type Description Default a
ISCC a
required b
ISCC b
required Returns:
Type Description Tuple[bytes, bytes]
Tuple with hash digests of a and b
Raises:
Type Description ValueError
If ISCC headers don't match
Source code in iscc_core\\utils.py
def iscc_pair_unpack(a, b):\n# type: (str, str) -> Tuple[bytes, bytes]\n\"\"\"\n Unpack two ISCC codes and return their body hash digests if their headers match.\n Headers match if their MainType, SubType, and Version are identical.\n :param a: ISCC a\n :param b: ISCC b\n :return: Tuple with hash digests of a and b\n :rtype: Tuple[bytes, bytes]\n :raise ValueError: If ISCC headers don\u00b4t match\n \"\"\"\na, b = ic.iscc_clean(ic.iscc_normalize(a)), ic.iscc_clean(ic.iscc_normalize(b))\na, b = ic.decode_base32(a), ic.decode_base32(b)\na, b = ic.decode_header(a), ic.decode_header(b)\nif not a[:-1] == b[:-1]:\nraise ValueError(f\"ISCC headers don\u00b4t match: {a}, {b}\")\nreturn a[-1], b[-1]\n
"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"ISCC - Codec & Algorithms","text":"iscc-core
is the reference implementation of the core algorithms of the ISCC (International Standard Content Code)
"},{"location":"#what-is-the-iscc","title":"What is the ISCC","text":"The ISCC is a similarity preserving fingerprint and identifier for digital media assets.
ISCCs are generated algorithmically from digital content, just like cryptographic hashes. However, instead of using a single cryptographic hash function to identify data only, the ISCC uses various algorithms to create a composite identifier that exhibits similarity-preserving properties (soft hash).
The component-based structure of the ISCC identifies content at multiple levels of abstraction. Each component is self-describing, modular, and can be used separately or with others to aid in various content identification tasks. The algorithmic design supports content deduplication, database synchronization, indexing, integrity verification, timestamping, versioning, data provenance, similarity clustering, anomaly detection, usage tracking, allocation of royalties, fact-checking and general digital asset management use-cases.
"},{"location":"#what-is-iscc-core","title":"What is iscc-core
","text":"iscc-core
is a python based reference library of the core algorithms to create standard-compliant ISCC codes. It is also a good reference for porting ISCC to other programming languages.
Tip
This is a low level reference implementation that does not include features like mediatype detection, metadata extraction or file format specific content extraction. Please have a look at the iscc-sdk which adds those higher level features on top of the iscc-core
library.
"},{"location":"#project-status","title":"Project Status","text":"The ISCC is under development as ISO/CD 24138 - International Standard Content Code within ISO/TC 46/SC 9/WG 18.
"},{"location":"#iscc-architecture","title":"ISCC Architecture","text":""},{"location":"#iscc-maintypes","title":"ISCC MainTypes","text":"Idx Slug Bits Purpose 0 META 0000 Match on metadata similarity 1 SEMANTIC 0001 Match on semantic content similarity 2 CONTENT 0010 Match on perceptual content similarity 3 DATA 0011 Match on data similarity 4 INSTANCE 0100 Match on data identity 5 ISCC 0101 Composite of two or more components with common header"},{"location":"#installation","title":"Installation","text":"Use the package manager pip to install iscc-core
.
pip install iscc-core\n
"},{"location":"#quick-start","title":"Quick Start","text":"import json\nimport iscc_core as ic\nmeta_code = ic.gen_meta_code(name=\"ISCC Test Document!\")\nprint(f\"Meta-Code: {meta_code['iscc']}\")\nprint(f\"Structure: {ic.iscc_explain(meta_code['iscc'])}\\n\")\n# Extract text from file\nwith open(\"demo.txt\", \"rt\", encoding=\"utf-8\") as stream:\ntext = stream.read()\ntext_code = ic.gen_text_code_v0(text)\nprint(f\"Text-Code: {text_code['iscc']}\")\nprint(f\"Structure: {ic.iscc_explain(text_code['iscc'])}\\n\")\n# Process raw bytes of textfile\nwith open(\"demo.txt\", \"rb\") as stream:\ndata_code = ic.gen_data_code(stream)\nprint(f\"Data-Code: {data_code['iscc']}\")\nprint(f\"Structure: {ic.iscc_explain(data_code['iscc'])}\\n\")\nstream.seek(0)\ninstance_code = ic.gen_instance_code(stream)\nprint(f\"Instance-Code: {instance_code['iscc']}\")\nprint(f\"Structure: {ic.iscc_explain(instance_code['iscc'])}\\n\")\n# Combine ISCC-UNITs into ISCC-CODE\niscc_code = ic.gen_iscc_code(\n(meta_code[\"iscc\"], text_code[\"iscc\"], data_code[\"iscc\"], instance_code[\"iscc\"])\n)\n# Create convenience `Code` object from ISCC string\niscc_obj = ic.Code(iscc_code[\"iscc\"])\nprint(f\"ISCC-CODE: {ic.iscc_normalize(iscc_obj.code)}\")\nprint(f\"Structure: {iscc_obj.explain}\")\nprint(f\"Multiformat: {iscc_obj.mf_base32}\\n\")\n# Compare with changed ISCC-CODE:\nnew_dc, new_ic = ic.Code.rnd(mt=ic.MT.DATA), ic.Code.rnd(mt=ic.MT.INSTANCE)\nnew_iscc = ic.gen_iscc_code((meta_code[\"iscc\"], text_code[\"iscc\"], new_dc.uri, new_ic.uri))\nprint(f\"Compare ISCC-CODES:\\n{iscc_obj.uri}\\n{new_iscc['iscc']}\")\nprint(json.dumps(ic.iscc_compare(iscc_obj.code, new_iscc[\"iscc\"]), indent=2))\n
The output of this example is as follows:
Meta-Code: ISCC:AAAT4EBWK27737D2\nStructure: META-NONE-V0-64-3e103656bffdfc7a\n\nText-Code: ISCC:EAAQMBEYQF6457DP\nStructure: CONTENT-TEXT-V0-64-060498817dcefc6f\n\nData-Code: ISCC:GAA7UJMLDXHPPENG\nStructure: DATA-NONE-V0-64-fa258b1dcef791a6\n\nInstance-Code: ISCC:IAA3Y7HR2FEZCU4N\nStructure: INSTANCE-NONE-V0-64-bc7cf1d14991538d\n\nISCC-CODE: ISCC:KACT4EBWK27737D2AYCJRAL5Z36G76RFRMO4554RU26HZ4ORJGIVHDI\nStructure: ISCC-TEXT-V0-MCDI-3e103656bffdfc7a060498817dcefc6ffa258b1dcef791a6bc7cf1d14991538d\nMultiformat: bzqavabj6ca3fnp757r5ambeyqf6457dp7isywhoo66i2npd46hiutektru\n\nCompare ISCC-CODES:\nISCC:KACT4EBWK27737D2AYCJRAL5Z36G76RFRMO4554RU26HZ4ORJGIVHDI\nISCC:KACT4EBWK27737D2AYCJRAL5Z36G7Y7HA2BMECKMVRBEQXR2BJOS6NA\n{\n \"meta_dist\": 0,\n \"content_dist\": 0,\n \"data_dist\": 33,\n \"instance_match\": false\n}\n
"},{"location":"#documentation","title":"Documentation","text":"Documentation is published athttps://core.iscc.codes
"},{"location":"#development","title":"Development","text":"Requirements
- Python 3.7.2 or higher for code generation and static site building.
- Poetry for installation and dependency management.
Development Setup
git clone https://github.com/iscc/iscc-core.git\ncd iscc-core\npoetry install\n
Development Tasks
Tests, coverage, code formatting and other tasks can be run with the poe
command:
poe\n\nPoe the Poet - A task runner that works well with poetry.\nversion 0.18.1\n\nResult: No task specified.\n\nUSAGE\n poe [-h] [-v | -q] [--root PATH] [--ansi | --no-ansi] task [task arguments]\nGLOBAL OPTIONS\n -h, --help Show this help page and exit\n--version Print the version and exit\n-v, --verbose Increase command output (repeatable)\n-q, --quiet Decrease command output (repeatable)\n-d, --dry-run Print the task contents but don't actually run it\n --root PATH Specify where to find the pyproject.toml\n --ansi Force enable ANSI output\n --no-ansi Force disable ANSI output\nCONFIGURED TASKS\n gentests Generate conformance test data\n format Code style formating with black\n docs Copy README.md to /docs\n format-md Markdown formating with mdformat\n lf Convert line endings to lf\n test Run tests with coverage\n sec Security check with bandit\n all\n
Use poe all
to run all tasks before committing any changes.
"},{"location":"#maintainers","title":"Maintainers","text":"@titusz
"},{"location":"#contributing","title":"Contributing","text":"Pull requests are welcome. For significant changes, please open an issue first to discuss your plans. Please make sure to update tests as appropriate.
You may also want to join our developer chat on Telegram at https://t.me/iscc_dev.
"},{"location":"changelog/","title":"Changelog","text":""},{"location":"changelog/#106-2023-12-15","title":"[1.0.6] - 2023-12-15","text":" - Added wheel package testing to CI
- Fixed incompatible dependencies
"},{"location":"changelog/#105-2023-12-07","title":"[1.0.5] - 2023-12-07","text":" - Improved simhash performance
- Added native dct and wtahash support
- Added Python 3.12 support
- Updated and relaxed dependencies
"},{"location":"changelog/#104-2023-06-05","title":"[1.0.4] - 2023-06-05","text":" - Removed bases dependency
- Fixed mkdocstrings
- Updated dependencies
"},{"location":"changelog/#103-2023-03-12","title":"[1.0.3] - 2023-03-12","text":" - Fix binary wheels
"},{"location":"changelog/#102-2023-03-12","title":"[1.0.2] - 2023-03-12","text":" - Publish binary wheels
"},{"location":"changelog/#101-2023-03-11","title":"[1.0.1] - 2023-03-11","text":" - Switch to standard bitarray module
- Switch tests to latest environments
- Add Python 3.11 support to TROVE classifiers
"},{"location":"changelog/#100-2023-01-24","title":"[1.0.0] - 2023-01-24","text":" - ISO/CD 24138 v1 Release
- Updated dependencies
"},{"location":"changelog/#0214-2023-01-17","title":"[0.2.14] - 2023-01-17","text":" - Added ISO Reference documentation
- Removed non-standard conformance tests
- Fixed Meta-Code documentation
- Improved documentation CSS
"},{"location":"changelog/#0213-2023-01-16","title":"[0.2.13] - 2023-01-16","text":" - Added documentation for options
- Added python 3.11 support
- Added Markdown formatting
- Added development documentation
- Updated architecture figures
- Documentation cleanup
- Improved example code
- Optimized LF conversion
- Updated dependencies
"},{"location":"changelog/#0212-2022-11-24","title":"[0.2.12] - 2022-11-24","text":" - Fixed issue with data url compound media types
- Added ISCC version validation to
iscc_validate
- Added prefix check to
iscc_normalize
- Bundled fonts with documentation
- Updated dependencies
"},{"location":"changelog/#0211-2022-07-03","title":"[0.2.11] - 2022-07-03","text":" - Add support for gracefull build failures
"},{"location":"changelog/#0210-2022-07-03","title":"[0.2.10] - 2022-07-03","text":" - Fix pip instalation is missing setuptools
- Update mkdocs
"},{"location":"changelog/#029-2022-07-03","title":"[0.2.9] - 2022-07-03","text":" - Added iscc_compare function
- Optimized soft_hash_audio performance
- Removed Cython from build requirements
- Fixed api listing tool
- Updated codec architecture figure
- Updated dependencies
"},{"location":"changelog/#028-2022-04-21","title":"[0.2.8] - 2022-04-21","text":" - Fixed bug with subtype for semantic code
- Changed URI representation to upper case
- Changed to disallow ISCC-ID creation from ISCC-IDs
- Added line conversion tool
- Removed source wheel distribution
- Updated dependencies
"},{"location":"changelog/#027-2022-04-16","title":"[0.2.7] - 2022-04-16","text":" - Fixed bug in iscc_id_incr_v0
- Added support to accept ISCC-ID URI as input for iscc_id_incr_v0
- Added guard against custom subtype in random ISCC-CODE generation.
"},{"location":"changelog/#026-2022-04-13","title":"[0.2.6] - 2022-04-13","text":" - Added
KY
and MM
to valid prefixes - Added support to check for compiled extension modules
- Added universal wheel distribution
"},{"location":"changelog/#025-2022-04-10","title":"[0.2.5] - 2022-04-10","text":" - Fixed missing
jcs
dependency - Added SubType
NONE
to MT.ISCC to distinguish from SUM - Added support for deterministic generation of random ISCC-CODEs
- Added support for custom bit-sizes for random ISCC-CODEs
- Moved changelog into separate file
- Updated dependencies
"},{"location":"changelog/#024-2022-03-19","title":"[0.2.4] - 2022-03-19","text":" - Updated dependencies
- Added Flake.from_int and Flake.from_string
- Made Flake comparable and hashable
- Use standard hex encoded multihash for datahash and metahash
"},{"location":"changelog/#023-2022-03-06","title":"[0.2.3] - 2022-03-06","text":" - Update to iscc-schema 0.3.3
- Change image normalization instructions
- Fix issue with exporting cdc cython only functions
"},{"location":"changelog/#021-2022-03-03","title":"[0.2.1] - 2022-03-03","text":" - Cleanup and update dependencies
- Fix bitarray api change
- Fix developer commands
"},{"location":"changelog/#020-2022-02-24","title":"[0.2.0] - 2022-02-24","text":" - Complete API refactoring
- Use Data-URL as input for Meta-Code
- Use wallet address for ISCC-ID creation
- Added new Flake-Code (distributed time/random ID)
- Replaced assertions with exceptions
- Use secure random functions
- Retired Python 3.6 support (EOL)
- Return simple
dict
objects from generator functions - Added ISCC string validation
- Added multiple helper functions
"},{"location":"changelog/#019-2021-12-17","title":"[0.1.9] - 2021-12-17","text":" - Added warning on non-standard options
- Added multiformats support
- Added uri representation
- Removed redundant cdc_avg_chunk_size option
- Updated codec format documentation
"},{"location":"changelog/#018-2021-12-12","title":"[0.1.8] - 2021-12-12","text":" - Added conformance tests for all top level functions
- Added conformance tests to source dir
- Added conformance module with
selftest
function - Changed gen_image_code to accept normalized pixels instead of stream
- Changed opts to core_opts
- Removed image pre-processing and Pillow dependency
- Fixed readability of conformance tests
- Fixed soft_hash_video_v0 to accept non-tuple sequences
- Updated example code
"},{"location":"changelog/#017-2021-12-09","title":"[0.1.7] - 2021-12-09","text":" - Add dotenv for enviroment based configuration
- Cleanup package toplevel imports
- Return schema objects for iscc_code and iscc_id
- Exclude unset and none values from result dicts
- Add support for multiple code combinations for ISCC-CODE
- Add support for ISCC-ID based on singular Instance-Code
- Add initial conformance test system
"},{"location":"changelog/#016-2021-11-29","title":"[0.1.6] - 2021-11-29","text":" - Show counter for ISCC-ID in Code.explain
"},{"location":"changelog/#015-2021-11-28","title":"[0.1.5] - 2021-11-28","text":" - Fix documentation
- Change metahash creation logic
- Refactor models
- Add Content-Code-Mixed
- Add ISCC-ID
- Refactor
compose
to gen_iscc_code
- Refactor
models
to schema
"},{"location":"changelog/#014-2021-11-17","title":"[0.1.4] - 2021-11-17","text":" - Simplified options
- Optimize video WTA-hash for use with 64-bit granular features
"},{"location":"changelog/#013-2021-11-15","title":"[0.1.3] - 2021-11-15","text":" - Try to compile Cython/C accelerator modules when installing via pip
- Simplify soft_hash api return values
- Add .code() method to InstanceHasher, DataHasher
- Remove granular fingerprint calculation
- Add more top-level imports
"},{"location":"changelog/#012-2021-11-14","title":"[0.1.2] - 2021-11-14","text":" - Export more functions to toplevel
- Return schema driven objects from ISCC code generators.
"},{"location":"changelog/#011-2021-11-14","title":"[0.1.1] - 2021-11-14","text":" - Fix packaging problems
"},{"location":"changelog/#010-2021-11-13","title":"[0.1.0] - 2021-11-13","text":" - Initial release
"},{"location":"conformance/","title":"ISCC - Conformance Testing","text":"An application that claims ISCC conformance MUST pass all core functions from the ISCC conformance test suite. The test suite is available as JSON data on GitHub. Test data is structured as follows:
{\n\"<function_name>\": {\n\"<test_name>\": {\n\"inputs\": [\"<value1>\", \"<value2>\"],\n\"outputs\": [\"<value1>\", \"<value2>\"]\n}\n}\n}\n
Inputs that are expected to be raw bytes or byte-streams
are embedded as HEX encoded strings in JSON and prefixed with stream:
or bytes
to support automated decoding during implementation testing.
Example
Byte-stream inputs in JSON test data:
\"gen_data_code_v0\": {\n\"test_0000_two_bytes_64\": {\n\"inputs\": [\n\"stream:ff00\",\n64\n],\n\"outputs\": {\n\"iscc\": \"GAAXL2XYM5BQIAZ3\"\n}\n},\n...\n
"},{"location":"conformance/#iscc_core.conformance.conformance_testdata","title":"conformance_testdata()
","text":"Yield tuples of test data.
Returns:
Type Description Generator[Tuple[str, Callable, List[Any], List[Any]]]
Tuple with testdata (test_name, func_obj, inputs, outputs)
"},{"location":"conformance/#iscc_core.conformance.conformance_selftest","title":"conformance_selftest()
","text":"Run conformance tests.
Returns:
Type Description bool
whether all tests passed
"},{"location":"constants/","title":"ISCC - Types and Constants","text":""},{"location":"constants/#iscc_core.constants.MT","title":"MT
","text":""},{"location":"constants/#iscc_core.constants.MT--mt-maintypes","title":"MT - MainTypes","text":"Uint Symbol Bits Purpose 0 META 0000 Match on metadata similarity 1 SEMANTIC 0001 Match on semantic content similarity 2 CONTENT 0010 Match on perceptual content similarity 3 DATA 0011 Match on data similarity 4 INSTANCE 0100 Match on data identity 5 ISCC 0101 Composite of two or more ISCC-UNITs with common header"},{"location":"constants/#iscc_core.constants.ST","title":"ST
","text":""},{"location":"constants/#iscc_core.constants.ST--st-subtypes","title":"ST - SubTypes","text":"Uint Symbol Bits Purpose 0 NONE 0000 For MainTypes that do not specify SubTypes"},{"location":"constants/#iscc_core.constants.ST_CC","title":"ST_CC
","text":""},{"location":"constants/#iscc_core.constants.ST_CC--st_cc","title":"ST_CC","text":"SubTypes for MT.CONTENT
Uint Symbol Bits Purpose 0 TEXT 0000 Match on syntactic text similarity 1 IMAGE 0001 Match on perceptual image similarity 2 AUDIO 0010 Match on audio chroma similarity 3 VIDEO 0011 Match on perceptual similarity 4 MIXED 0100 Match on similarity of content codes"},{"location":"constants/#iscc_core.constants.ST_ISCC","title":"ST_ISCC
","text":""},{"location":"constants/#iscc_core.constants.ST_ISCC--st_iscc","title":"ST_ISCC","text":"SubTypes for MT.ISCC
Uint Symbol Bits Purpose 0 TEXT 0000 Composite ISCC including Text-Code 1 IMAGE 0001 Composite ISCC including Image-Code 2 AUDIO 0010 Composite ISCC including Audio-Code 3 VIDEO 0011 Composite ISCC including Video-Code 4 MIXED 0100 Composite ISCC including Mixed-Code 5 SUM 0101 Composite ISCC including only Data- and Instance-Code 6 NONE 0110 Composite ISCC including Meta, Data and Instance-Code"},{"location":"constants/#iscc_core.constants.VS","title":"VS
","text":""},{"location":"constants/#iscc_core.constants.VS--vs-version","title":"VS - Version","text":"Code Version
Uint Symbol Bits Purpose 0 V0 0000 Initial Version of Code without breaking changes"},{"location":"constants/#iscc_core.constants.LN","title":"LN
","text":""},{"location":"constants/#iscc_core.constants.LN--ln-length","title":"LN - Length","text":"Valid lengths for hash-digests.
- L32 = 32
- L64 = 64
- L72 = 72
- L80 = 80
- L96 = 96
- L128 = 128
- L160 = 160
- L192 = 192
- L224 = 224
- L256 = 256
"},{"location":"constants/#iscc_core.constants.MULTIBASE","title":"MULTIBASE
","text":"Supported Multibase encodings.
- base16 -> f
- base32 -> b
- base32hex -> v
- base58btc -> z
- base64url -> u
"},{"location":"iscc_code/","title":"ISCC-CODE","text":"A multi-component identifier for digital media assets.
An ISCC-CODE can be generated from the concatenation of the digests of the following five ISCC-UNITs together with a single common header:
- Meta-Code - Encodes metadata similarity
- Semantic-Code - Encodes semantic content similarity (to be developed)
- Content-Code - Encodes syntactic/perceptual similarity
- Data-Code - Encodes raw bitstream similarity
- Instance-Code - Data checksum
The following sequences of ISCC-UNITs are possible:
- Data, Instance
- Content, Data, Instance
- Semantic, Data, Instance
- Content, Semantic, Data, Instance
- Meta, Data, Instance
- Meta, Content, Data, Instance
- Meta, Semantic, Data, Instance
- Meta, Semantic, Content, Data, Instance
"},{"location":"iscc_code/#iscc_core.iscc_code.gen_iscc_code_v0","title":"gen_iscc_code_v0(codes)
","text":"Combine multiple ISCC-UNITS to an ISCC-CODE with a common header using algorithm v0.
Parameters:
Name Type Description Default codes
Sequence[str]
A valid sequence of singular ISCC-UNITS.
required Returns:
Type Description dict
An ISCC object with ISCC-CODE
Source code in iscc_core\\iscc_code.py
def gen_iscc_code_v0(codes):\n# type: (Sequence[str]) -> dict\n\"\"\"\n Combine multiple ISCC-UNITS to an ISCC-CODE with a common header using\n algorithm v0.\n :param Sequence[str] codes: A valid sequence of singluar ISCC-UNITS.\n :return: An ISCC object with ISCC-CODE\n :rtype: dict\n \"\"\"\ncodes = [ic.iscc_clean(code) for code in codes]\n# Check basic constraints\nif len(codes) < 2:\nraise ValueError(\"Minimum two ISCC units required to generate valid ISCC-CODE\")\nfor code in codes:\nif len(code) < 16:\nraise ValueError(f\"Cannot build ISCC-CODE from units shorter than 64-bits: {code}\")\n# Decode units and sort by MainType\ndecoded = sorted(\n[ic.decode_header(ic.decode_base32(code)) for code in codes], key=itemgetter(0)\n)\nmain_types = tuple(d[0] for d in decoded)\nif main_types[-2:] != (ic.MT.DATA, ic.MT.INSTANCE):\nraise ValueError(f\"ISCC-CODE requires at least MT.DATA and MT.INSTANCE units.\")\n# Determine SubType (generic mediatype)\nsub_types = [t[1] for t in decoded if t[0] in {ic.MT.SEMANTIC, ic.MT.CONTENT}]\nif len(set(sub_types)) > 1:\nraise ValueError(f\"Semantic-Code and Content-Code must be of same SubType\")\nst = sub_types.pop() if sub_types else ic.ST_ISCC.SUM if len(codes) == 2 else ic.ST_ISCC.NONE\n# Encode unit combination\nencoded_length = ic.encode_units(main_types[:-2])\n# Collect and truncate unit digests to 64-bit\ndigest = b\"\".join([t[-1][:8] for t in decoded])\nheader = ic.encode_header(ic.MT.ISCC, st, ic.VS.V0, encoded_length)\ncode = ic.encode_base32(header + digest)\niscc = \"ISCC:\" + code\nreturn dict(iscc=iscc)\n
"},{"location":"iscc_id/","title":"ISCC-ID","text":"A decentralized, owned, and short identifier for digital assets.
The ISCC-ID is generated from a similarity-hash of the units of an ISCC-CODE together with a blockchain wallet address. Its SubType designates the blockchain from which the ISCC-ID was minted. The similarity-hash is always at least 64-bits and optionally suffixed with a uvarint
encoded uniqueness counter
. The uniqueness counter
is added and incremented only if the mint collides with a pre-existing ISCC-ID minted from the same blockchain from a different ISCC-CODE or from an identical ISCC-CODE registered by a different signatory.
"},{"location":"iscc_id/#iscc_core.iscc_id.gen_iscc_id","title":"gen_iscc_id(iscc_code, chain_id, wallet, uc = 0)
","text":"Generate ISCC-ID from ISCC-CODE with the latest standard algorithm.
Parameters:
Name Type Description Default iscc_code
str
The ISCC-CODE from which to mint the ISCC-ID.
required chain_id
int
Chain-ID of blockchain from which the ISCC-ID is minted.
required wallet
str
The wallet address that signs the ISCC declaration
required uc
int
Uniqueness counter of ISCC-ID.
0
Returns:
Type Description dict
ISCC object with an ISCC-ID
"},{"location":"iscc_id/#iscc_core.iscc_id.gen_iscc_id_v0","title":"gen_iscc_id_v0(iscc_code, chain_id, wallet, uc = 0)
","text":"Generate an ISCC-ID from an ISCC-CODE with uniqueness counter 'uc' with algorithm v0.
Parameters:
Name Type Description Default iscc_code
str
The ISCC-CODE from which to mint the ISCC-ID.
required chain_id
int
Chain-ID of blockchain from which the ISCC-ID is minted.
required wallet
str
The wallet address that signs the ISCC declaration
required uc
int
Uniqueness counter of ISCC-ID.
0
Returns:
Type Description dict
ISCC object with an ISCC-ID
"},{"location":"iscc_id/#iscc_core.iscc_id.soft_hash_iscc_id_v0","title":"soft_hash_iscc_id_v0(iscc_code, wallet, uc = 0)
","text":"Calculate ISCC-ID hash digest from ISCC-CODE with algorithm v0.
Accepts an ISCC-CODE or any sequence of ISCC-UNITs.
Parameters:
Name Type Description Default iscc_code
str
ISCC-CODE
required wallet
str
The wallet address that signs the ISCC declaration
required uc
int
Uniqueness counter for ISCC-ID.
0
Returns:
Type Description bytes
Digest for ISCC-ID without header but including uniqueness counter.
"},{"location":"iscc_id/#iscc_core.iscc_id.iscc_id_incr","title":"iscc_id_incr(iscc_id)
","text":"Increment uniqueness counter of an ISCC-ID with latest standard algorithm.
Parameters:
Name Type Description Default iscc_id
str
Base32-encoded ISCC-ID.
required Returns:
Type Description str
Base32-encoded ISCC-ID with counter incremented by one.
"},{"location":"iscc_id/#iscc_core.iscc_id.iscc_id_incr_v0","title":"iscc_id_incr_v0(iscc_id)
","text":"Increment uniqueness counter of an ISCC-ID with algorithm v0.
Parameters:
Name Type Description Default iscc_id
str
Base32-encoded ISCC-ID.
required Returns:
Type Description str
Base32-encoded ISCC-ID with counter incremented by one (without \"ISCC:\" prefix).
"},{"location":"iscc_id/#iscc_core.iscc_id.alg_simhash_from_iscc_id","title":"alg_simhash_from_iscc_id(iscc_id, wallet)
","text":"Extract similarity preserving hex-encoded hash digest from ISCC-ID
We need to un-xor the ISCC-ID hash digest with the wallet address hash to obtain the similarity preserving bytestring.
"},{"location":"iso-reference/","title":"ISCC - ISO Reference","text":"The following functions are the reference implementations of ISO 24138:
"},{"location":"iso-reference/#iso-24138-51-meta-code","title":"ISO 24138 / 5.1 Meta-Code","text":"gen_meta_code_v0(name, description=None, meta=None, bits=ic.core_opts.meta_bits)
# Create an ISCC Meta-Code with the algorithm version 0.
Parameters:
Name Type Description Default name
str
Name or title of the work manifested by the digital asset
required description
Optional[str]
Optional description for disambiguation
None
meta
Optional[Union[dict,str]
Dict or Data-URL string with extended metadata
None
bits
int
Bit-length of resulting Meta-Code (multiple of 64)
ic.core_opts.meta_bits
Returns:
Type Description dict
ISCC object with possible fields: iscc, name, description, metadata, metahash
Source code in iscc_core\\code_meta.py
def gen_meta_code_v0(name, description=None, meta=None, bits=ic.core_opts.meta_bits):\n# type: (str, Optional[str], Optional[ic.Meta], int) -> dict\n\"\"\"\n Create an ISCC Meta-Code with the algorithm version 0.\n :param str name: Name or title of the work manifested by the digital asset\n :param Optional[str] description: Optional description for disambiguation\n :param Optional[Union[dict,str] meta: Dict or Data-URL string with extended metadata\n :param int bits: Bit-length of resulting Meta-Code (multiple of 64)\n :return: ISCC object with possible fields: iscc, name, description, metadata, metahash\n :rtype: dict\n \"\"\"\n# 1. Normalize `name`\nname = \"\" if name is None else name\nname = text_clean(name)\nname = text_remove_newlines(name)\nname = text_trim(name, ic.core_opts.meta_trim_name)\nif not name:\nraise ValueError(\"Meta-Code requires non-empty name element (after normalization)\")\n# 2. Normalize `description`\ndescription = \"\" if description is None else description\ndescription = text_clean(description)\ndescription = text_trim(description, ic.core_opts.meta_trim_description)\n# Calculate meta_code, metahash, and output metadata values for the different input cases\nif meta:\nif isinstance(meta, str):\n# Data-URL expected\ndurl = meta\npayload = DataURL.from_url(durl).data\nmeta_code_digest = soft_hash_meta_v0(name, payload)\nmetahash = ic.multi_hash_blake3(payload)\nmetadata_value = durl\nelif isinstance(meta, dict):\npayload = jcs.canonicalize(meta)\nmeta_code_digest = soft_hash_meta_v0(name, payload)\nmetahash = ic.multi_hash_blake3(payload)\nmedia_type = \"application/ld+json\" if \"@context\" in meta else \"application/json\"\ndurl_obj = DataURL.from_data(media_type, base64_encode=True, data=payload)\nmetadata_value = durl_obj.url\nelse:\nraise TypeError(f\"metadata must be Data-URL string or dict not {type(meta)}\")\nelse:\npayload = \" \".join((name, description)).strip().encode(\"utf-8\")\nmeta_code_digest = soft_hash_meta_v0(name, 
description)\nmetahash = ic.multi_hash_blake3(payload)\nmetadata_value = None\nmeta_code = ic.encode_component(\nmtype=ic.MT.META,\nstype=ic.ST.NONE,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=meta_code_digest,\n)\niscc = \"ISCC:\" + meta_code\n# Build result\nresult = {\"iscc\": iscc}\nif name:\nresult[\"name\"] = name\nif description:\nresult[\"description\"] = description\nif metadata_value:\nresult[\"meta\"] = metadata_value\nresult[\"metahash\"] = metahash\nreturn result\n
"},{"location":"iso-reference/#iso-24138-53-text-code","title":"ISO 24138 / 5.3 Text-Code","text":"gen_text_code_v0(text, bits=ic.core_opts.text_bits)
# Create an ISCC Text-Code with algorithm v0.
Note
Any markup (like HTML tags or markdown) should be removed from the plain-text before passing it to this function.
Parameters:
Name Type Description Default text
str
Text for Text-Code creation
required bits
int
Bit-length of ISCC Code Hash (default 64)
ic.core_opts.text_bits
Returns:
Type Description dict
ISCC schema instance with Text-Code and an additional property characters
Source code in iscc_core\\code_content_text.py
def gen_text_code_v0(text, bits=ic.core_opts.text_bits):\n# type: (str, int) -> dict\n\"\"\"\n Create an ISCC Text-Code with algorithm v0.\n !!! note\n Any markup (like HTML tags or markdown) should be removed from the plain-text\n before passing it to this function.\n :param str text: Text for Text-Code creation\n :param int bits: Bit-length of ISCC Code Hash (default 64)\n :return: ISCC schema instance with Text-Code and an aditional property `characters`\n :rtype: dict\n \"\"\"\ntext = text_collapse(text)\ncharacters = len(text)\ndigest = soft_hash_text_v0(text)\ntext_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.TEXT,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + text_code\nreturn dict(iscc=iscc, characters=characters)\n
"},{"location":"iso-reference/#iso-24138-54-image-code","title":"ISO 24138 / 5.4 Image-Code","text":"gen_image_code_v0(pixels, bits=ic.core_opts.image_bits)
# Create an ISCC Content-Code Image with algorithm v0.
Parameters:
Name Type Description Default pixels
Sequence[int]
Normalized image pixels (32x32 flattened gray values)
required bits
int
Bit-length of ISCC Content-Code Image (default 64).
ic.core_opts.image_bits
Returns:
Type Description ISCC
ISCC object with Content-Code Image.
Source code in iscc_core\\code_content_image.py
def gen_image_code_v0(pixels, bits=ic.core_opts.image_bits):\n# type: (Sequence[int], int) -> dict\n\"\"\"\n Create an ISCC Content-Code Image with algorithm v0.\n :param Sequence[int] pixels: Normalized image pixels (32x32 flattened gray values)\n :param int bits: Bit-length of ISCC Content-Code Image (default 64).\n :return: ISCC object with Content-Code Image.\n :rtype: ISCC\n \"\"\"\ndigest = soft_hash_image_v0(pixels, bits=bits)\nimage_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.IMAGE,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + image_code\nreturn {\"iscc\": iscc}\n
"},{"location":"iso-reference/#iso-24138-55-audio-code","title":"ISO 24138 / 5.5 Audio-Code","text":"gen_audio_code_v0(cv, bits=ic.core_opts.audio_bits)
# Create an ISCC Content-Code Audio with algorithm v0.
Parameters:
Name Type Description Default cv
Iterable[int]
Chromaprint vector
required bits
int
Bit-length resulting Content-Code Audio (multiple of 64)
ic.core_opts.audio_bits
Returns:
Type Description dict
ISCC object with Content-Code Audio
Source code in iscc_core\\code_content_audio.py
def gen_audio_code_v0(cv, bits=ic.core_opts.audio_bits):\n# type: (Iterable[int], int) -> dict\n\"\"\"\n Create an ISCC Content-Code Audio with algorithm v0.\n :param Iterable[int] cv: Chromaprint vector\n :param int bits: Bit-length resulting Content-Code Audio (multiple of 64)\n :return: ISCC object with Content-Code Audio\n :rtype: dict\n \"\"\"\ndigest = soft_hash_audio_v0(cv, bits=bits)\naudio_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.AUDIO,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + audio_code\nreturn {\"iscc\": iscc}\n
"},{"location":"iso-reference/#iso-24138-56-video-code","title":"ISO 24138 / 5.6 Video-Code","text":"gen_video_code_v0(frame_sigs, bits=ic.core_opts.video_bits)
# Create an ISCC Video-Code with algorithm v0.
Parameters:
Name Type Description Default frame_sigs
ic.FrameSig
Sequence of MP7 frame signatures
required bits
int
Bit-length resulting Video-Code (multiple of 64)
ic.core_opts.video_bits
Returns:
Type Description dict
ISCC object with Video-Code
Source code in iscc_core\\code_content_video.py
def gen_video_code_v0(frame_sigs, bits=ic.core_opts.video_bits):\n# type: (Sequence[ic.FrameSig], int) -> dict\n\"\"\"\n Create an ISCC Video-Code with algorithm v0.\n :param ic.FrameSig frame_sigs: Sequence of MP7 frame signatures\n :param int bits: Bit-length resulting Video-Code (multiple of 64)\n :return: ISCC object with Video-Code\n :rtype: dict\n \"\"\"\ndigest = soft_hash_video_v0(frame_sigs, bits=bits)\nvideo_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.VIDEO,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + video_code\nreturn dict(iscc=iscc)\n
"},{"location":"iso-reference/#iso-24138-57-mixed-code","title":"ISO 24138 / 5.7 Mixed-Code","text":"gen_mixed_code_v0(codes, bits=ic.core_opts.mixed_bits)
# Create an ISCC Content-Code-Mixed with algorithm v0.
If the provided codes are of mixed length they are stripped to bits
length for calculation.
Parameters:
Name Type Description Default codes
Iterable[str]
a list of Content-Codes.
required bits
int
Target bit-length of generated Content-Code-Mixed.
ic.core_opts.mixed_bits
Returns:
Type Description dict
ISCC object with Content-Code Mixed.
Source code in iscc_core\\code_content_mixed.py
def gen_mixed_code_v0(codes, bits=ic.core_opts.mixed_bits):\n# type: (Sequence[str], int) -> dict\n\"\"\"\n Create an ISCC Content-Code-Mixed with algorithm v0.\n If the provided codes are of mixed length they are stripped to `bits` length for\n calculation.\n :param Iterable[str] codes: a list of Content-Codes.\n :param int bits: Target bit-length of generated Content-Code-Mixed.\n :return: ISCC object with Content-Code Mixed.\n :rtype: dict\n \"\"\"\ndigests = [ic.decode_base32(ic.iscc_clean(code)) for code in codes]\ndigest = soft_hash_codes_v0(digests, bits=bits)\nmixed_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.MIXED,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + mixed_code\nreturn dict(iscc=iscc, parts=list(codes))\n
"},{"location":"iso-reference/#iso-24138-58-data-code","title":"ISO 24138 / 5.8 Data-Code","text":"gen_data_code_v0(stream, bits=ic.core_opts.data_bits)
# Create an ISCC Data-Code with algorithm v0.
Parameters:
Name Type Description Default stream
Stream
Input data stream.
required bits
int
Bit-length of ISCC Data-Code (default 64).
ic.core_opts.data_bits
Returns:
Type Description dict
ISCC object with Data-Code
Source code in iscc_core\\code_data.py
def gen_data_code_v0(stream, bits=ic.core_opts.data_bits):\n# type: (ic.Stream, int) -> dict\n\"\"\"\n Create an ISCC Data-Code with algorithm v0.\n :param Stream stream: Input data stream.\n :param int bits: Bit-length of ISCC Data-Code (default 64).\n :return: ISCC object with Data-Code\n :rtype: dict\n \"\"\"\nhasher = DataHasherV0()\ndata = stream.read(ic.core_opts.io_read_size)\nwhile data:\nhasher.push(data)\ndata = stream.read(ic.core_opts.io_read_size)\ndata_code = hasher.code(bits=bits)\niscc = \"ISCC:\" + data_code\nreturn dict(iscc=iscc)\n
"},{"location":"iso-reference/#iso-24138-59-instance-code","title":"ISO 24138 / 5.9 Instance-Code","text":"gen_instance_code_v0(stream, bits=ic.core_opts.instance_bits)
# Create an ISCC Instance-Code with algorithm v0.
Parameters:
Name Type Description Default stream
Stream
Binary data stream for Instance-Code generation
required bits
int
Bit-length of resulting Instance-Code (multiple of 64)
ic.core_opts.instance_bits
Returns:
Type Description dict
ISCC object with Instance-Code and properties: datahash, filesize
Source code in iscc_core\\code_instance.py
def gen_instance_code_v0(stream, bits=ic.core_opts.instance_bits):\n# type: (ic.Stream, int) -> dict\n\"\"\"\n Create an ISCC Instance-Code with algorithm v0.\n :param Stream stream: Binary data stream for Instance-Code generation\n :param int bits: Bit-length of resulting Instance-Code (multiple of 64)\n :return: ISCC object with Instance-Code and properties: datahash, filesize\n :rtype: dict\n \"\"\"\nhasher = InstanceHasherV0()\ndata = stream.read(ic.core_opts.io_read_size)\nwhile data:\nhasher.push(data)\ndata = stream.read(ic.core_opts.io_read_size)\ninstance_code = hasher.code(bits=bits)\niscc = \"ISCC:\" + instance_code\ninstance_code_obj = dict(\niscc=iscc,\ndatahash=hasher.multihash(),\nfilesize=hasher.filesize,\n)\nreturn instance_code_obj\n
"},{"location":"iso-reference/#iso-24138-60-iscc-code","title":"ISO 24138 / 6.0 ISCC-CODE","text":"gen_iscc_code_v0(codes)
# Combine multiple ISCC-UNITS to an ISCC-CODE with a common header using algorithm v0.
Parameters:
Name Type Description Default codes
Sequence[str]
A valid sequence of singluar ISCC-UNITS.
required Returns:
Type Description dict
An ISCC object with ISCC-CODE
Source code in iscc_core\\iscc_code.py
def gen_iscc_code_v0(codes):\n# type: (Sequence[str]) -> dict\n\"\"\"\n Combine multiple ISCC-UNITS to an ISCC-CODE with a common header using\n algorithm v0.\n :param Sequence[str] codes: A valid sequence of singluar ISCC-UNITS.\n :return: An ISCC object with ISCC-CODE\n :rtype: dict\n \"\"\"\ncodes = [ic.iscc_clean(code) for code in codes]\n# Check basic constraints\nif len(codes) < 2:\nraise ValueError(\"Minimum two ISCC units required to generate valid ISCC-CODE\")\nfor code in codes:\nif len(code) < 16:\nraise ValueError(f\"Cannot build ISCC-CODE from units shorter than 64-bits: {code}\")\n# Decode units and sort by MainType\ndecoded = sorted(\n[ic.decode_header(ic.decode_base32(code)) for code in codes], key=itemgetter(0)\n)\nmain_types = tuple(d[0] for d in decoded)\nif main_types[-2:] != (ic.MT.DATA, ic.MT.INSTANCE):\nraise ValueError(f\"ISCC-CODE requires at least MT.DATA and MT.INSTANCE units.\")\n# Determine SubType (generic mediatype)\nsub_types = [t[1] for t in decoded if t[0] in {ic.MT.SEMANTIC, ic.MT.CONTENT}]\nif len(set(sub_types)) > 1:\nraise ValueError(f\"Semantic-Code and Content-Code must be of same SubType\")\nst = sub_types.pop() if sub_types else ic.ST_ISCC.SUM if len(codes) == 2 else ic.ST_ISCC.NONE\n# Encode unit combination\nencoded_length = ic.encode_units(main_types[:-2])\n# Collect and truncate unit digests to 64-bit\ndigest = b\"\".join([t[-1][:8] for t in decoded])\nheader = ic.encode_header(ic.MT.ISCC, st, ic.VS.V0, encoded_length)\ncode = ic.encode_base32(header + digest)\niscc = \"ISCC:\" + code\nreturn dict(iscc=iscc)\n
"},{"location":"algorithms/cdc/","title":"ISCC - Content Defined Chunking","text":"Compatible with fastcdc
"},{"location":"algorithms/cdc/#iscc_core.cdc.alg_cdc_chunks","title":"alg_cdc_chunks(data, utf32, avg_chunk_size = ic.core_opts.data_avg_chunk_size)
","text":"A generator that yields data-dependent chunks for data
.
Usage Example:
for chunk in cdc_data_chunks(data):\nhash(chunk)\n
Parameters:
Name Type Description Default data
bytes
Raw data for variable sized chunking.
required utf32
bool
If true assume we are chunking text that is utf32 encoded.
required avg_chunk_size
int
Target chunk size in number of bytes.
ic.core_opts.data_avg_chunk_size
Returns:
Type Description Generator[bytes]
A generator that yields data chunks of variable sizes.
Source code in iscc_core\\cdc.py
def alg_cdc_chunks(data, utf32, avg_chunk_size=ic.core_opts.data_avg_chunk_size):\n# type: (Data, bool, int) -> Generator[bytes, None, None]\n\"\"\"\n A generator that yields data-dependent chunks for `data`.\n Usage Example:\n ```python\n for chunk in cdc_data_chunks(data):\n hash(chunk)\n ```\n :param bytes data: Raw data for variable sized chunking.\n :param bool utf32: If true assume we are chunking text that is utf32 encoded.\n :param int avg_chunk_size: Target chunk size in number of bytes.\n :return: A generator that yields data chunks of variable sizes.\n :rtype: Generator[bytes]\n \"\"\"\nstream = io.BytesIO(data)\nbuffer = stream.read(ic.core_opts.io_read_size)\nif not buffer:\nyield b\"\"\nmi, ma, cs, mask_s, mask_l = alg_cdc_params(avg_chunk_size)\nbuffer = memoryview(buffer)\nwhile buffer:\nif len(buffer) <= ma:\nbuffer = memoryview(bytes(buffer) + stream.read(ic.core_opts.io_read_size))\ncut_point = alg_cdc_offset(buffer, mi, ma, cs, mask_s, mask_l)\n# Make sure cut points are at 4-byte aligned for utf32 encoded text\nif utf32:\ncut_point -= cut_point % 4\nyield bytes(buffer[:cut_point])\nbuffer = buffer[cut_point:]\n
"},{"location":"algorithms/cdc/#iscc_core.cdc.alg_cdc_offset","title":"alg_cdc_offset(buffer, mi, ma, cs, mask_s, mask_l)
","text":"Find breakpoint offset for a given buffer.
Parameters:
Name Type Description Default buffer
Data
The data to be chunked.
required mi
int
Minimum chunk size.
required ma
int
Maximung chunk size.
required cs
int
Center size.
required mask_s
int
Small mask.
required mask_l
int
Large mask.
required Returns:
Type Description int
Offset of dynamic cutpoint in number of bytes.
Source code in iscc_core\\cdc.py
def alg_cdc_offset(buffer, mi, ma, cs, mask_s, mask_l):\n# type: (ic.Data, int, int, int, int, int) -> int\n\"\"\"\n Find breakpoint offset for a given buffer.\n :param Data buffer: The data to be chunked.\n :param int mi: Minimum chunk size.\n :param int ma: Maximung chunk size.\n :param int cs: Center size.\n :param int mask_s: Small mask.\n :param int mask_l: Large mask.\n :return: Offset of dynamic cutpoint in number of bytes.\n :rtype: int\n \"\"\"\npattern = 0\ni = mi\nsize = len(buffer)\nbarrier = min(cs, size)\nwhile i < barrier:\npattern = (pattern >> 1) + ic.core_opts.cdc_gear[buffer[i]]\nif not pattern & mask_s:\nreturn i + 1\ni += 1\nbarrier = min(ma, size)\nwhile i < barrier:\npattern = (pattern >> 1) + ic.core_opts.cdc_gear[buffer[i]]\nif not pattern & mask_l:\nreturn i + 1\ni += 1\nreturn i\n
"},{"location":"algorithms/cdc/#iscc_core.cdc.alg_cdc_params","title":"alg_cdc_params(avg_size: int) -> tuple
","text":"Calculate CDC parameters
Parameters:
Name Type Description Default avg_size
int
Target average size of chunks in number of bytes.
required Returns:
Type Description tuple
Tuple of (min_size, max_size, center_size, mask_s, mask_l).
Source code in iscc_core\\cdc.py
def alg_cdc_params(avg_size: int) -> tuple:\n\"\"\"\n Calculate CDC parameters\n :param int avg_size: Target average size of chunks in number of bytes.\n :returns: Tuple of (min_size, max_size, center_size, mask_s, mask_l).\n \"\"\"\nceil_div = lambda x, y: (x + y - 1) // y\nmask = lambda b: 2**b - 1\nmin_size = avg_size // 4\nmax_size = avg_size * 8\noffset = min_size + ceil_div(min_size, 2)\ncenter_size = avg_size - offset\nbits = round(log2(avg_size))\nmask_s = mask(bits + 1)\nmask_l = mask(bits - 1)\nreturn min_size, max_size, center_size, mask_s, mask_l\n
"},{"location":"algorithms/dct/","title":"ISCC - Discrete Cosine Transform","text":""},{"location":"algorithms/dct/#iscc_core.dct.alg_dct","title":"alg_dct(v)
","text":"Discrete cosine transform.
See: nayuki.io.
Parameters:
Name Type Description Default v
Sequence[float]
Input vector for DCT calculation.
required Returns:
Type Description List
DCT Transformed vector.
Source code in iscc_core\\dct.py
def alg_dct(v):\n# type: (Sequence[float]) -> List\n\"\"\"\n Discrete cosine transform.\n See: [nayuki.io](https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms).\n :param Sequence[float] v: Input vector for DCT calculation.\n :return: DCT Transformed vector.\n :rtype: List\n \"\"\"\nn = len(v)\nif n == 1:\nreturn list(v)\nelif n == 0 or n % 2 != 0:\nraise ValueError()\nelse:\nhalf = n // 2\nalpha = [(v[i] + v[-(i + 1)]) for i in range(half)]\nbeta = [\n(v[i] - v[-(i + 1)]) / (math.cos((i + 0.5) * math.pi / n) * 2.0) for i in range(half)\n]\nalpha = alg_dct(alpha)\nbeta = alg_dct(beta)\nresult = []\nfor i in range(half - 1):\nresult.append(alpha[i])\nresult.append(beta[i] + beta[i + 1])\nresult.append(alpha[-1])\nresult.append(beta[-1])\nreturn result\n
"},{"location":"algorithms/minhash/","title":"ISCC - Minhash","text":""},{"location":"algorithms/minhash/#iscc_core.minhash.alg_minhash","title":"alg_minhash(features)
","text":"Calculate a 64 dimensional minhash integer vector.
Parameters:
Name Type Description Default features
List[int]
List of integer features
required Returns:
Type Description List[int]
Minhash vector
Source code in iscc_core\\minhash.py
def alg_minhash(features):\n# type: (List[int]) -> List[int]\n\"\"\"\n Calculate a 64 dimensional minhash integer vector.\n :param List[int] features: List of integer features\n :return: Minhash vector\n :rtype: List[int]\n \"\"\"\nreturn [\nmin([(((a * f + b) & MAXI64) % MPRIME) & MAXH for f in features]) for a, b in zip(MPA, MPB)\n]\n
"},{"location":"algorithms/minhash/#iscc_core.minhash.alg_minhash_64","title":"alg_minhash_64(features)
","text":"Create 64-bit minimum hash digest.
Parameters:
Name Type Description Default features
List[int]
List of integer features
required Returns:
Type Description bytes
64-bit binary from the least significant bits of the minhash values
Source code in iscc_core\\minhash.py
def alg_minhash_64(features):\n# type: (List[int]) -> bytes\n\"\"\"\n Create 64-bit minimum hash digest.\n :param List[int] features: List of integer features\n :return: 64-bit binary from the least significant bits of the minhash values\n :rtype: bytes\n \"\"\"\nreturn alg_minhash_compress(alg_minhash(features), 1)\n
"},{"location":"algorithms/minhash/#iscc_core.minhash.alg_minhash_256","title":"alg_minhash_256(features)
","text":"Create 256-bit minimum hash digest.
Parameters:
Name Type Description Default features
List[int]
List of integer features
required Returns:
Type Description bytes
256-bit binary from the least significant bits of the minhash values
Source code in iscc_core\\minhash.py
def alg_minhash_256(features):\n# type: (List[int]) -> bytes\n\"\"\"\n Create 256-bit minimum hash digest.\n :param List[int] features: List of integer features\n :return: 256-bit binary from the least significant bits of the minhash values\n :rtype: bytes\n \"\"\"\nreturn alg_minhash_compress(alg_minhash(features), 4)\n
"},{"location":"algorithms/minhash/#iscc_core.minhash.alg_minhash_compress","title":"alg_minhash_compress(mhash, lsb = 4)
","text":"Compress minhash vector to byte hash-digest.
Concatenates lsb
number of least-significant bits from each integer in mhash
. For example an mhash
with 64 integers and lsb=4
will produce a 256-bit summary of the minhash vector.
Parameters:
Name Type Description Default mhash
List[int]
List of minhash integer features
required lsb
int
Number of the least significant bits to retain
4
Returns:
Type Description bytes
256-bit binary from the least significant bits of the minhash values
Source code in iscc_core\\minhash.py
def alg_minhash_compress(mhash, lsb=4):\n# type: (List[int], int) -> bytes\n\"\"\"\n Compress minhash vector to byte hash-digest.\n Concatenates `lsb` number of least-significant bits from each integer in `mhash`.\n For example an `mhash` with 64 integers and `lsb=4` will produce a 256-bit summary\n of the minhash vector.\n :param List[int] mhash: List of minhash integer features\n :param int lsb: Number of the least significant bits to retain\n :return: 256-bit binary from the least significant bits of the minhash values\n :rtype: bytes\n \"\"\"\nbits: str = \"\"\nfor bitpos in range(lsb):\nfor h in mhash:\nbits += str(h >> bitpos & 1)\nreturn int(bits, 2).to_bytes((len(bits) + 7) // 8, \"big\")\n
"},{"location":"algorithms/simhash/","title":"ISCC - Simhash","text":""},{"location":"algorithms/simhash/#iscc_core.simhash.alg_simhash","title":"alg_simhash(hash_digests)
","text":"Creates a similarity preserving hash from a sequence of equal sized hash digests.
Parameters:
Name Type Description Default hash_digests
list
A sequence of equaly sized byte-hashes.
required Returns:
Type Description bytes
Similarity byte-hash
Source code in iscc_core\\simhash.py
def alg_simhash(hash_digests):\n# type: (list[bytes]) -> bytes\n\"\"\"\n Creates a similarity preserving hash from a sequence of equal sized hash digests.\n :param list hash_digests: A sequence of equaly sized byte-hashes.\n :returns: Similarity byte-hash\n :rtype: bytes\n \"\"\"\nn_bytes = len(hash_digests[0])\nn_bits = n_bytes * 8\nvector = [0] * n_bits\nfor digest in hash_digests:\nh = bitarray()\nh.frombytes(digest)\nfor i in range(n_bits):\nvector[i] += h[i]\nminfeatures = len(hash_digests) / 2\nshash = 0\nfor i in range(n_bits):\nif vector[i] >= minfeatures:\nshash |= 1 << (n_bits - 1 - i)\nreturn shash.to_bytes(n_bytes, \"big\")\n
"},{"location":"algorithms/wtahash/","title":"ISCC - Winner Takes All Hash","text":""},{"location":"algorithms/wtahash/#iscc_core.wtahash.alg_wtahash","title":"alg_wtahash(vec: Sequence[float], bits: Sequence[float]) -> bytes
","text":"Calculate WTA Hash for vector with 380 values (MP7 frame signature).
Source code in iscc_core\\wtahash.py
def alg_wtahash(vec: Sequence[float], bits) -> bytes:\n\"\"\"Calculate WTA Hash for vector with 380 values (MP7 frame signature).\"\"\"\nh = []\nfor perm in WTA_VIDEO_ID_PERMUTATIONS:\nv = vec[perm[0]], vec[perm[1]]\nh.append(v.index(max(v)))\nif len(h) == bits:\nbreak\nh = bitarray(h).tobytes()\nreturn h\n
"},{"location":"codec/","title":"ISCC - Codec","text":"This module implements encoding, decoding and transcoding functions of ISCC
"},{"location":"codec/#codec-overview","title":"Codec Overview","text":""},{"location":"codec/#codec-functions","title":"Codec Functions","text":""},{"location":"codec/#iscc_core.codec.encode_component","title":"encode_component(mtype, stype, version, bit_length, digest)
","text":"Encode an ISCC unit inlcuding header and body with standard base32 encoding.
Note
The length
value must be the length in number of bits for the component. If digest
has more bits than specified by length
it wil be truncated.
Parameters:
Name Type Description Default mtype
MainType
Maintype of unit (0-6)
required stype
SubType
SubType of unit depending on MainType (0-5)
required version
Version
Version of unit algorithm (0).
required bit_length
length
Length of unit, in number of bits (multiple of 32)
required digest
bytes
The hash digest of the unit.
required Returns:
Type Description str
Base32 encoded ISCC-UNIT.
Source code in iscc_core\\codec.py
def encode_component(mtype, stype, version, bit_length, digest):\n# type: (MainType, SubType, Version, Length, bytes) -> str\n\"\"\"\n Encode an ISCC unit inlcuding header and body with standard base32 encoding.\n !!! note\n The `length` value must be the **length in number of bits** for the component.\n If `digest` has more bits than specified by `length` it wil be truncated.\n :param MainType mtype: Maintype of unit (0-6)\n :param SubType stype: SubType of unit depending on MainType (0-5)\n :param Version version: Version of unit algorithm (0).\n :param length bit_length: Length of unit, in number of bits (multiple of 32)\n :param bytes digest: The hash digest of the unit.\n :return: Base32 encoded ISCC-UNIT.\n :rtype: str\n \"\"\"\nif mtype in (MT.META, MT.SEMANTIC, MT.CONTENT, MT.DATA, MT.INSTANCE, MT.ID, MT.FLAKE):\nencoded_length = encode_length(mtype, bit_length)\nelif mtype == MT.ISCC:\nraise ValueError(f\"{mtype} is not a unit\")\nelse:\nraise ValueError(f\"Illegal MainType {mtype}\")\nnbytes = bit_length // 8\nheader = encode_header(mtype, stype, version, encoded_length)\nbody = digest[:nbytes]\ncomponent_code = encode_base32(header + body)\nreturn component_code\n
"},{"location":"codec/#iscc_core.codec.encode_header","title":"encode_header(mtype, stype, version = 0, length = 1)
","text":"Encodes header values with nibble-sized (4-bit) variable-length encoding. The result is minimum 2 and maximum 8 bytes long. If the final count of nibbles is uneven it is padded with 4-bit 0000
at the end.
Warning
The length value must be encoded beforhand because its semantics depend on the MainType (see encode_length
function).
Parameters:
Name Type Description Default mtype
MainType
MainType of unit.
required stype
SubType
SubType of unit.
required version
Version
Version of component algorithm.
0
length
Length
length value of unit (1 means 64-bits for standard units)
1
Returns:
Type Description bytes
Varnibble stream encoded ISCC header as bytes.
Source code in iscc_core\\codec.py
def encode_header(mtype, stype, version=0, length=1):\n# type: (MainType, SubType, Version, Length) -> bytes\n\"\"\"\n Encodes header values with nibble-sized (4-bit) variable-length encoding.\n The result is minimum 2 and maximum 8 bytes long. If the final count of nibbles\n is uneven it is padded with 4-bit `0000` at the end.\n !!! warning\n The length value must be encoded beforhand because its semantics depend on\n the MainType (see `encode_length` function).\n :param MainType mtype: MainType of unit.\n :param SubType stype: SubType of unit.\n :param Version version: Version of component algorithm.\n :param Length length: length value of unit (1 means 64-bits for standard units)\n :return: Varnibble stream encoded ISCC header as bytes.\n :rtype: bytes\n \"\"\"\n# TODO verify that all header params and there combination is valid\nheader = bitarray()\nfor n in (mtype, stype, version, length):\nheader += encode_varnibble(n)\n# Append zero-padding if required (right side, least significant bits).\nheader.fill()\nreturn header.tobytes()\n
"},{"location":"codec/#iscc_core.codec.decode_header","title":"decode_header(data)
","text":"Decodes varnibble encoded header and returns it together with tail data
.
Tail data is included to enable decoding of sequential ISCCs. The returned tail data must be truncated to decode_length(r[0], r[3]) bits to recover the actual hash-bytes.
Parameters:
Name Type Description Default data
bytes
ISCC bytes
required Returns:
Type Description IsccTuple
(MainType, SubType, Version, length, TailData)
Source code in iscc_core\\codec.py
def decode_header(data):\n# type: (bytes) -> IsccTuple\n\"\"\"\n Decodes varnibble encoded header and returns it together with `tail data`.\n Tail data is included to enable decoding of sequential ISCCs. The returned tail\n data must be truncated to decode_length(r[0], r[3]) bits to recover the actual\n hash-bytes.\n :param bytes data: ISCC bytes\n :return: (MainType, SubType, Version, length, TailData)\n :rtype: IsccTuple\n \"\"\"\nresult = []\nba = bitarray()\nba.frombytes(data)\ndata = ba\nfor _ in range(4):\nvalue, data = decode_varnibble(data)\nresult.append(value)\n# Strip 4-bit padding if required\nif len(data) % 8 and data[:4] == bitarray(\"0000\"):\ndata = data[4:]\nresult.append(data.tobytes())\nreturn tuple(result)\n
"},{"location":"codec/#iscc_core.codec.encode_varnibble","title":"encode_varnibble(n)
","text":"Writes integer to variable length sequence of 4-bit chunks.
Variable-length encoding scheme:
prefix bits nibbles data bits unsigned range 0 1 3 0 - 7 10 2 6 8 - 71 110 3 9 72 - 583 1110 4 12 584 - 4679 Parameters:
Name Type Description Default n
int
Positive integer to be encoded as varnibble (0-4679)
required Returns:
Type Description bitarray
Varnibble encoded integera
Source code in iscc_core\\codec.py
def encode_varnibble(n):\n# type: (int) -> bitarray\n\"\"\"\n Writes integer to variable length sequence of 4-bit chunks.\n Variable-length encoding scheme:\n ------------------------------------------------------\n | prefix bits | nibbles | data bits | unsigned range |\n | ----------- | ------- | --------- | -------------- |\n | 0 | 1 | 3 | 0 - 7 |\n | 10 | 2 | 6 | 8 - 71 |\n | 110 | 3 | 9 | 72 - 583 |\n | 1110 | 4 | 12 | 584 - 4679 |\n :param int n: Positive integer to be encoded as varnibble (0-4679)\n :return: Varnibble encoded integera\n :rtype: bitarray\n \"\"\"\nif 0 <= n < 8:\nreturn int2ba(n, length=4)\nelif 8 <= n < 72:\nreturn bitarray(\"10\") + int2ba(n - 8, length=6)\nelif 72 <= n < 584:\nreturn bitarray(\"110\") + int2ba(n - 72, length=9)\nelif 584 <= n < 4680:\nreturn bitarray(\"1110\") + int2ba(n - 584, length=12)\nelse:\nraise ValueError(\"Value must be between 0 and 4679\")\n
"},{"location":"codec/#iscc_core.codec.decode_varnibble","title":"decode_varnibble(b)
","text":"Reads first varnibble, returns its integer value and remaining bits.
Parameters:
Name Type Description Default b
bitarray
Array of header bits
required Returns:
Type Description Tuple[int, bitarray]
A tuple of the integer value of first varnible and the remaining bits.
Source code in iscc_core\\codec.py
def decode_varnibble(b):\n# type: (bitarray) -> Tuple[int, bitarray]\n\"\"\"Reads first varnibble, returns its integer value and remaining bits.\n :param bitarray b: Array of header bits\n :return: A tuple of the integer value of first varnible and the remaining bits.\n :rtype: Tuple[int, bitarray]\n \"\"\"\nbits = len(b)\nif bits >= 4 and b[0] == 0:\nreturn ba2int(b[:4]), b[4:]\nif bits >= 8 and b[1] == 0:\nreturn ba2int(b[2:8]) + 8, b[8:]\nif bits >= 12 and b[2] == 0:\nreturn ba2int(b[3:12]) + 72, b[12:]\nif bits >= 16 and b[3] == 0:\nreturn ba2int(b[4:16]) + 584, b[16:]\nraise ValueError(\"Invalid bitarray\")\n
"},{"location":"codec/#iscc_core.codec.encode_units","title":"encode_units(units)
","text":"Encodes a combination of ISCC units to an integer between 0-7 to be used as length value for the final encoding of MT.ISCC
Parameters:
Name Type Description Default units
Tuple
A tuple of a MainType combination (can be empty)
required Returns:
Type Description int
Integer value to be used as length-value for header encoding
Source code in iscc_core\\codec.py
def encode_units(units):\n# type: (Tuple[MT, ...]) -> int\n\"\"\"\n Encodes a combination of ISCC units to an integer between 0-7 to be used as length\n value for the final encoding of MT.ISCC\n :param Tuple units: A tuple of a MainType combination (can be empty)\n :return: Integer value to be used as length-value for header encoding\n :rtype: int\n \"\"\"\nreturn UNITS.index(units)\n
"},{"location":"codec/#iscc_core.codec.decode_units","title":"decode_units(unit_id)
","text":"Decodes an ISCC header length value that has been encoded with a unit_id to an ordered tuple of MainTypes.
Source code in iscc_core\\codec.py
def decode_units(unit_id):\n# type: (int) -> Tuple[MT, ...]\n\"\"\"\n Decodes an ISCC header length value that has been encoded with a unit_id to an\n ordered tuple of MainTypes.\n \"\"\"\nunits = sorted(UNITS[unit_id])\nreturn tuple(MT(u) for u in units)\n
"},{"location":"codec/#iscc_core.codec.encode_length","title":"encode_length(mtype, length)
","text":"Encode length to integer value for header encoding.
The length
value has MainType-specific semantics:
For MainTypes META
, SEMANTIC
, CONTENT
, DATA
, INSTANCE
:
Length means number of bits for the body.\nLength is encoded as the multiple of 32-bit chunks (0 being 32bits)\nExamples: 32 -> 0, 64 -> 1, 96 -> 2 ...\n
For MainType ISCC
:
MainTypes `DATA` and `INSTANCE` are mandatory for ISCC-CODEs, all others are\noptional. Length means the composition of optional 64-bit units included\nin the ISCC composite.\n\nExamples:\n No optional units -> 0000 -> 0\n CONTENT -> 0001 -> 1\n SEMANTIC -> 0010 -> 2\n SEMANTIC, CONTENT -> 0011 -> 3\n META -> 0100 -> 4\n META, CONTENT -> 0101 -> 5\n ...\n
For MainType ID
:
Lengths means number the number of bits for the body including the counter\nLength is encoded as number of bytes of the counter (64-bit body is implicit)\nExamples:\n 64 -> 0 (No counter)\n 72 -> 1 (One byte counter)\n 80 -> 2 (Two byte counter)\n ...\n
Parameters:
Name Type Description Default mtype
MainType
The MainType for which to encode the length value.
required length
Length
The length expressed according to the semantics of the type
required Returns:
Type Description int
The length value encoded as integer for use with write_header.
Source code in iscc_core\\codec.py
def encode_length(mtype, length):\n# type: (MainType, Length) -> int\n\"\"\"\n Encode length to integer value for header encoding.\n The `length` value has MainType-specific semantics:\n For MainTypes `META`, `SEMANTIC`, `CONTENT`, `DATA`, `INSTANCE`:\n Length means number of bits for the body.\n Length is encoded as the multiple of 32-bit chunks (0 being 32bits)\n Examples: 32 -> 0, 64 -> 1, 96 -> 2 ...\n For MainType `ISCC`:\n MainTypes `DATA` and `INSTANCE` are mandatory for ISCC-CODEs, all others are\n optional. Length means the composition of optional 64-bit units included\n in the ISCC composite.\n Examples:\n No optional units -> 0000 -> 0\n CONTENT -> 0001 -> 1\n SEMANTIC -> 0010 -> 2\n SEMANTIC, CONTENT -> 0011 -> 3\n META -> 0100 -> 4\n META, CONTENT -> 0101 -> 5\n ...\n For MainType `ID`:\n Lengths means number the number of bits for the body including the counter\n Length is encoded as number of bytes of the counter (64-bit body is implicit)\n Examples:\n 64 -> 0 (No counter)\n 72 -> 1 (One byte counter)\n 80 -> 2 (Two byte counter)\n ...\n :param MainType mtype: The MainType for which to encode the length value.\n :param Length length: The length expressed according to the semantics of the type\n :return: The length value encoded as integer for use with write_header.\n :rtype: int\n \"\"\"\nerror = f\"Invalid length {length} for MainType {mtype}\"\n# standard case (length field denotes number of 32-bit chunks, 0 being 32-bits)\nif mtype in (MT.META, MT.SEMANTIC, MT.CONTENT, MT.DATA, MT.INSTANCE, MT.FLAKE):\nif length >= 32 and not length % 32:\nreturn (length // 32) - 1\nraise ValueError(error)\n# flag type encoding of included components (pass through as encoded out-of-band)\nelif mtype == MT.ISCC:\nif 0 <= length <= 7:\nreturn length\nraise ValueError(error)\n# counter byte lenght encoding\nelif mtype == MT.ID:\nif 64 <= length <= 96:\nreturn (length - 64) // 8\nraise ValueError(error)\nelse:\nraise ValueError(error)\n
"},{"location":"codec/#iscc_core.codec.decode_length","title":"decode_length(mtype, length)
","text":"Dedoce raw length value from ISCC header to length of digest in number of bits.
Decodes a raw header integer value in to its semantically meaningfull value (e.g. number of bits)
Source code in iscc_core\\codec.py
def decode_length(mtype, length):\n# type: (MainType, Length) -> LN\n\"\"\"\n Dedoce raw length value from ISCC header to length of digest in number of bits.\n Decodes a raw header integer value in to its semantically meaningfull value (e.g.\n number of bits)\n \"\"\"\nif mtype in (MT.META, MT.SEMANTIC, MT.CONTENT, MT.DATA, MT.INSTANCE, MT.FLAKE):\nreturn LN((length + 1) * 32)\nelif mtype == MT.ISCC:\nreturn LN(len(decode_units(length)) * 64 + 128)\nelif mtype == MT.ID:\nreturn LN(length * 8 + 64)\nelse:\nraise ValueError(f\"Invalid length {length} for MainType {mtype}\")\n
"},{"location":"codec/#iscc_core.codec.encode_base32","title":"encode_base32(data)
","text":"Standard RFC4648 base32 encoding without padding.
Source code in iscc_core\\codec.py
def encode_base32(data):\n# type: (bytes) -> str\n\"\"\"\n Standard RFC4648 base32 encoding without padding.\n \"\"\"\nreturn b32encode(data).decode(\"ascii\").rstrip(\"=\")\n
"},{"location":"codec/#iscc_core.codec.decode_base32","title":"decode_base32(code)
","text":"Standard RFC4648 base32 decoding without padding and with casefolding.
Source code in iscc_core\\codec.py
def decode_base32(code):\n# type: (str) -> bytes\n\"\"\"\n Standard RFC4648 base32 decoding without padding and with casefolding.\n \"\"\"\n# python stdlib does not support base32 without padding, so we have to re-pad.\ncl = len(code)\npad_length = math.ceil(cl / 8) * 8 - cl\nreturn bytes(b32decode(code + \"=\" * pad_length, casefold=True))\n
"},{"location":"codec/#iscc_core.codec.iscc_decompose","title":"iscc_decompose(iscc_code)
","text":"Decompose a normalized ISCC-CODE or any valid ISCC sequence into a list of ISCC-UNITS.
A valid ISCC sequence is a string concatenation of ISCC-UNITS optionally separated by a hyphen.
Source code in iscc_core\\codec.py
def iscc_decompose(iscc_code):\n# type: (str) -> List[str]\n\"\"\"\n Decompose a normalized ISCC-CODE or any valid ISCC sequence into a list of ISCC-UNITS.\n A valid ISCC sequence is a string concatenation of ISCC-UNITS optionally seperated\n by a hyphen.\n \"\"\"\niscc_code = iscc_clean(iscc_code)\ncomponents = []\nraw_code = decode_base32(iscc_code)\nwhile raw_code:\nmt, st, vs, ln, body = decode_header(raw_code)\n# standard ISCC-UNIT with tail continuation\nif mt != MT.ISCC:\nln_bits = decode_length(mt, ln)\ncode = encode_component(mt, st, vs, ln_bits, body[: ln_bits // 8])\ncomponents.append(code)\nraw_code = body[ln_bits // 8 :]\ncontinue\n# ISCC-CODE\nmain_types = decode_units(ln)\n# rebuild dynamic units (META, SEMANTIC, CONTENT)\nfor idx, mtype in enumerate(main_types):\nstype = ST.NONE if mtype == MT.META else st\ncode = encode_component(mtype, stype, vs, 64, body[idx * 8 :])\ncomponents.append(code)\n# rebuild static units (DATA, INSTANCE)\ndata_code = encode_component(MT.DATA, ST.NONE, vs, 64, body[-16:-8])\ninstance_code = encode_component(MT.INSTANCE, ST.NONE, vs, 64, body[-8:])\ncomponents.extend([data_code, instance_code])\nbreak\nreturn components\n
"},{"location":"codec/#iscc_core.codec.iscc_normalize","title":"iscc_normalize(iscc_code)
","text":"Normalize an ISCC to its canonical form.
The canonical form of an ISCC is its shortest base32 encoded representation prefixed with the string ISCC:
.
Possible valid inputs:
MEACB7X7777574L6\nISCC:MEACB7X7777574L6\nfcc010001657fe7cafe9791bb\niscc:maagztfqttvizpjr\nIscc:Maagztfqttvizpjr\n
Info
A concatenated sequence of codes will be composed into a single ISCC of MainType MT.ISCC
if possible.
Example
>>> import iscc_core\n>>> iscc_core.iscc_normalize(\"GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W\")\n'ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM'\n
Parameters:
Name Type Description Default iscc_code
str
Any valid ISCC string
required Returns:
Type Description str
Normalized ISCC
Source code in iscc_core\\codec.py
def iscc_normalize(iscc_code):\n# type: (str) -> str\n\"\"\"\n Normalize an ISCC to its canonical form.\n The canonical form of an ISCC is its shortest base32 encoded representation\n prefixed with the string `ISCC:`.\n Possible valid inputs:\n MEACB7X7777574L6\n ISCC:MEACB7X7777574L6\n fcc010001657fe7cafe9791bb\n iscc:maagztfqttvizpjr\n Iscc:Maagztfqttvizpjr\n !!! info\n A concatenated sequence of codes will be composed into a single ISCC of MainType\n `MT.ISCC` if possible.\n !!! example\n ``` py\n >>> import iscc_core\n >>> iscc_core.iscc_normalize(\"GAAW2PRCRS5LNVZV-IAAUVACQKXE3V44W\")\n 'ISCC:KUAG2PRCRS5LNVZVJKAFAVOJXLZZM'\n ```\n :param str iscc_code: Any valid ISCC string\n :return: Normalized ISCC\n :rtype: str\n \"\"\"\nfrom iscc_core.iscc_code import gen_iscc_code_v0\ndecoders = {\nMULTIBASE.base16.value: bytes.fromhex, # f\nMULTIBASE.base32.value: decode_base32, # b\nMULTIBASE.base32hex.value: decode_base32hex, # v\nMULTIBASE.base58btc.value: base58.b58decode, # z\nMULTIBASE.base64url.value: decode_base64, # u\n}\n# Transcode to base32 if <multibase><multicodec> encoded\nmultibase_prefix = iscc_code[0]\nif multibase_prefix in decoders.keys():\ndecoder = decoders[multibase_prefix]\ndecoded = decoder(iscc_code[1:])\nif not decoded.startswith(MC_PREFIX):\nraise ValueError(f\"Malformed multiformat codec: {decoded[:2]}\")\niscc_code = encode_base32(decoded[2:])\nelse:\nprefix = iscc_code.lstrip(\"ISCC:\").lstrip(\"iscc:\")[:2].upper()\nif prefix not in PREFIXES:\nraise ValueError(f\"ISCC starts with invalid prefix {prefix}\")\ndecomposed = iscc_decompose(iscc_code)\nrecomposed = gen_iscc_code_v0(decomposed)[\"iscc\"] if len(decomposed) >= 2 else decomposed[0]\nreturn f\"ISCC:{recomposed}\" if not recomposed.startswith(\"ISCC:\") else recomposed\n
"},{"location":"codec/#alternate-encodings","title":"Alternate Encodings","text":""},{"location":"codec/#iscc_core.codec.encode_base64","title":"encode_base64(data)
","text":"Standard RFC4648 base64url encoding without padding.
Source code in iscc_core\\codec.py
def encode_base64(data):\n# type: (bytes) -> str\n\"\"\"\n Standard RFC4648 base64url encoding without padding.\n \"\"\"\ncode = urlsafe_b64encode(data).decode(\"ascii\")\nreturn code.rstrip(\"=\")\n
"},{"location":"codec/#iscc_core.codec.decode_base64","title":"decode_base64(code)
","text":"Standard RFC4648 base64url decoding without padding.
Source code in iscc_core\\codec.py
def decode_base64(code):\n# type: (str) -> bytes\n\"\"\"\n Standard RFC4648 base64url decoding without padding.\n \"\"\"\npadding = 4 - (len(code) % 4)\nstring = code + (\"=\" * padding)\nreturn urlsafe_b64decode(string)\n
"},{"location":"codec/#iscc_core.codec.encode_base32hex","title":"encode_base32hex(data)
","text":"RFC4648 Base32hex encoding without padding
see: https://tools.ietf.org/html/rfc4648#page-10
Source code in iscc_core\\codec.py
def encode_base32hex(data):\n# type: (bytes) -> str\n\"\"\"\n RFC4648 Base32hex encoding without padding\n see: https://tools.ietf.org/html/rfc4648#page-10\n \"\"\"\nb32 = encode_base32(data)\nreturn b32.translate(b32_to_hex)\n
"},{"location":"codec/#iscc_core.codec.decode_base32hex","title":"decode_base32hex(code)
","text":"RFC4648 Base32hex decoding without padding
see: https://tools.ietf.org/html/rfc4648#page-10
Source code in iscc_core\\codec.py
def decode_base32hex(code):\n# type: (str) -> bytes\n\"\"\"\n RFC4648 Base32hex decoding without padding\n see: https://tools.ietf.org/html/rfc4648#page-10\n \"\"\"\nb32 = code.translate(hex_to_b32)\nreturn decode_base32(b32)\n
"},{"location":"codec/#helper-functions","title":"Helper Functions","text":""},{"location":"codec/#iscc_core.codec.iscc_decode","title":"iscc_decode(iscc)
","text":"Decode ISCC to an IsccTuple
Parameters:
Name Type Description Default iscc
str
ISCC string
required Returns:
Type Description IsccTuple
ISCC decoded to a tuple
Source code in iscc_core\\codec.py
def iscc_decode(iscc):\n# type: (str) -> IsccTuple\n\"\"\"\n Decode ISCC to an IsccTuple\n :param str iscc: ISCC string\n :return: ISCC decoded to a tuple\n :rtype: IsccTuple\n \"\"\"\niscc = iscc_clean(iscc_normalize(iscc))\ndata = decode_base32(iscc)\nreturn decode_header(data)\n
"},{"location":"codec/#iscc_core.codec.iscc_explain","title":"iscc_explain(iscc)
","text":"Convert ISCC to a human-readable representation
Parameters:
Name Type Description Default iscc
str
ISCC string
required Returns:
Type Description str
Human-readable representation of ISCC
Source code in iscc_core\\codec.py
def iscc_explain(iscc):\n# type: (str) -> str\n\"\"\"\n Convert ISCC to a human-readable representation\n :param str iscc: ISCC string\n :return: Human-readable representation of ISCC\n :rtype: str\n \"\"\"\ntid = iscc_type_id(iscc)\nfields = iscc_decode(iscc)\nif fields[0] == MT.ID:\ncounter_bytes = fields[-1][8:]\nif counter_bytes:\ncounter = uvarint.decode(counter_bytes)\nhex_hash = fields[-1][:8].hex()\nreturn f\"{tid}-{hex_hash}-{counter.integer}\"\nhex_hash = fields[-1].hex()\nreturn f\"{tid}-{hex_hash}\"\n
"},{"location":"codec/#iscc_core.codec.iscc_type_id","title":"iscc_type_id(iscc)
","text":"Extract and convert ISCC HEADER to a readable Type-ID string.
Type-ids can be used as names in databases to index ISCC-UNITs separately.
Parameters:
Name Type Description Default iscc
str
ISCC string
required Returns:
Type Description str
Unique Type-ID string
Source code in iscc_core\\codec.py
def iscc_type_id(iscc):\n# type: (str) -> str\n\"\"\"\n Extract and convert ISCC HEADER to a readable Type-ID string.\n Type-ids can be used as names in databases to index ISCC-UNITs seperatly.\n :param str iscc: ISCC string\n :return: Unique Type-ID string\n :rtype: str\n \"\"\"\nfields = iscc_decode(iscc)\nmtype = MT(fields[0])\nstype = SUBTYPE_MAP[fields[0]](fields[1])\nif mtype == MT.ISCC:\nmtypes = decode_units(fields[3])\nlength = \"\".join([t.name[0] for t in mtypes]) + \"DI\"\nelse:\nlength = decode_length(fields[0], fields[3])\nversion = VS(fields[2])\nreturn f\"{mtype.name}-{stype.name}-{version.name}-{length}\"\n
"},{"location":"codec/#iscc_core.codec.iscc_validate","title":"iscc_validate(iscc, strict = True)
","text":"Validate that a given string is a strictly well-formed ISCC.
A strictly well-formed ISCC is:
- an ISCC-CODE or ISCC-UNIT
- encoded with base32 upper without padding
- has a valid combination of header values
- is represented in its canonical URI form
Parameters:
Name Type Description Default iscc
str
ISCC string
required strict
bool
Raise an exception if validation fails (default True)
True
Returns:
Type Description bool
True if string is valid else false. (raises ValueError in strict mode)
Source code in iscc_core\\codec.py
def iscc_validate(iscc, strict=True):\n# type: (str, bool) -> bool\n\"\"\"\n Validate that a given string is a *strictly well-formed* ISCC.\n A *strictly well-formed* ISCC is:\n - an ISCC-CODE or ISCC-UNIT\n - encoded with base32 upper without padding\n - has a valid combination of header values\n - is represented in its canonical URI form\n :param str iscc: ISCC string\n :param bool strict: Raise an exeption if validation fails (default True)\n :return: True if sting is valid else false. (raises ValueError in strict mode)\n :rtype: bool\n \"\"\"\n# Basic regex validation\nmatch = re.match(\"^ISCC:[A-Z2-7]{10,60}$\", iscc)\nif not match:\nif strict:\nraise ValueError(\"ISCC string does not match ^ISCC:[A-Z2-7]{10,60}$\")\nelse:\nreturn False\ncleaned = iscc_clean(iscc)\nprefix = cleaned[:2]\nif prefix not in PREFIXES:\nif strict:\nraise ValueError(f\"Header starts with invalid sequence {prefix}\")\nelse:\nreturn False\nm, s, v, l, t = decode_header(decode_base32(cleaned))\nif v != 0:\nif strict:\nraise ValueError(f\"Unknown version {v} in version header\")\nelse:\nreturn False\nreturn True\n
"},{"location":"codec/#iscc_core.codec.iscc_clean","title":"iscc_clean(iscc)
","text":"Cleanup ISCC string.
Removes leading scheme, dashes, leading/trailing whitespace.
Parameters:
Name Type Description Default iscc
str
Any valid ISCC string
required Returns:
Type Description str
Cleaned ISCC string.
Source code in iscc_core\\codec.py
def iscc_clean(iscc):\n# type: (str) -> str\n\"\"\"\n Cleanup ISCC string.\n Removes leading scheme, dashes, leading/trailing whitespace.\n :param str iscc: Any valid ISCC string\n :return: Cleaned ISCC string.\n :rtype: str\n \"\"\"\nsplit = [part.strip() for part in iscc.strip().split(\":\")]\nif len(split) == 1:\ncode = split[0]\n# remove dashes if not multiformat\nif code[0] not in list(MULTIBASE):\ncode = code.replace(\"-\", \"\")\nreturn code\nelif len(split) == 2:\nscheme, code = split\nif scheme.lower() != \"iscc\":\nraise ValueError(f\"Invalid scheme: {scheme}\")\nreturn code.replace(\"-\", \"\")\nelse:\nraise ValueError(f\"Malformed ISCC string: {iscc}\")\n
"},{"location":"options/options/","title":"ISCC-CORE - Configuration Options","text":"Options for the iscc-core package can be configured using environment variables. Variables are loaded as class-attributes on the CoreOptions
instance. Environment variables are named like the class-attribute but prefixed with ISCC_CORE_
and upper-cased.
Example how to access configuration options
import iscc_core as ic\n# To access ISCC_CORE_TEXT_NGRAM_SIZE setting use\ntext_ngram_size: int = ic.core_opts.text_ngram_size\n
"},{"location":"options/options/#iscc_core.options.CoreOptions","title":"CoreOptions","text":"Parameters with defaults for ISCC calculations.
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_bits","title":"meta_bits instance-attribute
class-attribute
","text":"meta_bits: int = Field(\n64,\ndescription=\"Default length of generated Meta-Code in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_trim_name","title":"meta_trim_name instance-attribute
class-attribute
","text":"meta_trim_name: int = Field(\n128, description=\"Trim `name` to this mumber of bytes\"\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_trim_description","title":"meta_trim_description instance-attribute
class-attribute
","text":"meta_trim_description: int = Field(\n4096,\ndescription=\"Trim `description` to this number of bytes\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_ngram_size_text","title":"meta_ngram_size_text instance-attribute
class-attribute
","text":"meta_ngram_size_text: int = Field(\n3,\ndescription=\"Sliding window width (characters) for metadata\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.meta_ngram_size_bytes","title":"meta_ngram_size_bytes instance-attribute
class-attribute
","text":"meta_ngram_size_bytes: int = Field(\n4,\ndescription=\"Sliding window width (bytes) for metadata\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.text_bits","title":"text_bits instance-attribute
class-attribute
","text":"text_bits: int = Field(\n64,\ndescription=\"Default length of generated Content-Code Text in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.text_ngram_size","title":"text_ngram_size instance-attribute
class-attribute
","text":"text_ngram_size: int = Field(\n13,\ndescription=\"Number of characters per feature hash (size of sliding window)\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.text_unicode_filter","title":"text_unicode_filter instance-attribute
class-attribute
","text":"text_unicode_filter: frozenset = Field(\nfrozenset({\"C\", \"M\", \"P\"}),\ndescription=\"Unicode categories to remove during text normalization\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.text_newlines","title":"text_newlines instance-attribute
class-attribute
","text":"text_newlines: frozenset = Field(\nfrozenset(\n{\n\"\\n\",\n\"\\x0b\",\n\"\\x0c\",\n\"\\r\",\n\"\\x85\",\n\"\\u2028\",\n\"\\u2029\",\n}\n),\ndescription=\"Characters regarded as newline characters for normalization purposes\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.image_bits","title":"image_bits instance-attribute
class-attribute
","text":"image_bits: int = Field(\n64,\ndescription=\"Default length of generated Content-Code Image in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.audio_bits","title":"audio_bits instance-attribute
class-attribute
","text":"audio_bits: int = Field(\n64,\ndescription=\"Default length of generated Content-Code Audio in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.video_bits","title":"video_bits instance-attribute
class-attribute
","text":"video_bits: int = Field(\n64,\ndescription=\"Default length of generated Content-Code Video in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.data_bits","title":"data_bits instance-attribute
class-attribute
","text":"data_bits: int = Field(\n64,\ndescription=\"Default length of generated Data-Code in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.data_avg_chunk_size","title":"data_avg_chunk_size instance-attribute
class-attribute
","text":"data_avg_chunk_size: int = Field(\n1024,\ndescription=\"Target chunk size for data chunking in number of bytes.\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.instance_bits","title":"instance_bits instance-attribute
class-attribute
","text":"instance_bits: int = Field(\n64,\ndescription=\"Default length of generated Instance-Code in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.mixed_bits","title":"mixed_bits instance-attribute
class-attribute
","text":"mixed_bits: int = Field(\n64,\ndescription=\"Default length of generated Mixed-Code in bits\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.io_read_size","title":"io_read_size instance-attribute
class-attribute
","text":"io_read_size: int = Field(\n2097152,\ndescription=\"File read buffer size in bytes for hashing operations\",\n)\n
"},{"location":"options/options/#iscc_core.options.CoreOptions.cdc_gear","title":"cdc_gear instance-attribute
class-attribute
","text":"cdc_gear: Tuple = Field(\n(\n1553318008,\n574654857,\n759734804,\n310648967,\n1393527547,\n1195718329,\n694400241,\n1154184075,\n1319583805,\n1298164590,\n122602963,\n989043992,\n1918895050,\n933636724,\n1369634190,\n1963341198,\n1565176104,\n1296753019,\n1105746212,\n1191982839,\n1195494369,\n29065008,\n1635524067,\n722221599,\n1355059059,\n564669751,\n1620421856,\n1100048288,\n1018120624,\n1087284781,\n1723604070,\n1415454125,\n737834957,\n1854265892,\n1605418437,\n1697446953,\n973791659,\n674750707,\n1669838606,\n320299026,\n1130545851,\n1725494449,\n939321396,\n748475270,\n554975894,\n1651665064,\n1695413559,\n671470969,\n992078781,\n1935142196,\n1062778243,\n1901125066,\n1935811166,\n1644847216,\n744420649,\n2068980838,\n1988851904,\n1263854878,\n1979320293,\n111370182,\n817303588,\n478553825,\n694867320,\n685227566,\n345022554,\n2095989693,\n1770739427,\n165413158,\n1322704750,\n46251975,\n710520147,\n700507188,\n2104251000,\n1350123687,\n1593227923,\n1756802846,\n1179873910,\n1629210470,\n358373501,\n807118919,\n751426983,\n172199468,\n174707988,\n1951167187,\n1328704411,\n2129871494,\n1242495143,\n1793093310,\n1721521010,\n306195915,\n1609230749,\n1992815783,\n1790818204,\n234528824,\n551692332,\n1930351755,\n110996527,\n378457918,\n638641695,\n743517326,\n368806918,\n1583529078,\n1767199029,\n182158924,\n1114175764,\n882553770,\n552467890,\n1366456705,\n934589400,\n1574008098,\n1798094820,\n1548210079,\n821697741,\n601807702,\n332526858,\n1693310695,\n136360183,\n1189114632,\n506273277,\n397438002,\n620771032,\n676183860,\n1747529440,\n909035644,\n142389739,\n1991534368,\n272707803,\n1905681287,\n1210958911,\n596176677,\n1380009185,\n1153270606,\n1150188963,\n1067903737,\n1020928348,\n978324723,\n962376754,\n1368724127,\n1133797255,\n1367747748,\n1458212849,\n537933020,\n1295159285,\n2104731913,\n1647629177,\n1691336604,\n922114202,\n170715530,\n1608833393,\n62657989,\n1140989235,\n381784875,\n928003604,\n449509021,\n1057208185,\n1239816707,\n5255
22922,\n476962140,\n102897870,\n132620570,\n419788154,\n2095057491,\n1240747817,\n1271689397,\n973007445,\n1380110056,\n1021668229,\n12064370,\n1186917580,\n1017163094,\n597085928,\n2018803520,\n1795688603,\n1722115921,\n2015264326,\n506263638,\n1002517905,\n1229603330,\n1376031959,\n763839898,\n1970623926,\n1109937345,\n524780807,\n1976131071,\n905940439,\n1313298413,\n772929676,\n1578848328,\n1108240025,\n577439381,\n1293318580,\n1512203375,\n371003697,\n308046041,\n320070446,\n1252546340,\n568098497,\n1341794814,\n1922466690,\n480833267,\n1060838440,\n969079660,\n1836468543,\n2049091118,\n2023431210,\n383830867,\n2112679659,\n231203270,\n1551220541,\n1377927987,\n275637462,\n2110145570,\n1700335604,\n738389040,\n1688841319,\n1506456297,\n1243730675,\n258043479,\n599084776,\n41093802,\n792486733,\n1897397356,\n28077829,\n1520357900,\n361516586,\n1119263216,\n209458355,\n45979201,\n363681532,\n477245280,\n2107748241,\n601938891,\n244572459,\n1689418013,\n1141711990,\n1485744349,\n1181066840,\n1950794776,\n410494836,\n1445347454,\n2137242950,\n852679640,\n1014566730,\n1999335993,\n1871390758,\n1736439305,\n231222289,\n603972436,\n783045542,\n370384393,\n184356284,\n709706295,\n1453549767,\n591603172,\n768512391,\n854125182,\n),\ndescription=\"Random gear vector\",\n)\n
"},{"location":"options/options/#iscc_core.options.conformanc_critical","title":"conformanc_critical module-attribute
","text":"conformanc_critical = {\n\"meta_trim_name\",\n\"meta_trim_description\",\n\"meta_ngram_size_text\",\n\"meta_ngram_size_bytes\",\n\"text_ngram_size\",\n\"text_unicode_filter\",\n\"text_newlines\",\n\"data_avg_chunk_size\",\n\"cdc_gear\",\n}\n
"},{"location":"options/options/#iscc_core.options.has_logged_confromance","title":"has_logged_confromance module-attribute
","text":"has_logged_confromance = False\n
"},{"location":"options/options/#iscc_core.options.conformance_check_options","title":"conformance_check_options","text":"conformance_check_options(opts)\n
Check and log if options have non-default conformance critical values
"},{"location":"options/options/#iscc_core.options.core_opts","title":"core_opts module-attribute
","text":"core_opts = CoreOptions()\n
"},{"location":"options/options/#iscc_core.options.conformant_options","title":"conformant_options module-attribute
","text":"conformant_options = conformance_check_options(core_opts)\n
"},{"location":"units/","title":"ISCC - UNITs","text":"A standard ISCC-CODE is built from multiple ISCC-UNITs. Each unit serves a different purpose.
"},{"location":"units/code_data/","title":"ISCC - Data-Code","text":"A similarity preserving hash for binary data (soft hash).
"},{"location":"units/code_data/#iscc_core.code_data.gen_data_code","title":"gen_data_code(stream, bits = ic.core_opts.data_bits)
","text":"Create a similarity preserving ISCC Data-Code with the latest standard algorithm.
Parameters:
Name Type Description Default stream
Stream
Input data stream.
required bits
int
Bit-length of ISCC Data-Code (default 64).
ic.core_opts.data_bits
Returns:
Type Description dict
ISCC Data-Code
"},{"location":"units/code_data/#iscc_core.code_data.gen_data_code_v0","title":"gen_data_code_v0(stream, bits = ic.core_opts.data_bits)
","text":"Create an ISCC Data-Code with algorithm v0.
Parameters:
Name Type Description Default stream
Stream
Input data stream.
required bits
int
Bit-length of ISCC Data-Code (default 64).
ic.core_opts.data_bits
Returns:
Type Description dict
ISCC object with Data-Code
"},{"location":"units/code_data/#iscc_core.code_data.soft_hash_data_v0","title":"soft_hash_data_v0(stream)
","text":"Create a similarity preserving Data-Hash digest
Parameters:
Name Type Description Default stream
Stream
Input data stream.
required Returns:
Type Description bytes
256-bit Data-Hash (soft-hash) digest used as body for Data-Code
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0","title":"DataHasherV0
","text":"Incremental Data-Hash generator.
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0.__init__","title":"__init__(data = None)
","text":"Create a DataHasher
Parameters:
Name Type Description Default data
Optional[Data]
initial payload for hashing.
None
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0.push","title":"push(data)
","text":"Push data to the Data-Hash generator.
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0.digest","title":"digest()
","text":"Calculate 256-bit minhash digest from feature hashes.
"},{"location":"units/code_data/#iscc_core.code_data.DataHasherV0.code","title":"code(bits = ic.core_opts.data_bits)
","text":"Encode digest as an ISCC Data-Code unit.
Parameters:
Name Type Description Default bits
int
Number of bits for the ISCC Data-Code
ic.core_opts.data_bits
Returns:
Type Description str
ISCC Data-Code
"},{"location":"units/code_flake/","title":"ISCC - Flake-Code","text":"A unique, time-sorted identifier composed of a 48-bit timestamp and 16 to 208 bits of randomness.
The ISCC Flake-Code is a unique identifier for distributed ID generation. The 64-bit version can be used as an efficient surrogate key in database systems. It has guaranteed uniqueness if generated from a single process and is time sortable in integer and base32hex representation. The 128-bit version is a K-sortable, globally unique identifier for use in distributed systems and is compatible with UUID.
Example
>>> import iscc_core as ic\n>>> ic.gen_flake_code(bits=64)\n{'iscc': 'ISCC:OAAQC7YN7PG2XOR4'}\n>>> ic.gen_flake_code(bits=128)\n{'iscc': 'ISCC:OABQC7YN7RJGUUTLKDSKBXO25MA5E'}\n# Or use the convenience Flake class for easy access to different representations\n>>> flake = ic.Flake(bits=64)\n>>> flake.iscc\n'ISCC:OAAQC7YOADBZYNF7'\n>>> flake.time\n'2022-02-18T18:03:25.468'\n>>> flake.int\n107820312524764351\n>>> flake.string\n'05VGS063JGQBU'\n
"},{"location":"units/code_flake/#iscc_core.code_flake.gen_flake_code","title":"gen_flake_code(bits = ic.core_opts.flake_bits)
","text":"Create an ISCC Flake-Code with the latest standard algorithm
Parameters:
Name Type Description Default bits
int
Target bit-length of generated Flake-Code
ic.core_opts.flake_bits
Returns:
Type Description dict
ISCC object with Flake-Code
"},{"location":"units/code_flake/#iscc_core.code_flake.gen_flake_code_v0","title":"gen_flake_code_v0(bits = ic.core_opts.flake_bits)
","text":"Create an ISCC Flake-Code with the latest algorithm v0
Parameters:
Name Type Description Default bits
int
Target bit-length of generated Flake-Code
ic.core_opts.flake_bits
Returns:
Type Description dict
ISCC object with Flake-Code
"},{"location":"units/code_flake/#iscc_core.code_flake.uid_flake_v0","title":"uid_flake_v0(ts = None, bits = ic.core_opts.flake_bits)
","text":"Generate time and randomness based Flake-Hash
Parameters:
Name Type Description Default ts
Optional[float]
Unix timestamp (defaults to current time)
None
bits
int
Bit-length resulting Flake-Code (multiple of 32)
ic.core_opts.flake_bits
Returns:
Type Description bytes
Flake-Hash digest
"},{"location":"units/code_instance/","title":"ISCC - Instance-Code","text":"A data checksum.
"},{"location":"units/code_instance/#iscc_core.code_instance.gen_instance_code","title":"gen_instance_code(stream, bits = ic.core_opts.instance_bits)
","text":"Create an ISCC Instance-Code with the latest standard algorithm.
Parameters:
Name Type Description Default stream
Stream
Binary data stream for Instance-Code generation
required bits
int
Bit-length resulting Instance-Code (multiple of 64)
ic.core_opts.instance_bits
Returns:
Type Description dict
ISCC object with properties: iscc, datahash, filesize
"},{"location":"units/code_instance/#iscc_core.code_instance.gen_instance_code_v0","title":"gen_instance_code_v0(stream, bits = ic.core_opts.instance_bits)
","text":"Create an ISCC Instance-Code with algorithm v0.
Parameters:
Name Type Description Default stream
Stream
Binary data stream for Instance-Code generation
required bits
int
Bit-length of resulting Instance-Code (multiple of 64)
ic.core_opts.instance_bits
Returns:
Type Description dict
ISCC object with Instance-Code and properties: datahash, filesize
"},{"location":"units/code_instance/#iscc_core.code_instance.hash_instance_v0","title":"hash_instance_v0(stream)
","text":"Create 256-bit hash digest for the Instance-Code body
Parameters:
Name Type Description Default stream
Stream
Binary data stream for hash generation.
required Returns:
Type Description bytes
256-bit Instance-Hash digest used as body of Instance-Code
"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0","title":"InstanceHasherV0
","text":"Incremental Instance-Hash generator.
"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0.push","title":"push(data)
","text":"Push data to the Instance-Hash generator.
Parameters:
Name Type Description Default data
Data
Data to be hashed
required"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0.digest","title":"digest()
","text":"Return Instance-Hash
Returns:
Type Description bytes
Instance-Hash digest
"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0.multihash","title":"multihash()
","text":"Return blake3 multihash
Returns:
Type Description str
Blake3 hash as 256-bit multihash
"},{"location":"units/code_instance/#iscc_core.code_instance.InstanceHasherV0.code","title":"code(bits = ic.core_opts.instance_bits)
","text":"Encode digest as an ISCC Instance-Code unit.
Parameters:
Name Type Description Default bits
int
Number of bits for the ISCC Instance-Code
ic.core_opts.instance_bits
Returns:
Type Description str
ISCC Instance-Code
"},{"location":"units/code_meta/","title":"ISCC - Meta-Code","text":"A similarity preserving hash for digital asset metadata.
"},{"location":"units/code_meta/#purpose","title":"Purpose","text":"The Meta-Code is the first possible (optional) unit of an ISCC-CODE. It is calculated from the metadata of a digital asset. The primary purpose of the Meta-Code is to aid the discovery of digital assets with similar metadata and the detection of metadata anomalies. As a secondary function, Meta-Code processing also creates a secure Meta-Hash for cryptographic binding purposes.
"},{"location":"units/code_meta/#inputs","title":"Inputs","text":"The metadata supplied for Meta-Code calculation is called Seed-Metadata. Seed-Metadata has 3 possible elements:
- name (required): The name or title of the work manifested by the digital asset.
- description (optional): A disambiguating textual description of the digital asset.
- meta (optional): Industry-sector or use-case specific metadata, encoded as Data-URL (RFC-2397).
Note
Due to the broad applicability of the ISCC we do not prescribe a particular schema for the meta
-element. Instead we use the Data-URL format because it can encode and self-describe any conceivable metadata in a sufficiently machine-interpretable form at any desired specificity.
Data-URL Examples:
- Metadata is \"some\" JSON:
data:application/json;base64,<data>
- Metadata is JSON-LD:
data:application/ld+json;base64,<data>
- Metadata is \"some\" XML:
data:application/xml;base64,<data>
- Metadata is MARC21 XML:
data:application/marcxml+xml;base64,<data>
- Metadata is IPTC NewsML:
data:application/vnd.iptc.g2.newsitem+xml;base64,<data>
Data-URLs are also supported by all major internet browsers.
"},{"location":"units/code_meta/#processing","title":"Processing","text":""},{"location":"units/code_meta/#meta-code","title":"Meta-Code","text":"The first 32-bits of a Meta-Code are calculated as a similarity hash from the name
field. The second 32-bits are also calculated from the name
field if no other input was supplied. If description
is supplied but no meta
, the description
will be used for the second 32-bits. If meta
is supplied it will be used in favour of description
for the second 32-bits.
flowchart LR\n B{name?}\n B -->|Yes| J[\"P1 = SH(name)\"]\n J --> D{meta?}\n D -->|Yes| F[\"Meta-Code = P1 + SH(meta)\"]\n D -->|No| G{description?}\n G -->|Yes| H[\"Meta-Code = P1 + SH(description)\"]\n G -->|No| I[\"Meta-Code = P1\"]\n B -->|No| E[Skip Meta-Code]
Note
To support automation and reproducibility, applications that generate ISCCs should prioritize metadata that is automatically extracted from the digital asset.
If embedded metadata is not available or known to be unreliable an application should rely on external metadata or explicitly ask users to supply at least the name
-field. Applications should then first embed metadata into the asset before calculating the ISCC-CODE. This ensures that the embedded metadata is bound to the asset by the Instance-Code.
If neither embedded nor external metadata is available, the application may resort to using the filename of the digital asset as the value for the name
-field. If no value can be determined for the name
-field, an application shall skip generation of a Meta-Code and create an ISCC-CODE without a Meta-Code.
"},{"location":"units/code_meta/#meta-hash","title":"Meta-Hash","text":"In addition to the Meta-Code we also create a cryptographic hash (the Meta-Hash) of the supplied Seed-Metadata. It is used to securely bind metadata to the digital asset.
flowchart LR\n N{name?}\n N -->|Yes| A{meta?}\n N -->|No| E[Skip Meta-Hash]\n A -->|Yes| H[\"Meta-Hash = H(meta)\"]\n A -->|No| B{description?}\n B -->|Yes| HND[\"Meta-Hash = H(name + description)\"]\n B -->|No| HN[\"Meta-Hash = H(name)\"]
"},{"location":"units/code_meta/#functions","title":"Functions","text":""},{"location":"units/code_meta/#iscc_core.code_meta.gen_meta_code_v0","title":"gen_meta_code_v0(name, description = None, meta = None, bits = ic.core_opts.meta_bits)
","text":"Create an ISCC Meta-Code with the algorithm version 0.
Parameters:
Name Type Description Default name
str
Name or title of the work manifested by the digital asset
required description
Optional[str]
Optional description for disambiguation
None
meta
Optional[Union[dict,str]
Dict or Data-URL string with extended metadata
None
bits
int
Bit-length of resulting Meta-Code (multiple of 64)
ic.core_opts.meta_bits
Returns:
Type Description dict
ISCC object with possible fields: iscc, name, description, metadata, metahash
Source code in iscc_core\\code_meta.py
def gen_meta_code_v0(name, description=None, meta=None, bits=ic.core_opts.meta_bits):\n# type: (str, Optional[str], Optional[ic.Meta], int) -> dict\n\"\"\"\n Create an ISCC Meta-Code with the algorithm version 0.\n :param str name: Name or title of the work manifested by the digital asset\n :param Optional[str] description: Optional description for disambiguation\n :param Optional[Union[dict,str] meta: Dict or Data-URL string with extended metadata\n :param int bits: Bit-length of resulting Meta-Code (multiple of 64)\n :return: ISCC object with possible fields: iscc, name, description, metadata, metahash\n :rtype: dict\n \"\"\"\n# 1. Normalize `name`\nname = \"\" if name is None else name\nname = text_clean(name)\nname = text_remove_newlines(name)\nname = text_trim(name, ic.core_opts.meta_trim_name)\nif not name:\nraise ValueError(\"Meta-Code requires non-empty name element (after normalization)\")\n# 2. Normalize `description`\ndescription = \"\" if description is None else description\ndescription = text_clean(description)\ndescription = text_trim(description, ic.core_opts.meta_trim_description)\n# Calculate meta_code, metahash, and output metadata values for the different input cases\nif meta:\nif isinstance(meta, str):\n# Data-URL expected\ndurl = meta\npayload = DataURL.from_url(durl).data\nmeta_code_digest = soft_hash_meta_v0(name, payload)\nmetahash = ic.multi_hash_blake3(payload)\nmetadata_value = durl\nelif isinstance(meta, dict):\npayload = jcs.canonicalize(meta)\nmeta_code_digest = soft_hash_meta_v0(name, payload)\nmetahash = ic.multi_hash_blake3(payload)\nmedia_type = \"application/ld+json\" if \"@context\" in meta else \"application/json\"\ndurl_obj = DataURL.from_data(media_type, base64_encode=True, data=payload)\nmetadata_value = durl_obj.url\nelse:\nraise TypeError(f\"metadata must be Data-URL string or dict not {type(meta)}\")\nelse:\npayload = \" \".join((name, description)).strip().encode(\"utf-8\")\nmeta_code_digest = soft_hash_meta_v0(name, 
description)\nmetahash = ic.multi_hash_blake3(payload)\nmetadata_value = None\nmeta_code = ic.encode_component(\nmtype=ic.MT.META,\nstype=ic.ST.NONE,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=meta_code_digest,\n)\niscc = \"ISCC:\" + meta_code\n# Build result\nresult = {\"iscc\": iscc}\nif name:\nresult[\"name\"] = name\nif description:\nresult[\"description\"] = description\nif metadata_value:\nresult[\"meta\"] = metadata_value\nresult[\"metahash\"] = metahash\nreturn result\n
"},{"location":"units/code_meta/#iscc_core.code_meta.soft_hash_meta_v0","title":"soft_hash_meta_v0(name, extra = None)
","text":"Calculate simmilarity preserving 256-bit hash digest from asset metadata.
Textual input should be stripped of markup, normalized and trimmed before hashing. Bytes input can be any serialized metadata (JSON, XML, Image...). Metadata should be serialized in a canonical form (for example JCS for JSON)
Note
The processing algorithm depends on the type of the extra
input. If the extra
field is supplied and non-empty, we create separate hashes for name
and extra
and interleave them in 32-bit chunks:
-
If the extra
input is None
or an empty str
/bytes
object the Meta-Hash will be generated from the name
-field only.
-
If the extra
-input is a non-empty text string (str) the string is lower-cased and the processing units are utf-8 encoded characters (possibly multibyte).
-
If the extra
-input is a non-empty bytes object the processing is done bytewise.
Parameters:
Name Type Description Default name
str
Title of the work manifested in the digital asset
required extra
Union[str,bytes,None]
Additional metadata for disambiguation
None
Returns:
Type Description bytes
256-bit simhash digest for Meta-Code
Source code in iscc_core\\code_meta.py
def soft_hash_meta_v0(name, extra=None):\n# type: (str, Union[str,bytes,None]) -> bytes\n\"\"\"\n Calculate simmilarity preserving 256-bit hash digest from asset metadata.\n Textual input should be stripped of markup, normalized and trimmed before hashing.\n Bytes input can be any serialized metadata (JSON, XML, Image...). Metadata should be\n serialized in a canonical form (for example\n [JCS](https://tools.ietf.org/id/draft-rundgren-json-canonicalization-scheme-00.html) for JSON)\n !!! note\n The processing algorithm depends on the type of the `extra` input.\n If the `extra` field is supplied and non-empty, we create separate hashes for\n `name` and `extra` and interleave them in 32-bit chunks:\n - If the `extra` input is `None` or an empty `str`/`bytes` object the Meta-Hash will\n be generated from the `name`-field only.\n - If the `extra`-input is a non-empty **text** string (str) the string is\n lower-cased and the processing units are utf-8 endoded characters (possibly multibyte).\n - If the `extra`-input is a non-empty **bytes** object the processing is done bytewise.\n :param str name: Title of the work manifested in the digital asset\n :param Union[str,bytes,None] extra: Additional metadata for disambiguation\n :return: 256-bit simhash digest for Meta-Code\n :rtype: bytes\n \"\"\"\nname = ic.text_collapse(name)\nname_n_grams = ic.sliding_window(name, width=ic.core_opts.meta_ngram_size_text)\nname_hash_digests = [blake3(s.encode(\"utf-8\")).digest() for s in name_n_grams]\nsimhash_digest = ic.alg_simhash(name_hash_digests)\nif extra in {None, \"\", b\"\"}:\nreturn simhash_digest\nelse:\n# Augment with interleaved hash for extra metadata\nif isinstance(extra, bytes):\n# Raw bytes are handled per byte\nextra_n_grams = ic.sliding_window(extra, width=ic.core_opts.meta_ngram_size_bytes)\nextra_hash_digests = [blake3(ngram).digest() for ngram in extra_n_grams]\nelif isinstance(extra, str):\n# Text is collapsed and handled per character (multibyte)\nextra = 
ic.text_collapse(extra)\nextra_n_grams = ic.sliding_window(extra, width=ic.core_opts.meta_ngram_size_text)\nextra_hash_digests = [blake3(s.encode(\"utf-8\")).digest() for s in extra_n_grams]\nelse:\nraise ValueError(\"parameter `extra` must be of type str or bytes!\")\nextra_simhash_digest = ic.alg_simhash(extra_hash_digests)\n# Interleave first half of name and extra simhashes in 32-bit chunks\nchunks_simhash_digest = sliced(simhash_digest[:16], 4)\nchunks_extra_simhash_digest = sliced(extra_simhash_digest[:16], 4)\ninterleaved = interleave(chunks_simhash_digest, chunks_extra_simhash_digest)\nsimhash_digest = bytearray()\nfor chunk in interleaved:\nsimhash_digest += chunk\nsimhash_digest = bytes(simhash_digest)\nreturn simhash_digest\n
"},{"location":"units/code_meta/#iscc_core.code_meta.text_clean","title":"text_clean(text)
","text":"Clean text for display.
- Normalize with NFKC normalization.
- Remove Control Characters (except newlines)
- Reduce multiple consecutive newlines to a maximum of two newlines
- Strip leading and trailing whitespace
Source code in iscc_core\\code_meta.py
def text_clean(text):\n# type: (str) -> str\n\"\"\"\n Clean text for display.\n - Normalize with NFKC normalization.\n - Remove Control Characters (except newlines)\n - Reduce multiple consecutive newlines to a maximum of two newlines\n - Strip leading and trailing whitespace\n \"\"\"\n# Unicode normalize\ntext = unicodedata.normalize(\"NFKC\", text)\n# Remove control characters\ntext = \"\".join(\nch for ch in text if unicodedata.category(ch)[0] != \"C\" or ch in ic.core_opts.text_newlines\n)\n# Collapse more than two consecutive newlines\nchars = []\nnewline_count = 0\nfor c in text:\nif c in ic.core_opts.text_newlines:\nif newline_count < 2:\nchars.append(\"\\n\")\nnewline_count += 1\ncontinue\nelse:\nnewline_count = 0\nchars.append(c)\ntext = \"\".join(chars)\nreturn text.strip()\n
"},{"location":"units/code_meta/#iscc_core.code_meta.text_remove_newlines","title":"text_remove_newlines(text)
","text":"Remove newlines.
The name
field serves as a displayable title. We remove newlines and leading and trailing whitespace. We also collapse consecutive spaces to single spaces.
Parameters:
Name Type Description Default text
Text for newline removal
required Returns:
Type Description str
Single line of text
Source code in iscc_core\\code_meta.py
def text_remove_newlines(text):\n# type: (str) -> str\n\"\"\"\n Remove newlines.\n The `name` field serves as a displayable title. We remove newlines and leading and trailing\n whitespace. We also collapse consecutive spaces to single spaces.\n :param text: Text for newline removal\n :return: Single line of text\n :rtype: str\n \"\"\"\nreturn \" \".join(text.split())\n
"},{"location":"units/code_meta/#iscc_core.code_meta.text_trim","title":"text_trim(text, nbytes)
","text":"Trim text such that its utf-8 encoded size does not exceed nbytes
.
Source code in iscc_core\\code_meta.py
def text_trim(text, nbytes):\n# type: (str, int) -> str\n\"\"\"Trim text such that its utf-8 encoded size does not exceed `nbytes`.\"\"\"\nreturn text.encode(\"utf-8\")[:nbytes].decode(\"utf-8\", \"ignore\").strip()\n
"},{"location":"units/content/","title":"ISCC - Content-Codes","text":""},{"location":"units/content/code_content_audio/","title":"ISCC - Audio-Code","text":"A similarity preserving hash for audio content (soft hash).
Creates an ISCC object that provides an iscc
-field with an Audio-Code and a duration
-field.
The Content-Code Audio is generated from a Chromaprint fingerprint provided as a vector of 32-bit signed integers. The iscc-sdk uses fpcalc to extract Chromaprint vectors with the following command line parameters:
$ fpcalc -raw -json -signed -length 0 myaudiofile.mp3
"},{"location":"units/content/code_content_audio/#iscc_core.code_content_audio.gen_audio_code","title":"gen_audio_code(cv, bits = ic.core_opts.audio_bits)
","text":"Create an ISCC Content-Code Audio with the latest standard algorithm.
Parameters:
Name Type Description Default cv
Iterable[int]
Chromaprint vector
required bits
int
Bit-length resulting Content-Code Audio (multiple of 64)
ic.core_opts.audio_bits
Returns:
Type Description dict
ISCC object with Content-Code Audio
"},{"location":"units/content/code_content_audio/#iscc_core.code_content_audio.gen_audio_code_v0","title":"gen_audio_code_v0(cv, bits = ic.core_opts.audio_bits)
","text":"Create an ISCC Content-Code Audio with algorithm v0.
Parameters:
Name Type Description Default cv
Iterable[int]
Chromaprint vector
required bits
int
Bit-length resulting Content-Code Audio (multiple of 64)
ic.core_opts.audio_bits
Returns:
Type Description dict
ISCC object with Content-Code Audio
"},{"location":"units/content/code_content_audio/#iscc_core.code_content_audio.soft_hash_audio_v0","title":"soft_hash_audio_v0(cv, bits = ic.core_opts.audio_bits)
","text":"Create audio similarity hash from a chromaprint vector.
Parameters:
Name Type Description Default cv
Iterable[int]
Chromaprint vector
required bits
int
Bit-length resulting similarity hash (multiple of 32)
ic.core_opts.audio_bits
Returns:
Type Description bytes
Audio-Hash digest
"},{"location":"units/content/code_content_image/","title":"ISCC - Image-Code","text":"A similarity preserving perceptual hash for images.
The ISCC Content-Code Image is created by calculating a discrete cosine transform on normalized image-pixels and comparing the values from the upper left area of the dct-matrix against their median values to set the hash-bits.
Images must be normalized before using gen_image_code. Prepare images as follows:
- Transpose image according to EXIF Orientation
- Add white background to image if it has alpha transparency
- Crop empty borders of image
- Convert image to grayscale
- Resize image to 32x32
- Flatten 32x32 matrix to an array of 1024 grayscale (uint8) pixel values
"},{"location":"units/content/code_content_image/#iscc_core.code_content_image.gen_image_code","title":"gen_image_code(pixels, bits = ic.core_opts.image_bits)
","text":"Create an ISCC Content-Code Image with the latest standard algorithm.
Parameters:
Name Type Description Default pixels
Sequence[int]
Normalized image pixels (32x32 flattened gray values).
required bits
int
Bit-length of ISCC Content-Code Image (default 64).
ic.core_opts.image_bits
Returns:
Type Description ISCC
ISCC object with Content-Code Image.
"},{"location":"units/content/code_content_image/#iscc_core.code_content_image.gen_image_code_v0","title":"gen_image_code_v0(pixels, bits = ic.core_opts.image_bits)
","text":"Create an ISCC Content-Code Image with algorithm v0.
Parameters:
Name Type Description Default pixels
Sequence[int]
Normalized image pixels (32x32 flattened gray values)
required bits
int
Bit-length of ISCC Content-Code Image (default 64).
ic.core_opts.image_bits
Returns:
Type Description ISCC
ISCC object with Content-Code Image.
"},{"location":"units/content/code_content_image/#iscc_core.code_content_image.soft_hash_image_v0","title":"soft_hash_image_v0(pixels, bits = ic.core_opts.image_bits)
","text":"Calculate image hash from normalized grayscale pixel sequence of length 1024.
Parameters:
Name Type Description Default pixels
Sequence[int]
required bits
int
Bit-length of image hash (default 64).
ic.core_opts.image_bits
Returns:
Type Description bytes
Similarity preserving Image-Hash digest.
"},{"location":"units/content/code_content_mixed/","title":"ISCC - Mixed Code","text":"A similarity hash for mixed media content.
Creates an ISCC object that provides an iscc
-field with a Mixed-Code and a parts
-field that lists the input codes.
Many digital assets embed multiple assets of different mediatypes in a single file. Text documents may include images, video includes audio in most cases. The ISCC Content-Code-Mixed encodes the similarity of a collection of assets of the same or different mediatypes that may occur in a multimedia asset.
Applications that create mixed Content-Codes must be capable of extracting embedded assets and creating individual Content-Codes per asset.
"},{"location":"units/content/code_content_mixed/#iscc_core.code_content_mixed.gen_mixed_code","title":"gen_mixed_code(codes, bits = ic.core_opts.mixed_bits)
","text":"Create an ISCC Content-Code Mixed with the latest standard algorithm.
Parameters:
Name Type Description Default codes
Iterable[str]
a list of Content-Codes.
required bits
int
Target bit-length of generated Content-Code-Mixed.
ic.core_opts.mixed_bits
Returns:
Type Description dict
ISCC object with Content-Code Mixed.
"},{"location":"units/content/code_content_mixed/#iscc_core.code_content_mixed.gen_mixed_code_v0","title":"gen_mixed_code_v0(codes, bits = ic.core_opts.mixed_bits)
","text":"Create an ISCC Content-Code-Mixed with algorithm v0.
If the provided codes are of mixed length they are stripped to bits
length for calculation.
Parameters:
Name Type Description Default codes
Iterable[str]
a list of Content-Codes.
required bits
int
Target bit-length of generated Content-Code-Mixed.
ic.core_opts.mixed_bits
Returns:
Type Description dict
ISCC object with Content-Code Mixed.
"},{"location":"units/content/code_content_mixed/#iscc_core.code_content_mixed.soft_hash_codes_v0","title":"soft_hash_codes_v0(cc_digests, bits = ic.core_opts.mixed_bits)
","text":"Create a similarity hash from multiple Content-Code digests.
The similarity hash is created from the bodies of the input codes with the first byte of the code-header prepended.
All codes must be of main-type CONTENT and have a minimum length of bits
.
Parameters:
Name Type Description Default cc_digests
Sequence[bytes]
a list of Content-Code digests.
required bits
int
Target bit-length of generated Content-Code-Mixed.
ic.core_opts.mixed_bits
Returns:
Type Description bytes
Similarity preserving byte hash.
"},{"location":"units/content/code_content_text/","title":"ISCC - Text Code","text":"A similarity preserving hash for plain-text content (soft hash).
The ISCC Text-Code is generated from plain-text that has been extracted from a media asset.
Warning
Plain-text extraction from documents in various formats (especially PDF) may yield very different results depending on the extraction tools being used. The iscc-sdk uses Apache Tika to extract text from documents for Text-Code generation.
Algorithm overview
- Apply
text_collapse
function to text input - Count characters of collapsed text
- Apply
soft_hash_text_v0
to collapsed text
"},{"location":"units/content/code_content_text/#iscc_core.code_content_text.gen_text_code_v0","title":"gen_text_code_v0(text, bits = ic.core_opts.text_bits)
","text":"Create an ISCC Text-Code with algorithm v0.
Note
Any markup (like HTML tags or markdown) should be removed from the plain-text before passing it to this function.
Parameters:
Name Type Description Default text
str
Text for Text-Code creation
required bits
int
Bit-length of ISCC Code Hash (default 64)
ic.core_opts.text_bits
Returns:
Type Description dict
ISCC schema instance with Text-Code and an additional property characters
Source code in iscc_core\\code_content_text.py
def gen_text_code_v0(text, bits=ic.core_opts.text_bits):\n# type: (str, int) -> dict\n\"\"\"\n Create an ISCC Text-Code with algorithm v0.\n !!! note\n Any markup (like HTML tags or markdown) should be removed from the plain-text\n before passing it to this function.\n :param str text: Text for Text-Code creation\n :param int bits: Bit-length of ISCC Code Hash (default 64)\n :return: ISCC schema instance with Text-Code and an aditional property `characters`\n :rtype: dict\n \"\"\"\ntext = text_collapse(text)\ncharacters = len(text)\ndigest = soft_hash_text_v0(text)\ntext_code = ic.encode_component(\nmtype=ic.MT.CONTENT,\nstype=ic.ST_CC.TEXT,\nversion=ic.VS.V0,\nbit_length=bits,\ndigest=digest,\n)\niscc = \"ISCC:\" + text_code\nreturn dict(iscc=iscc, characters=characters)\n
"},{"location":"units/content/code_content_text/#iscc_core.code_content_text.text_collapse","title":"text_collapse(text)
","text":"Normalize and simplify text for similarity hashing.
- Decompose with NFD normalization.
- Remove all whitespace characters and convert text to lower case
- Filter control characters, marks (diacritics), and punctuation
- Recombine with NFKC normalization.
Note
See: Unicode normalization.
Parameters:
Name Type Description Default text
str
Plain text to be collapsed.
required Returns:
Type Description str
Collapsed plain text.
Source code in iscc_core\\code_content_text.py
def text_collapse(text):\n# type: (str) -> str\n\"\"\"\n Normalize and simplify text for similarity hashing.\n - Decompose with NFD normalization.\n - Remove all whitespace characters and convert text to lower case\n - Filter control characters, marks (diacritics), and punctuation\n - Recombine with NFKC normalization.\n !!! note\n See: [Unicode normalization](https://unicode.org/reports/tr15/).\n :param str text: Plain text to be collapsed.\n :return: Collapsed plain text.\n :rtype: str\n \"\"\"\n# Decompose with NFD\ntext = unicodedata.normalize(\"NFD\", text)\n# Remove all whitespace and convert text to lower case\ntext = \"\".join(text.split()).lower()\n# Filter control characters, marks (diacritics), and punctuation\ntext = \"\".join(\nch for ch in text if unicodedata.category(ch)[0] not in ic.core_opts.text_unicode_filter\n)\n# Recombine\ntext = unicodedata.normalize(\"NFKC\", text)\nreturn text\n
"},{"location":"units/content/code_content_text/#iscc_core.code_content_text.soft_hash_text_v0","title":"soft_hash_text_v0(text)
","text":"Creates a 256-bit similarity preserving hash for text input with algorithm v0.
- Slide over text with a
text_ngram_size
wide window and create xxh32
hashes - Create a
minhash_256
from the hashes generated in the previous step.
Note
Before passing text to this function it must be:
- stripped of markup
- normalized
- stripped of whitespace
- lowercased
Parameters:
Name Type Description Default text
str
Plain text to be hashed.
required Returns:
Type Description bytes
256-bit similarity preserving byte hash.
Source code in iscc_core\\code_content_text.py
def soft_hash_text_v0(text):\n# type: (str) -> bytes\n\"\"\"\n Creates a 256-bit similarity preserving hash for text input with algorithm v0.\n - Slide over text with a\n [`text_ngram_size`][iscc_core.options.CoreOptions.text_ngram_size] wide window\n and create [`xxh32`](https://cyan4973.github.io/xxHash/) hashes\n - Create a [`minhash_256`][iscc_core.minhash.alg_minhash_256] from the hashes generated\n in the previous step.\n !!! note\n Before passing text to this function it must be:\n - stripped of markup\n - normalized\n - stripped of whitespace\n - lowercased\n :param str text: Plain text to be hashed.\n :return: 256-bit similarity preserving byte hash.\n :rtype: bytes\n \"\"\"\nngrams = ic.sliding_window(text, ic.core_opts.text_ngram_size)\nfeatures = [xxhash.xxh32_intdigest(s.encode(\"utf-8\")) for s in ngrams]\nhash_digest = ic.alg_minhash_256(features)\nreturn hash_digest\n
"},{"location":"units/content/code_content_video/","title":"ISCC - Video-Code","text":"A similarity preserving hash for video content
The Content-Code Video is generated from MPEG-7 video frame signatures. The iscc-sdk uses ffmpeg to extract frame signatures with the following command line parameters:
$ ffmpeg -i video.mpg -vf fps=fps=5,signature=format=xml:filename=sig.xml -f null -
The relevant frame signatures can be parsed from the following elements in sig.xml:
<FrameSignature>0 0 0 1 0 0 1 0 1 1 0 0 1 1 ...</FrameSignature>
Tip
It is also possible to extract the signatures in a more compact binary format. But the format requires a custom binary parser to decode the frame signatures.
"},{"location":"units/content/code_content_video/#iscc_core.code_content_video.gen_video_code","title":"gen_video_code(frame_sigs, bits = ic.core_opts.video_bits)
","text":"Create an ISCC Video-Code with the latest standard algorithm.
Parameters:
Name Type Description Default frame_sigs
ic.FrameSig
Sequence of MP7 frame signatures
required bits
int
Bit-length resulting Video-Code (multiple of 64)
ic.core_opts.video_bits
Returns:
Type Description dict
ISCC object with Video-Code
"},{"location":"units/content/code_content_video/#iscc_core.code_content_video.gen_video_code_v0","title":"gen_video_code_v0(frame_sigs, bits = ic.core_opts.video_bits)
","text":"Create an ISCC Video-Code with algorithm v0.
Parameters:
Name Type Description Default frame_sigs
ic.FrameSig
Sequence of MP7 frame signatures
required bits
int
Bit-length resulting Video-Code (multiple of 64)
ic.core_opts.video_bits
Returns:
Type Description dict
ISCC object with Video-Code
"},{"location":"units/content/code_content_video/#iscc_core.code_content_video.soft_hash_video_v0","title":"soft_hash_video_v0(frame_sigs, bits = ic.core_opts.video_bits)
","text":"Compute video hash v0 from MP7 frame signatures.
Parameters:
Name Type Description Default frame_sigs
ic.FrameSig
2D matrix of MP7 frame signatures
required bits
int
Bit-length of resulting Video-Code (multiple of 64)
ic.core_opts.video_bits
"},{"location":"utilities/utils/","title":"ISCC - Utilities","text":""},{"location":"utilities/utils/#iscc_core.utils.json_canonical","title":"json_canonical(obj)
","text":"Canonical, deterministic serialization of ISCC metadata.
We serialize ISCC metadata in a deterministic/reproducible manner by using JCS (RFC 8785) canonicalization.
Source code in iscc_core\\utils.py
def json_canonical(obj):\n# type: (Any) -> bytes\n\"\"\"\n Canonical, deterministic serialization of ISCC metadata.\n We serialize ISCC metadata in a deterministic/reproducible manner by using\n [JCS (RFC 8785)](https://datatracker.ietf.org/doc/html/rfc8785) canonicalization.\n \"\"\"\nser = jcs.canonicalize(obj)\ndes = json.loads(ser)\nif des != obj:\nraise ValueError(f\"Not canonicalizable {obj} round-trips to {des}\")\nreturn ser\n
"},{"location":"utilities/utils/#iscc_core.utils.sliding_window","title":"sliding_window(seq, width)
","text":"Generate a sequence of equal \"width\" slices each advancing by one elemnt.
All types that have a length and can be sliced are supported (list, tuple, str ...). The result type matches the type of the input sequence. Fragment slices smaller than the width at the end of the sequence are not produced. If \"width\" is larger than the input sequence then one element will be returned that is shorter than the requested width.
Parameters:
Name Type Description Default seq
Sequence
Sequence of values to slide over
required width
int
Width of sliding window in number of items
required Returns:
Type Description Generator
A generator of window sized items
Source code in iscc_core\\utils.py
def sliding_window(seq, width):\n# type: (Sequence, int) -> Generator\n\"\"\"\n Generate a sequence of equal \"width\" slices each advancing by one elemnt.\n All types that have a length and can be sliced are supported (list, tuple, str ...).\n The result type matches the type of the input sequence.\n Fragment slices smaller than the width at the end of the sequence are not produced.\n If \"witdh\" is smaller than the input sequence than one element will be returned that\n is shorter than the requested width.\n :param Sequence seq: Sequence of values to slide over\n :param int width: Width of sliding window in number of items\n :returns: A generator of window sized items\n :rtype: Generator\n \"\"\"\nif width < 2:\nraise AssertionError(\"Sliding window width must be 2 or bigger.\")\nidx = range(max(len(seq) - width + 1, 1))\nreturn (seq[i : i + width] for i in idx)\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_compare","title":"iscc_compare(a, b)
","text":"Calculate separate hamming distances of compatible components of two ISCCs
Returns:
Type Description dict
A dict with keys meta_dist, semantic_dist, content_dist, data_dist, instance_match
Source code in iscc_core\\utils.py
def iscc_compare(a, b):\n# type: (str, str) -> dict\n\"\"\"\n Calculate separate hamming distances of compatible components of two ISCCs\n :return: A dict with keys meta_dist, semantic_dist, content_dist, data_dist, instance_match\n :rtype: dict\n \"\"\"\nac = [ic.Code(unit) for unit in ic.iscc_decompose(a)]\nbc = [ic.Code(unit) for unit in ic.iscc_decompose(b)]\nresult = {}\nfor ca in ac:\nfor cb in bc:\ncat = (ca.maintype, ca.subtype, ca.version)\ncbt = (cb.maintype, cb.subtype, ca.version)\nif cat == cbt:\nif ca.maintype != ic.MT.INSTANCE:\nresult[ca.maintype.name.lower() + \"_dist\"] = iscc_distance_bytes(\nca.hash_bytes, cb.hash_bytes\n)\nelse:\nresult[\"instance_match\"] = ca.hash_bytes == cb.hash_bytes\nreturn result\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_similarity","title":"iscc_similarity(a, b)
","text":"Calculate similarity of ISCC codes as a percentage value (0-100).
MainType, SubType, Version and Length of the codes must be the same.
Parameters:
Name Type Description Default a
ISCC a
required b
ISCC b
required Returns:
Type Description int
Similarity of ISCC a and b in percent (based on hamming distance)
Source code in iscc_core\\utils.py
def iscc_similarity(a, b):\n# type: (str, str) -> int\n\"\"\"\n Calculate similarity of ISCC codes as a percentage value (0-100).\n MainType, SubType, Version and Length of the codes must be the same.\n :param a: ISCC a\n :param b: ISCC b\n :return: Similarity of ISCC a and b in percent (based on hamming distance)\n :rtype: int\n \"\"\"\na, b = iscc_pair_unpack(a, b)\nhdist = iscc_distance_bytes(a, b)\nnbits = len(a) * 8\nsim = int(((nbits - hdist) / nbits) * 100)\nreturn sim\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_distance","title":"iscc_distance(a, b)
","text":"Calculate hamming distance of ISCC codes.
MainType, SubType, Version and Length of the codes must be the same.
Parameters:
Name Type Description Default a
ISCC a
required b
ISCC b
required Returns:
Type Description int
Hamming distance in number of bits.
Source code in iscc_core\\utils.py
def iscc_distance(a, b):\n# type: (str, str) -> int\n\"\"\"\n Calculate hamming distance of ISCC codes.\n MainType, SubType, Version and Length of the codes must be the same.\n :param a: ISCC a\n :param b: ISCC b\n :return: Hamming distanced in number of bits.\n :rtype: int\n \"\"\"\na, b = iscc_pair_unpack(a, b)\nreturn iscc_distance_bytes(a, b)\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_distance_bytes","title":"iscc_distance_bytes(a, b)
","text":"Calculate hamming distance for binary hash digests of equal length.
Parameters:
Name Type Description Default a
bytes
binary hash digest
required b
bytes
binary hash digest
required Returns:
Type Description int
Hamming distance in number of bits.
Source code in iscc_core\\utils.py
def iscc_distance_bytes(a, b):\n# type: (bytes, bytes) -> int\n\"\"\"\n Calculate hamming distance for binary hash digests of equal length.\n :param bytes a: binary hash digest\n :param bytes b: binary hash digest\n :return: Hamming distance in number of bits.\n :rtype: int\n \"\"\"\nif len(a) != len(b):\nraise AssertionError(f\"Hash diggest of unequal length: {len(a)} vs {len(b)}\")\nba, bb = bitarray(), bitarray()\nba.frombytes(a)\nbb.frombytes(b)\nreturn count_xor(ba, bb)\n
"},{"location":"utilities/utils/#iscc_core.utils.iscc_pair_unpack","title":"iscc_pair_unpack(a, b)
","text":"Unpack two ISCC codes and return their body hash digests if their headers match.
Headers match if their MainType, SubType, and Version are identical.
Parameters:
Name Type Description Default a
ISCC a
required b
ISCC b
required Returns:
Type Description Tuple[bytes, bytes]
Tuple with hash digests of a and b
Raises:
Type Description ValueError
If ISCC headers don't match
Source code in iscc_core\\utils.py
def iscc_pair_unpack(a, b):\n# type: (str, str) -> Tuple[bytes, bytes]\n\"\"\"\n Unpack two ISCC codes and return their body hash digests if their headers match.\n Headers match if their MainType, SubType, and Version are identical.\n :param a: ISCC a\n :param b: ISCC b\n :return: Tuple with hash digests of a and b\n :rtype: Tuple[bytes, bytes]\n :raise ValueError: If ISCC headers don\u00b4t match\n \"\"\"\na, b = ic.iscc_clean(ic.iscc_normalize(a)), ic.iscc_clean(ic.iscc_normalize(b))\na, b = ic.decode_base32(a), ic.decode_base32(b)\na, b = ic.decode_header(a), ic.decode_header(b)\nif not a[:-1] == b[:-1]:\nraise ValueError(f\"ISCC headers don\u00b4t match: {a}, {b}\")\nreturn a[-1], b[-1]\n
"}]}
\ No newline at end of file
diff --git a/sitemap.xml b/sitemap.xml
index 0925de6..73d6781 100755
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,137 +2,137 @@
https://core.iscc.codes/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/changelog/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/conformance/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/constants/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/iscc_code/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/iscc_id/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/iso-reference/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/algorithms/cdc/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/algorithms/dct/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/algorithms/minhash/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/algorithms/simhash/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/algorithms/wtahash/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/codec/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/includes/abbr/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/options/options/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/code_data/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/code_flake/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/code_instance/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/code_meta/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/content/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/content/code_content_audio/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/content/code_content_image/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/content/code_content_mixed/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/content/code_content_text/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/units/content/code_content_video/
- 2023-12-07
+ 2023-12-15
daily
https://core.iscc.codes/utilities/utils/
- 2023-12-07
+ 2023-12-15
daily
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 949ba4933aab9b30ae30e503cf58d0cc43914e6e..8d07a4a1a84f7448f64bed88d1ea829ee621f4f0 100755
GIT binary patch
delta 361
zcmV-v0ha#y0{a37ABzYGR!Mx32OWP&s!DAIWa|^ObL7|uoa{K0^BTH3YR;yHp1Q
zKAY3=$u`2(;c88Gfe_KVI^UJsJOih^M%UH(``c^rlpmT+H3SoZvXty%o%clN7a_)R
zEF%(Fd5)zIortPWohX(;u`QqTW|O6BDsYSDt1@0SQ>TTM1TCw(NMXY@mT`yzL+8EKYH;yEE
z8mBwMPXwJZ!#szlQ{cz*XG&S8q~DeuaBxU$SyOz}k|b+jYvc#87UF!UQoNBCIIpuK
zaKxQA&*RcL%0Z)Fl^Ct;U{zv^SJ0~x6I|D|jW4Ky<2EKVNVr7*@CTin2-JK7t8VgG
HfDr%yU^cE1
delta 360
zcmV-u0hj*!0{Q|6ABzYG3e0hl2OWPT9r`0ewmv~SM~;2Kss2grBcyL%Lr|NwOLZ{d
zvpF4~Y$IGBuGVB12ob&8=i9Q%GjPglblpCGe|s&S@