Module src.utils.jsonid2pronom

jsonid2pronom is a helper script that exports generic JSONID-compatible markers to a PRONOM-compatible signature file.

Functions

async def load_patterns(path: str) ‑> list
Expand source code
async def load_patterns(path: str) -> list:
    """Load patterns from a file for conversion to a signature file.

    The file at ``path`` is opened as UTF-8 text and its whole content is
    parsed as one JSON document; the decoded value is returned unchanged.
    """
    # The read is an ordinary blocking call; nothing is awaited in here.
    with open(path, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

Load patterns from a file for conversion to a signature file.

def main() ‑> None
Expand source code
def main() -> None:
    """Primary entry point for this script.

    Parses the command line, sets the level of this module's logger
    (DEBUG when ``--debug`` is given, INFO otherwise) and then runs
    ``output_signature`` for the file supplied via ``--path``/``-p``.

    When no path is supplied the usage text is written to stderr and the
    process exits without doing any conversion.
    """
    parser = argparse.ArgumentParser(
        prog="jsonid2pronom",
        description="convert JSONID compatible markers to PRONOM",
        epilog="for more information visit https://github.com/ffdev-info/jsonid",
    )
    parser.add_argument(
        "--debug",
        help="use debug logging",
        required=False,
        action="store_true",
    )
    parser.add_argument(
        "--path",
        "-p",
        help="file path to process",
        required=False,
    )
    args = parser.parse_args()
    logging.getLogger(__name__).setLevel(logging.DEBUG if args.debug else logging.INFO)
    logger.debug("debug logging is configured")
    if not args.path:
        # --path is optional for argparse, so its absence is handled here:
        # show the usage text and stop. sys.exit() without an argument
        # ends the process with status 0.
        parser.print_help(sys.stderr)
        sys.exit()
    asyncio.run(output_signature(path=args.path))

Primary entry point for this script.

async def output_signature(path: str)
Expand source code
async def output_signature(path: str) -> None:
    """Output JSONID compatible signatures to PRONOM.

    Loads the markers found at ``path`` and, for each encoding in a fixed
    list, asks ``pronom.process_markers`` for the matching sequences and
    wraps them in a ``pronom.Format``. The collected formats are then
    passed to ``pronom.process_formats_to_stdout``.

    An encoding whose markers raise ``pronom.UnprocessableEntity`` is
    logged and skipped; its id is still consumed, so the ids of the
    remaining formats are unaffected by the skip.

    Exits the process with status 1 when the loaded markers are empty.
    """
    markers = await load_patterns(path)
    if not markers:
        logger.error("no patterns provided via path arg")
        sys.exit(1)

    encodings = ("UTF-8", "UTF-16", "UTF-16BE", "UTF-32LE")
    # These values are the same for every generated format.
    json_puid = "jsonid2pronom/1"
    mime = "application/json"

    formats = []
    priorities = []
    for increment_id, encoding in enumerate(encodings, start=1):
        name_ = f"JSONID2PRONOM Conversion ({encoding})"
        try:
            # A deep copy is handed over for each encoding; presumably
            # process_markers may modify its input -- confirm against pronom.
            sequences = pronom.process_markers(
                copy.deepcopy(markers),
                increment_id,
                encoding=encoding,
            )
        except pronom.UnprocessableEntity as err:
            logger.error(
                "%s %s: cannot handle: %s",
                json_puid,
                name_,
                err,
            )
            for err_marker in markers:
                logger.debug("--- START ---")
                logger.debug("marker: %s", err_marker)
                logger.debug("---  END  ---")
            continue
        fmt = pronom.Format(
            id=increment_id,
            name=name_,
            version="",
            puid=json_puid,
            mime=mime,
            classification="structured text",
            external_signatures=[
                pronom.ExternalSignature(
                    id=increment_id,
                    signature="json",
                    type=pronom.EXT,
                )
            ],
            internal_signatures=sequences,
            # Snapshot of the ids of the formats built so far. Each id is
            # appended at most once, so a plain copy is already free of
            # duplicates and, unlike a round trip through set(), keeps a
            # stable order from run to run.
            priorities=list(priorities),
        )
        priorities.append(f"{increment_id}")
        formats.append(fmt)

    pronom.process_formats_to_stdout(formats)

Output JSONID compatible signatures to PRONOM.