Skip to content

Acquisition Schema

Machine-readable schema for per-case real-data acquisition metadata.

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://cityjson-corpus.org/schemas/acquisition.schema.json",
  "title": "Shared Corpus Acquisition Contract",
  "description": "Machine-readable schema for per-case real-data acquisition metadata.",
  "type": "object",
  "additionalProperties": false,
  "required": [
    "version",
    "id",
    "status",
    "dataset",
    "owner",
    "selector",
    "reference",
    "preprocessing",
    "outputs",
    "license",
    "notes"
  ],
  "properties": {
    "version": {
      "description": "Contract version. This schema accepts only the integer 2.",
      "type": "integer",
      "const": 2
    },
    "id": {
      "type": "string",
      "minLength": 1
    },
    "status": {
      "type": "string",
      "enum": [
        "planned",
        "published"
      ],
      "description": "Acquisition lifecycle state. See schemas/README.md for the controlled vocabulary."
    },
    "dataset": {
      "type": "string",
      "minLength": 1
    },
    "owner": {
      "type": "string",
      "minLength": 1
    },
    "upstream_url": {
      "type": [
        "string",
        "null"
      ]
    },
    "upstream_version": {
      "type": [
        "string",
        "null"
      ]
    },
    "selector": {
      "type": "string",
      "minLength": 1
    },
    "reference": {
      "type": "string",
      "minLength": 1
    },
    "preprocessing": {
      "type": "array",
      "items": {
        "type": "string",
        "minLength": 1
      }
    },
    "outputs": {
      "description": "Non-empty list of output entries; each entry must match #/$defs/output.",
      "type": "array",
      "minItems": 1,
      "items": {
        "$ref": "#/$defs/output"
      }
    },
    "license": {
      "type": "string",
      "minLength": 1
    },
    "notes": {
      "type": "string",
      "minLength": 1
    }
  },
  "$defs": {
    "output": {
      "description": "One entry of the top-level 'outputs' array. 'derived_from' must be omitted when 'derivation' is 'acquired' and must be present for 'merged', 'exported' and 'materialized'.",
      "type": "object",
      "additionalProperties": false,
      "required": [
        "producer",
        "derivation",
        "validation_role",
        "representation",
        "published"
      ],
      "properties": {
        "representation": {
          "type": "string",
          "minLength": 1
        },
        "producer": {
          "type": "string",
          "minLength": 1
        },
        "derivation": {
          "type": "string",
          "enum": [
            "acquired",
            "merged",
            "exported",
            "materialized"
          ]
        },
        "derived_from": {
          "description": "Array of non-empty strings. Forbidden when 'derivation' is 'acquired'; required for every other derivation value.",
          "type": "array",
          "items": {
            "type": "string",
            "minLength": 1
          }
        },
        "validation_role": {
          "type": "string",
          "enum": [
            "canonical",
            "benchmark-only"
          ]
        },
        "path": {
          "type": [
            "string",
            "null"
          ]
        },
        "checksum": {
          "type": [
            "string",
            "null"
          ]
        },
        "byte_size": {
          "type": [
            "integer",
            "null"
          ],
          "minimum": 0
        },
        "published": {
          "type": "boolean"
        }
      },
      "allOf": [
        {
          "$comment": "'required' inside 'if' stops the condition from matching vacuously when 'derivation' is absent; without it a missing 'derivation' would also trigger this 'then' and produce a misleading 'derived_from' error.",
          "if": {
            "required": ["derivation"],
            "properties": {
              "derivation": {
                "const": "acquired"
              }
            }
          },
          "then": {
            "not": {
              "required": ["derived_from"]
            }
          }
        },
        {
          "$comment": "'required' inside 'if' stops the condition from matching vacuously when 'derivation' is absent; without it a missing 'derivation' would also trigger this 'then' and produce a misleading 'derived_from' error.",
          "if": {
            "required": ["derivation"],
            "properties": {
              "derivation": {
                "enum": [
                  "merged",
                  "exported",
                  "materialized"
                ]
              }
            }
          },
          "then": {
            "required": ["derived_from"]
          }
        }
      ]
    }
  }
}