# Builds the production image, pushes it to Azure Container Registry and
# asks the GitOps repository to roll the "test" environment to the new tag.
name: Build and Push Docker Image

on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - 'src/**'
      - 'Dockerfile'
      - 'pyproject.toml'
      - '.github/workflows/docker-build-push.yaml'

env:
  ACR_NAME: crfhiskybert
  IMAGE: crfhiskybert.azurecr.io/fida/ki/statistikk-mcp

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      # id-token is required for the federated (OIDC) Azure login below.
      id-token: write
      contents: read

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Single source of truth for the image tag: the same value is pushed to
      # the registry and reported to the GitOps repository, so the two can
      # never disagree.
      - name: Get short SHA
        id: sha
        run: echo "short=$(git rev-parse --short=7 HEAD)" >> "$GITHUB_OUTPUT"

      - name: Login to Azure using Federated Identity
        uses: azure/login@v2
        with:
          client-id: ${{ vars.AZURE_CLIENT_ID }}
          tenant-id: ${{ vars.AZURE_TENANT_ID }}
          allow-no-subscriptions: true

      - name: Login to ACR
        run: az acr login --name "$ACR_NAME"

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.IMAGE }}
          tags: |
            type=raw,value=${{ steps.sha.outputs.short }}
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          target: prod
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Secrets and variables are handed to the script through the
      # environment instead of being expanded into the script text.
      - name: Trigger GitOps tag update
        env:
          GITOPS_PAT: ${{ secrets.GITOPS_PAT }}
          GITOPS_REPO: ${{ vars.GITOPS_REPO }}
          IMAGE_TAG: ${{ steps.sha.outputs.short }}
        run: |
          curl -sS -f -L \
            -X POST \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer ${GITOPS_PAT}" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            "https://api.github.com/repos/${GITOPS_REPO}/dispatches" \
            -d "{\"event_type\":\"update_tag\",\"client_payload\":{\"env\":\"test\",\"updates\":[{\"repository\":\"fida/ki/statistikk-mcp\",\"tag\":\"${IMAGE_TAG}\"}]}}"
# Manual build/push helpers for the container image.
ACR_NAME := crfhiskybert
ACR := $(ACR_NAME).azurecr.io
# NOTE(review): the GitHub workflow in this change pushes to
# $(ACR)/fida/ki/statistikk-mcp (no "fhi-" prefix) -- confirm which
# repository name is intended before relying on `make docker`.
IMAGE := $(ACR)/fida/ki/fhi-statistikk-mcp
TAG := $(shell git rev-parse --short HEAD)

# `run` is listed too: none of these targets produce a file of that name.
.PHONY: docker-build docker-push docker acr-login run

# Authenticate the local Docker client against the registry.
acr-login:
	az acr login --name $(ACR_NAME)

# Build the `prod` stage and tag it with both the commit SHA and `latest`.
docker-build:
	docker build --target prod -t $(IMAGE):$(TAG) -t $(IMAGE):latest .

docker-push:
	docker push $(IMAGE):$(TAG)
	docker push $(IMAGE):latest

# Full pipeline: login, build, push.
docker: acr-login docker-build docker-push

# Run the latest local image; the server is reachable on host port 18000.
run:
	docker run --rm -p 18000:8000 $(IMAGE):latest
class ApiClient:
    """Async client wrapping the FHI Statistikk REST API with caching and rate limiting.

    Two throttles apply to every request: a semaphore caps the number of
    requests in flight at five, and request starts are spaced at least
    MIN_REQUEST_INTERVAL seconds apart. Metadata responses are memoised in an
    in-memory TTLCache using the TTL_* constants of this module.
    """

    def __init__(self) -> None:
        self._client = httpx.AsyncClient(base_url=BASE_URL, timeout=30.0)
        self._cache = TTLCache()
        self._semaphore = asyncio.Semaphore(5)
        # Guards the read-modify-write of _last_request_time.
        self._request_lock = asyncio.Lock()
        self._last_request_time = 0.0

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    async def _request(
        self,
        method: str,
        path: str,
        *,
        accept: str = "application/json",
        json_body: dict | None = None,
    ) -> httpx.Response:
        """Rate-limited HTTP request with retry on 429/503/timeout.

        Makes up to three attempts, backing off 2 s and then 4 s between them.

        Returns:
            The response of the first attempt that did not need a retry.

        Raises:
            ApiError: on a network error (status 0), on any response with
                status >= 400 that is not retried, or when the final attempt
                times out (status 0).
        """
        max_attempts = 3
        async with self._semaphore:
            # Reserve a start slot under the lock, but sleep outside it so
            # other callers can reserve theirs in the meantime.
            async with self._request_lock:
                now = time.monotonic()
                wait = MIN_REQUEST_INTERVAL - (now - self._last_request_time)
                self._last_request_time = max(
                    now, self._last_request_time + MIN_REQUEST_INTERVAL
                )
            if wait > 0:
                await asyncio.sleep(wait)

            last_exc: Exception | None = None
            resp: httpx.Response | None = None
            for attempt in range(max_attempts):
                is_last_attempt = attempt == max_attempts - 1
                delay = (attempt + 1) * 2
                try:
                    resp = await self._client.request(
                        method, path, headers={"Accept": accept}, json=json_body,
                    )
                except httpx.TimeoutException as exc:
                    last_exc = exc
                    if is_last_attempt:
                        # Nothing left to retry: fail now instead of logging
                        # "retrying" and sleeping while holding a slot.
                        break
                    logger.warning(
                        "Timeout (attempt %d), retrying in %ds", attempt + 1, delay
                    )
                    await asyncio.sleep(delay)
                    continue
                except httpx.RequestError as exc:
                    raise ApiError(0, f"Network error: {exc}") from exc

                if resp.status_code in (429, 503) and not is_last_attempt:
                    logger.warning(
                        "Got %d, retrying in %ds", resp.status_code, delay
                    )
                    await asyncio.sleep(delay)
                    continue

                if resp.status_code >= 400:
                    raise ApiError(resp.status_code, _extract_error(resp))
                return resp

            # All retries exhausted
            if last_exc is not None:
                raise ApiError(
                    0, "API request timed out. Try reducing query scope."
                ) from last_exc
            if resp is not None:
                raise ApiError(resp.status_code, _extract_error(resp))
            raise ApiError(0, "API request failed after retries.")

    async def _get_json(self, path: str) -> dict | list:
        """GET `path` and return the decoded JSON body."""
        resp = await self._request("GET", path)
        return resp.json()

    async def _get_cached(self, cache_key: str, path: str, ttl: float) -> dict | list:
        """Return the cached value for `cache_key`, fetching `path` on a miss."""
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached
        data = await self._get_json(path)
        self._cache.set(cache_key, data, ttl)
        return data

    # --- Cached endpoints ---

    async def get_sources(self) -> list[dict]:
        """List all data sources (cached for TTL_SOURCES seconds)."""
        return await self._get_cached("sources", "/Common/source", TTL_SOURCES)

    async def get_tables(
        self, source_id: str, modified_after: str | None = None,
    ) -> list[dict]:
        """List the tables of a source.

        Only the unfiltered listing is cached; a call with `modified_after`
        always goes to the API and never touches the cache.
        """
        from urllib.parse import urlencode

        cache_key = f"tables:{source_id}"
        if modified_after is None:
            cached = self._cache.get(cache_key)
            if cached is not None:
                return cached
        path = f"/{source_id}/Table"
        if modified_after:
            # Encode the value: a "+" in a timezone offset would otherwise be
            # read as a space by the server.
            path += "?" + urlencode({"modifiedAfter": modified_after})
        data = await self._get_json(path)
        if modified_after is None:
            self._cache.set(cache_key, data, TTL_TABLES)
        return data

    async def get_table_info(self, source_id: str, table_id: int) -> dict:
        """Fetch a table's basic info (cached for TTL_METADATA seconds)."""
        return await self._get_cached(
            f"table_info:{source_id}:{table_id}",
            f"/{source_id}/Table/{table_id}",
            TTL_METADATA,
        )

    async def get_dimensions(self, source_id: str, table_id: int) -> list[dict]:
        """Fetch a table's dimensions (cached for TTL_DIMENSIONS seconds).

        A response wrapped as {"dimensions": [...]} is unwrapped before it is
        cached, so callers always receive the list.
        """
        cache_key = f"dims:{source_id}:{table_id}"
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached
        data = await self._get_json(f"/{source_id}/Table/{table_id}/dimension")
        if isinstance(data, dict):
            data = data.get("dimensions", [])
        self._cache.set(cache_key, data, TTL_DIMENSIONS)
        return data

    async def get_metadata(self, source_id: str, table_id: int) -> dict:
        """Fetch a table's metadata (cached for TTL_METADATA seconds)."""
        return await self._get_cached(
            f"meta:{source_id}:{table_id}",
            f"/{source_id}/Table/{table_id}/metadata",
            TTL_METADATA,
        )

    async def get_flags(self, source_id: str, table_id: int) -> list[dict]:
        """Fetch a table's flag definitions (cached for TTL_FLAGS seconds)."""
        return await self._get_cached(
            f"flags:{source_id}:{table_id}",
            f"/{source_id}/Table/{table_id}/flag",
            TTL_FLAGS,
        )

    async def get_query_template(self, source_id: str, table_id: int) -> dict:
        """Fetch a table's query template (cached for TTL_QUERY seconds)."""
        return await self._get_cached(
            f"query:{source_id}:{table_id}",
            f"/{source_id}/Table/{table_id}/query",
            TTL_QUERY,
        )

    async def post_data(
        self,
        source_id: str,
        table_id: int,
        body: dict,
        max_row_count: int = 50000,
    ) -> str:
        """Post a data query, return raw CSV text.

        `body` is copied, not mutated; its "response" key is overwritten to
        request the "csv2" format capped at `max_row_count` rows.
        """
        request_body = {**body}
        request_body["response"] = {
            "format": "csv2",
            "maxRowCount": max_row_count,
        }
        resp = await self._request(
            "POST",
            f"/{source_id}/Table/{table_id}/data",
            accept="text/csv",
            json_body=request_body,
        )
        return resp.text
Call from server lifespan shutdown.""" + global _client + if _client is not None: + await _client.close() + _client = None + + +def get_client() -> ApiClient: + """Get (or lazily create) the shared client.""" + global _client + if _client is None: + _client = ApiClient() + return _client + + +def _extract_error(resp: httpx.Response) -> str: + """Extract human-readable error from API response (RFC 7807 ProblemDetails).""" + try: + body = resp.json() + if isinstance(body, dict): + parts = [] + title = body.get("title", "") + if title: + parts.append(title) + detail = body.get("detail", "") + if detail: + parts.append(detail) + errors = body.get("errors", {}) + if isinstance(errors, dict): + for msgs in errors.values(): + if isinstance(msgs, list): + parts.extend(str(m) for m in msgs) + if parts: + return " | ".join(parts) + return str(body) + except Exception: + pass + return resp.text[:500] if resp.text else f"HTTP {resp.status_code}" diff --git a/src/fhi_statistikk_mcp/cache.py b/src/fhi_statistikk_mcp/cache.py new file mode 100644 index 0000000..5bbab5c --- /dev/null +++ b/src/fhi_statistikk_mcp/cache.py @@ -0,0 +1,27 @@ +"""Simple in-memory TTL cache.""" + +import time +from typing import Any + + +class TTLCache: + """Dict-based cache with per-key TTL expiry.""" + + def __init__(self) -> None: + self._store: dict[str, tuple[float, Any]] = {} + + def get(self, key: str) -> Any | None: + entry = self._store.get(key) + if entry is None: + return None + expires_at, value = entry + if time.monotonic() > expires_at: + del self._store[key] + return None + return value + + def set(self, key: str, value: Any, ttl_seconds: float) -> None: + self._store[key] = (time.monotonic() + ttl_seconds, value) + + def clear(self) -> None: + self._store.clear() diff --git a/src/fhi_statistikk_mcp/server.py b/src/fhi_statistikk_mcp/server.py new file mode 100644 index 0000000..145b6ba --- /dev/null +++ b/src/fhi_statistikk_mcp/server.py @@ -0,0 +1,311 @@ +"""MCP server exposing FHI 
Statistikk Open API as agent-friendly tools.""" + +import argparse +import asyncio +import logging +import sys +from contextlib import asynccontextmanager +from typing import Optional + +for _name in ("uvicorn", "uvicorn.error", "uvicorn.access", "mcp", "fastmcp"): + _log = logging.getLogger(_name) + _log.handlers = [] + _handler = logging.StreamHandler(sys.stderr) + _handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s")) + _log.addHandler(_handler) + _log.propagate = False + +logger = logging.getLogger("fhi_statistikk_mcp") +logger.setLevel(logging.INFO) +_h = logging.StreamHandler(sys.stderr) +_h.setFormatter(logging.Formatter("%(levelname)s: %(message)s")) +logger.addHandler(_h) + +from mcp.server.fastmcp import FastMCP + +from .api_client import ApiError, close_client, get_client, init_client +from .transformers import ( + complete_query_dimensions, + extract_metadata_fields, + matches_search, + navigate_hierarchy, + parse_csv_to_rows, + strip_html, + summarize_dimensions, +) + + +# --------------------------------------------------------------------------- +# Tool implementations +# --------------------------------------------------------------------------- + +async def list_sources() -> list[dict]: + """List all available FHI data sources. + + Returns an array of sources with id, title, description, and publisher. + This is the entry point for discovering what data is available. + + Sources include public health statistics (Folkehelsestatistikk), + vaccination registry (SYSVAK), cause of death registry, and more. + All data is open (CC BY 4.0), no authentication required. 
+ """ + api = get_client() + raw = await api.get_sources() + return [ + { + "id": s.get("id", ""), + "title": s.get("title", ""), + "description": strip_html(s.get("description", "")), + "published_by": s.get("publishedBy", ""), + } + for s in raw + ] + + +async def list_tables( + source_id: str, + search: Optional[str] = None, + modified_after: Optional[str] = None, +) -> list[dict]: + """List tables within a data source, with optional keyword search. + + Args: + source_id: Source identifier, e.g. "nokkel", "msis", "daar". + Use list_sources to find available source IDs. + search: Case-insensitive keyword filter on table title. + Multiple words must all match. Example: "befolkning vekst" + modified_after: ISO-8601 datetime. Only return tables modified after + this date. Example: "2025-01-01" + + Returns tables sorted by modification date (newest first). + """ + api = get_client() + raw = await api.get_tables(source_id, modified_after) + tables = [ + { + "table_id": t.get("tableId"), + "title": t.get("title", ""), + "published_at": t.get("publishedAt", ""), + "modified_at": t.get("modifiedAt", ""), + } + for t in raw + ] + + if search: + tables = [t for t in tables if matches_search(t["title"], search)] + + tables.sort(key=lambda t: t["modified_at"] or "", reverse=True) + return tables + + +async def describe_table(source_id: str, table_id: int) -> dict: + """Get complete table structure: dimensions, metadata, and flags. + + This is the primary tool for understanding a table before querying data. + Returns everything needed to construct a query_data call. + + Args: + source_id: Source identifier, e.g. "nokkel" + table_id: Numeric table ID from list_tables + + The response includes: + - title, dates, description, keywords, update frequency + - dimensions with their codes, labels, and available values + - flags (symbols for missing/suppressed data) + + Large dimensions (GEO with 400+ entries) show only top-level values. 
+ Use get_dimension_values to drill into sub-levels. + + Fixed dimensions (single value, like KJONN="kjønn samlet") are marked + with is_fixed=true -- query_data auto-includes these. + """ + api = get_client() + + info, dims, meta, flags = await asyncio.gather( + api.get_table_info(source_id, table_id), + api.get_dimensions(source_id, table_id), + api.get_metadata(source_id, table_id), + api.get_flags(source_id, table_id), + ) + + meta_fields = extract_metadata_fields(meta) + dim_summaries = summarize_dimensions(dims if isinstance(dims, list) else []) + + result = { + "title": info.get("title", ""), + "published_at": info.get("publishedAt", ""), + "modified_at": info.get("modifiedAt", ""), + } + result.update(meta_fields) + result["dimensions"] = dim_summaries + result["flags"] = [ + {"symbol": f.get("symbol", ""), "description": f.get("description", "")} + for f in (flags if isinstance(flags, list) else []) + ] + return result + + +async def get_dimension_values( + source_id: str, + table_id: int, + dimension_code: str, + parent_value: Optional[str] = None, + search: Optional[str] = None, +) -> list[dict]: + """Drill into dimension values, especially for large hierarchical dimensions like GEO. + + Args: + source_id: Source identifier + table_id: Table ID + dimension_code: Dimension code, e.g. "GEO", "AAR", "ALDER" + parent_value: Return only children of this category. + Example: "18" for Nordland county municipalities. + If omitted, returns top-level categories. + search: Case-insensitive search on category labels. + Accent-insensitive: "tromso" matches "Tromsø". + Example: "bodø", "oslo", "bergen" + + Returns array of {value, label, child_count}. 
async def query_data(
    source_id: str,
    table_id: int,
    dimensions: list[dict],
    max_rows: int = 1000,
) -> dict:
    """Fetch statistical data from an FHI table.

    Before calling this, use describe_table to understand the table's
    dimensions and available values.

    Args:
        source_id: Source identifier
        table_id: Table ID
        dimensions: Array of dimension filters. Each element:
            - code (str): Dimension code, e.g. "GEO"
            - filter (str): "item" (exact), "all" (wildcard), "top" (first N), "bottom" (last N)
            - values (list[str]): Filter values

            You only need to specify dimensions you care about.
            Fixed dimensions (single-valued) are auto-included.
            If you omit MEASURE_TYPE, all measures are returned.
            All other dimensions MUST be specified or a ValueError is raised.

        max_rows: Max rows to return (default 1000, 0 for unlimited)

    Year values: use "2020" (auto-translated to "2020_2020") or full format.

    Returns labeled rows with truncation info. Check "truncated" field.
    """
    api = get_client()

    raw_dims = await api.get_dimensions(source_id, table_id)
    dim_list = raw_dims if isinstance(raw_dims, list) else []

    # Fills in fixed dimensions and MEASURE_TYPE; raises ValueError when a
    # required dimension is missing.
    query_dims = complete_query_dimensions(dim_list, dimensions)

    body = {"dimensions": query_dims}

    try:
        csv_text = await api.post_data(source_id, table_id, body)
    except ApiError as e:
        raise ValueError(f"API error: {e.detail}") from e

    parsed = parse_csv_to_rows(csv_text, max_rows)
    parsed["dimensions_used"] = {
        d["code"]: {"filter": d["filter"], "values": d["values"]}
        for d in query_dims
    }

    # The title is a convenience only: a failed lookup must not discard the
    # rows already fetched, but it should be visible in the logs.
    try:
        info = await api.get_table_info(source_id, table_id)
        parsed["table"] = info.get("title", "")
    except Exception as exc:
        logger.warning(
            "Could not fetch title for table %s/%s: %s", source_id, table_id, exc
        )

    return parsed
+ + Args: + source_id: Source identifier + table_id: Table ID + """ + api = get_client() + return await api.get_query_template(source_id, table_id) + + +# --------------------------------------------------------------------------- +# Server construction and entry point +# --------------------------------------------------------------------------- + +def _build_mcp(host: str, port: int) -> FastMCP: + @asynccontextmanager + async def _lifespan(_server: FastMCP): + init_client() + logger.info("HTTP client initialized") + try: + yield + finally: + await close_client() + logger.info("HTTP client closed") + + server = FastMCP("fhi-statistikk", host=host, port=port, lifespan=_lifespan) + + server.tool(name="list_sources")(list_sources) + server.tool(name="list_tables")(list_tables) + server.tool(name="describe_table")(describe_table) + server.tool(name="get_dimension_values")(get_dimension_values) + server.tool(name="query_data")(query_data) + server.tool(name="get_query_template")(get_query_template) + + return server + + +def main(): + ap = argparse.ArgumentParser(description="FHI Statistikk MCP Server") + ap.add_argument( + "--transport", + default="sse", + choices=["stdio", "sse", "streamable-http"], + ) + ap.add_argument("--host", default="0.0.0.0") + ap.add_argument("--port", type=int, default=8000) + args = ap.parse_args() + + logger.info("Starting FHI Statistikk MCP server") + logger.info(" API: %s", "https://statistikk-data.fhi.no/api/open/v1/") + logger.info(" Transport: %s on %s:%d", args.transport, args.host, args.port) + + server = _build_mcp(args.host, args.port) + server.run(transport=args.transport) + + +if __name__ == "__main__": + main() diff --git a/src/fhi_statistikk_mcp/transformers.py b/src/fhi_statistikk_mcp/transformers.py new file mode 100644 index 0000000..539a059 --- /dev/null +++ b/src/fhi_statistikk_mcp/transformers.py @@ -0,0 +1,347 @@ +"""Data transformation utilities for FHI API responses.""" + +import csv +import io +import re +import 
unicodedata + + +# --- HTML stripping --- + +_HTML_TAG_RE = re.compile(r"<[^>]+>") +_WHITESPACE_RE = re.compile(r"\s+") + + +def strip_html(text: str) -> str: + """Remove HTML tags, decode entities, collapse whitespace.""" + if not text: + return text + text = _HTML_TAG_RE.sub(" ", text) + text = ( + text.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace(" ", " ") + .replace(""", '"') + .replace("'", "'") + ) + return _WHITESPACE_RE.sub(" ", text).strip() + + +# --- Accent-insensitive search --- + +_NORDIC_MAP = str.maketrans({ + "æ": "a", "ø": "o", "å": "a", + "ü": "u", +}) + + +def normalize_for_search(text: str) -> str: + """Normalize for accent-insensitive comparison. + + Handles Nordic characters (æøå) and combining accents. + "tromso" matches "Tromsø", "barum" matches "Bærum". + """ + lowered = text.lower().translate(_NORDIC_MAP) + nfd = unicodedata.normalize("NFD", lowered) + return "".join(c for c in nfd if unicodedata.category(c) != "Mn") + + +def matches_search(text: str, query: str) -> bool: + """Check if all query words appear in text (accent-insensitive).""" + normalized = normalize_for_search(text) + words = normalize_for_search(query).split() + return all(w in normalized for w in words) + + +# --- Year normalization --- + +def normalize_year_value(value: str) -> str: + """Convert "2020" to "2020_2020" if not already in that format.""" + if "_" not in value and value.isdigit(): + return f"{value}_{value}" + return value + + +# --- Category tree operations --- + +def flatten_categories(categories: list[dict]) -> list[dict]: + """Flatten a nested category tree into a flat list with parent_value refs. + + Input: [{value, label, children: [{...}]}] + Output: [{value, label, parent_value: str|None}, ...] 
+ """ + flat: list[dict] = [] + + def _walk(nodes: list[dict], parent: str | None) -> None: + for node in nodes: + flat.append({ + "value": node.get("value", ""), + "label": node.get("label", ""), + "parent_value": parent, + }) + children = node.get("children") or [] + if children: + _walk(children, node.get("value", "")) + + _walk(categories, None) + return flat + + +def navigate_hierarchy( + raw_categories: list[dict], + parent_value: str | None = None, + search: str | None = None, +) -> list[dict]: + """Navigate a nested category tree. Returns [{value, label, child_count}].""" + flat = flatten_categories(raw_categories) + + if search: + return [ + { + "value": c["value"], + "label": c["label"], + "child_count": _count_children(c["value"], flat), + } + for c in flat + if matches_search(c["label"], search) + ] + + if parent_value is None: + targets = [c for c in flat if c["parent_value"] is None] + else: + targets = [c for c in flat if c["parent_value"] == parent_value] + + return [ + { + "value": c["value"], + "label": c["label"], + "child_count": _count_children(c["value"], flat), + } + for c in targets + ] + + +# --- Dimension summarization --- + +def summarize_dimensions(dimensions: list[dict]) -> list[dict]: + """Transform raw dimension data into agent-friendly summaries.""" + result = [] + for dim in dimensions: + code = dim.get("code", "") + label = dim.get("label", "") + raw_categories = dim.get("categories", []) + flat = flatten_categories(raw_categories) + + summary: dict = { + "code": code, + "label": label, + "total_categories": len(flat), + } + + has_hierarchy = any(c["parent_value"] is not None for c in flat) + summary["is_hierarchical"] = has_hierarchy + + if len(flat) == 1: + summary["is_fixed"] = True + summary["values"] = [ + {"value": flat[0]["value"], "label": flat[0]["label"]} + ] + summary["note"] = "Single-valued, auto-included in queries" + elif has_hierarchy and len(flat) > 20: + top_level = [c for c in flat if c["parent_value"] is None] + 
summary["hierarchy_depth"] = _compute_depth(flat) + summary["top_level_values"] = [ + { + "value": c["value"], + "label": c["label"], + "child_count": _count_children(c["value"], flat), + } + for c in top_level + ] + summary["note"] = "Use get_dimension_values to drill into sub-levels" + elif is_year_dimension(code, flat): + values = [c["value"] for c in flat] + years = _extract_year_range(values) + summary["is_hierarchical"] = False + summary["value_format"] = "YYYY_YYYY (e.g. 2020_2020)" + if years: + summary["range"] = f"{years[0]}..{years[-1]}" + if len(flat) <= 50: + summary["values"] = values + else: + summary["values"] = [ + {"value": c["value"], "label": c["label"]} + for c in flat + ] + + result.append(summary) + return result + + +def extract_metadata_fields(metadata: dict) -> dict: + """Extract key fields from metadata response. + + API returns: {name, isOfficialStatistics, paragraphs: [{header, content}]} + """ + fields: dict = {} + + if isinstance(metadata, dict): + if metadata.get("isOfficialStatistics") is not None: + fields["is_official_statistics"] = metadata["isOfficialStatistics"] + paragraphs = metadata.get("paragraphs", []) + elif isinstance(metadata, list): + paragraphs = metadata + else: + paragraphs = [] + + for section in paragraphs: + header = (section.get("header") or "").lower() + content = strip_html(section.get("content") or "") + if not content: + continue + + if "beskrivelse" in header or "description" in header: + fields["description"] = content + elif "oppdater" in header or "frekvens" in header: + fields["update_frequency"] = content + elif "nøkkelord" in header or "keyword" in header or "emneord" in header: + fields["keywords"] = [k.strip() for k in content.split(",")] + elif "kilde" in header or "source" in header or "institusjon" in header: + fields["source_institution"] = content + + return fields + + +# --- Query dimension completion --- + +def complete_query_dimensions( + dim_definitions: list[dict], + user_dimensions: 
list[dict], +) -> list[dict]: + """Build complete query dimension list from user input and table definitions. + + - User-provided dimensions pass through (with year normalization for "item" filter). + - Fixed dimensions (1 category) are auto-included. + - MEASURE_TYPE defaults to filter="all", values=["*"]. + - Missing non-fixed dimensions raise ValueError. + """ + for d in user_dimensions: + if "code" not in d: + raise ValueError(f"Dimension entry missing 'code' key: {d}") + + provided = {d["code"].upper(): d for d in user_dimensions} + query_dims = [] + missing = [] + + for dim_def in dim_definitions: + code = dim_def.get("code", "") + raw_categories = dim_def.get("categories", []) + flat = flatten_categories(raw_categories) + upper_code = code.upper() + + if upper_code in provided: + d = provided[upper_code] + filt = d.get("filter", "item") + vals = d.get("values", []) + if filt == "item" and is_year_dimension(code, flat): + vals = [normalize_year_value(v) for v in vals] + query_dims.append({"code": code, "filter": filt, "values": vals}) + elif len(flat) == 1: + query_dims.append({ + "code": code, + "filter": "item", + "values": [flat[0]["value"]], + }) + elif upper_code == "MEASURE_TYPE": + query_dims.append({ + "code": code, + "filter": "all", + "values": ["*"], + }) + else: + missing.append(code) + + if missing: + raise ValueError( + f"Missing required dimensions: {', '.join(missing)}. " + "Specify these or use filter='all' with values=['*'] to include all." 
+ ) + + return query_dims + + +# --- CSV parsing --- + +def parse_csv_to_rows(csv_text: str, max_rows: int = 1000) -> dict: + """Parse semicolon-delimited CSV response into structured rows.""" + reader = csv.DictReader(io.StringIO(csv_text), delimiter=";") + rows = [] + total = 0 + for row in reader: + total += 1 + if max_rows > 0 and len(rows) >= max_rows: + continue # keep counting total + cleaned = {} + for k, v in row.items(): + if k is None: + continue + cleaned[k.strip()] = _try_numeric(v.strip() if v else "") + rows.append(cleaned) + + return { + "rows": rows, + "total_rows": total, + "truncated": total > len(rows), + } + + +# --- Internal helpers --- + +def _count_children(value: str, flat: list[dict]) -> int: + return sum(1 for c in flat if c["parent_value"] == value) + + +def _compute_depth(flat: list[dict]) -> int: + """Compute hierarchy depth with cycle detection.""" + parent_map = {c["value"]: c["parent_value"] for c in flat} + max_depth = 1 + for val in parent_map: + depth = 1 + current = val + seen: set[str] = set() + while parent_map.get(current) and current not in seen: + seen.add(current) + current = parent_map[current] + depth += 1 + max_depth = max(max_depth, depth) + return max_depth + + +def is_year_dimension(code: str, flat: list[dict]) -> bool: + if code.upper() in ("AAR", "YEAR"): + return True + if flat and re.match(r"^\d{4}_\d{4}$", flat[0]["value"]): + return True + return False + + +def _extract_year_range(values: list[str]) -> list[int]: + years = [] + for v in values: + m = re.match(r"^(\d{4})(?:_\d{4})?$", v) + if m: + years.append(int(m.group(1))) + return sorted(years) + + +def _try_numeric(value: str): + """Try to convert a string to int or float. Returns None for missing-data symbols.""" + if not value or value in ("..", ":", "-"): + return None + try: + if "." 
in value or "," in value: + return float(value.replace(",", ".")) + return int(value) + except ValueError: + return value diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/__pycache__/__init__.cpython-312.pyc b/tests/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..f4a3b21 Binary files /dev/null and b/tests/__pycache__/__init__.cpython-312.pyc differ diff --git a/tests/__pycache__/test_cache.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_cache.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..80fcc30 Binary files /dev/null and b/tests/__pycache__/test_cache.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/__pycache__/test_transformers.cpython-312-pytest-9.0.2.pyc b/tests/__pycache__/test_transformers.cpython-312-pytest-9.0.2.pyc new file mode 100644 index 0000000..c7584e6 Binary files /dev/null and b/tests/__pycache__/test_transformers.cpython-312-pytest-9.0.2.pyc differ diff --git a/tests/fixtures/data_185.csv b/tests/fixtures/data_185.csv new file mode 100644 index 0000000..79a0426 --- /dev/null +++ b/tests/fixtures/data_185.csv @@ -0,0 +1,2 @@ +"Geografi";"År";"Kjønn";"Alder";"antall";"prosent vekst";"FLAGG" +"Oslo";"2024";"kjønn samlet";"alle aldre";"6580";"0.916804837608505";"" diff --git a/tests/fixtures/dimensions_185.json b/tests/fixtures/dimensions_185.json new file mode 100644 index 0000000..2a00d9d --- /dev/null +++ b/tests/fixtures/dimensions_185.json @@ -0,0 +1,2234 @@ +{ + "dimensions": [ + { + "code": "GEO", + "label": "Geografi", + "categories": [ + { + "label": "Hele landet", + "value": "0", + "children": [ + { + "label": "Oslo (fylke)", + "value": "03", + "children": [ + { + "label": "Oslo", + "value": "0301", + "children": [ + { + "label": "Bydel Gamle Oslo", + "value": "030101", + "children": [] + }, + { + "label": "Bydel Grünerløkka", + "value": "030102", + "children": [] + }, 
+ { + "label": "Bydel Sagene", + "value": "030103", + "children": [] + }, + { + "label": "Bydel St. Hanshaugen", + "value": "030104", + "children": [] + }, + { + "label": "Bydel Frogner", + "value": "030105", + "children": [] + }, + { + "label": "Bydel Ullern", + "value": "030106", + "children": [] + }, + { + "label": "Bydel Vestre Aker", + "value": "030107", + "children": [] + }, + { + "label": "Bydel Nordre Aker", + "value": "030108", + "children": [] + }, + { + "label": "Bydel Bjerke", + "value": "030109", + "children": [] + }, + { + "label": "Bydel Grorud", + "value": "030110", + "children": [] + }, + { + "label": "Bydel Stovner", + "value": "030111", + "children": [] + }, + { + "label": "Bydel Alna", + "value": "030112", + "children": [] + }, + { + "label": "Bydel Østensjø", + "value": "030113", + "children": [] + }, + { + "label": "Bydel Nordstrand", + "value": "030114", + "children": [] + }, + { + "label": "Bydel Søndre Nordstrand", + "value": "030115", + "children": [] + } + ] + } + ] + }, + { + "label": "Østfold", + "value": "31", + "children": [ + { + "label": "Halden", + "value": "3101", + "children": [] + }, + { + "label": "Moss", + "value": "3103", + "children": [] + }, + { + "label": "Sarpsborg", + "value": "3105", + "children": [] + }, + { + "label": "Fredrikstad", + "value": "3107", + "children": [] + }, + { + "label": "Hvaler", + "value": "3110", + "children": [] + }, + { + "label": "Råde", + "value": "3112", + "children": [] + }, + { + "label": "Våler (Østfold)", + "value": "3114", + "children": [] + }, + { + "label": "Skiptvet", + "value": "3116", + "children": [] + }, + { + "label": "Indre Østfold", + "value": "3118", + "children": [] + }, + { + "label": "Rakkestad", + "value": "3120", + "children": [] + }, + { + "label": "Marker", + "value": "3122", + "children": [] + }, + { + "label": "Aremark", + "value": "3124", + "children": [] + } + ] + }, + { + "label": "Akershus", + "value": "32", + "children": [ + { + "label": "Bærum", + "value": 
"3201", + "children": [] + }, + { + "label": "Asker", + "value": "3203", + "children": [] + }, + { + "label": "Lillestrøm", + "value": "3205", + "children": [] + }, + { + "label": "Nordre Follo", + "value": "3207", + "children": [] + }, + { + "label": "Ullensaker", + "value": "3209", + "children": [] + }, + { + "label": "Nesodden", + "value": "3212", + "children": [] + }, + { + "label": "Frogn", + "value": "3214", + "children": [] + }, + { + "label": "Vestby", + "value": "3216", + "children": [] + }, + { + "label": "Ås", + "value": "3218", + "children": [] + }, + { + "label": "Enebakk", + "value": "3220", + "children": [] + }, + { + "label": "Lørenskog", + "value": "3222", + "children": [] + }, + { + "label": "Rælingen", + "value": "3224", + "children": [] + }, + { + "label": "Aurskog-Høland", + "value": "3226", + "children": [] + }, + { + "label": "Nes", + "value": "3228", + "children": [] + }, + { + "label": "Gjerdrum", + "value": "3230", + "children": [] + }, + { + "label": "Nittedal", + "value": "3232", + "children": [] + }, + { + "label": "Lunner", + "value": "3234", + "children": [] + }, + { + "label": "Jevnaker", + "value": "3236", + "children": [] + }, + { + "label": "Nannestad", + "value": "3238", + "children": [] + }, + { + "label": "Eidsvoll", + "value": "3240", + "children": [] + }, + { + "label": "Hurdal", + "value": "3242", + "children": [] + } + ] + }, + { + "label": "Buskerud", + "value": "33", + "children": [ + { + "label": "Drammen", + "value": "3301", + "children": [] + }, + { + "label": "Kongsberg", + "value": "3303", + "children": [] + }, + { + "label": "Ringerike", + "value": "3305", + "children": [] + }, + { + "label": "Hole", + "value": "3310", + "children": [] + }, + { + "label": "Lier", + "value": "3312", + "children": [] + }, + { + "label": "Øvre Eiker", + "value": "3314", + "children": [] + }, + { + "label": "Modum", + "value": "3316", + "children": [] + }, + { + "label": "Krødsherad", + "value": "3318", + "children": [] + }, + { + 
"label": "Flå", + "value": "3320", + "children": [] + }, + { + "label": "Nesbyen", + "value": "3322", + "children": [] + }, + { + "label": "Gol", + "value": "3324", + "children": [] + }, + { + "label": "Hemsedal", + "value": "3326", + "children": [] + }, + { + "label": "Ål", + "value": "3328", + "children": [] + }, + { + "label": "Hol", + "value": "3330", + "children": [] + }, + { + "label": "Sigdal", + "value": "3332", + "children": [] + }, + { + "label": "Flesberg", + "value": "3334", + "children": [] + }, + { + "label": "Rollag", + "value": "3336", + "children": [] + }, + { + "label": "Nore og Uvdal", + "value": "3338", + "children": [] + } + ] + }, + { + "label": "Innlandet", + "value": "34", + "children": [ + { + "label": "Kongsvinger", + "value": "3401", + "children": [] + }, + { + "label": "Hamar", + "value": "3403", + "children": [] + }, + { + "label": "Lillehammer", + "value": "3405", + "children": [] + }, + { + "label": "Gjøvik", + "value": "3407", + "children": [] + }, + { + "label": "Ringsaker", + "value": "3411", + "children": [] + }, + { + "label": "Løten", + "value": "3412", + "children": [] + }, + { + "label": "Stange", + "value": "3413", + "children": [] + }, + { + "label": "Nord-Odal", + "value": "3414", + "children": [] + }, + { + "label": "Sør-Odal", + "value": "3415", + "children": [] + }, + { + "label": "Eidskog", + "value": "3416", + "children": [] + }, + { + "label": "Grue", + "value": "3417", + "children": [] + }, + { + "label": "Åsnes", + "value": "3418", + "children": [] + }, + { + "label": "Våler (Innlandet)", + "value": "3419", + "children": [] + }, + { + "label": "Elverum", + "value": "3420", + "children": [] + }, + { + "label": "Trysil", + "value": "3421", + "children": [] + }, + { + "label": "Åmot", + "value": "3422", + "children": [] + }, + { + "label": "Stor-Elvdal", + "value": "3423", + "children": [] + }, + { + "label": "Rendalen", + "value": "3424", + "children": [] + }, + { + "label": "Engerdal", + "value": "3425", + 
"children": [] + }, + { + "label": "Tolga", + "value": "3426", + "children": [] + }, + { + "label": "Tynset", + "value": "3427", + "children": [] + }, + { + "label": "Alvdal", + "value": "3428", + "children": [] + }, + { + "label": "Folldal", + "value": "3429", + "children": [] + }, + { + "label": "Os", + "value": "3430", + "children": [] + }, + { + "label": "Dovre", + "value": "3431", + "children": [] + }, + { + "label": "Lesja", + "value": "3432", + "children": [] + }, + { + "label": "Skjåk", + "value": "3433", + "children": [] + }, + { + "label": "Lom", + "value": "3434", + "children": [] + }, + { + "label": "Vågå", + "value": "3435", + "children": [] + }, + { + "label": "Nord-Fron", + "value": "3436", + "children": [] + }, + { + "label": "Sel", + "value": "3437", + "children": [] + }, + { + "label": "Sør-Fron", + "value": "3438", + "children": [] + }, + { + "label": "Ringebu", + "value": "3439", + "children": [] + }, + { + "label": "Øyer", + "value": "3440", + "children": [] + }, + { + "label": "Gausdal", + "value": "3441", + "children": [] + }, + { + "label": "Østre Toten", + "value": "3442", + "children": [] + }, + { + "label": "Vestre Toten", + "value": "3443", + "children": [] + }, + { + "label": "Gran", + "value": "3446", + "children": [] + }, + { + "label": "Søndre Land", + "value": "3447", + "children": [] + }, + { + "label": "Nordre Land", + "value": "3448", + "children": [] + }, + { + "label": "Sør-Aurdal", + "value": "3449", + "children": [] + }, + { + "label": "Etnedal", + "value": "3450", + "children": [] + }, + { + "label": "Nord-Aurdal", + "value": "3451", + "children": [] + }, + { + "label": "Vestre Slidre", + "value": "3452", + "children": [] + }, + { + "label": "Øystre Slidre", + "value": "3453", + "children": [] + }, + { + "label": "Vang", + "value": "3454", + "children": [] + } + ] + }, + { + "label": "Vestfold", + "value": "39", + "children": [ + { + "label": "Horten", + "value": "3901", + "children": [] + }, + { + "label": "Holmestrand", + 
"value": "3903", + "children": [] + }, + { + "label": "Tønsberg", + "value": "3905", + "children": [] + }, + { + "label": "Sandefjord", + "value": "3907", + "children": [] + }, + { + "label": "Larvik", + "value": "3909", + "children": [] + }, + { + "label": "Færder", + "value": "3911", + "children": [] + } + ] + }, + { + "label": "Telemark", + "value": "40", + "children": [ + { + "label": "Porsgrunn", + "value": "4001", + "children": [] + }, + { + "label": "Skien", + "value": "4003", + "children": [] + }, + { + "label": "Notodden", + "value": "4005", + "children": [] + }, + { + "label": "Siljan", + "value": "4010", + "children": [] + }, + { + "label": "Bamble", + "value": "4012", + "children": [] + }, + { + "label": "Kragerø", + "value": "4014", + "children": [] + }, + { + "label": "Drangedal", + "value": "4016", + "children": [] + }, + { + "label": "Nome", + "value": "4018", + "children": [] + }, + { + "label": "Midt-Telemark", + "value": "4020", + "children": [] + }, + { + "label": "Seljord", + "value": "4022", + "children": [] + }, + { + "label": "Hjartdal", + "value": "4024", + "children": [] + }, + { + "label": "Tinn", + "value": "4026", + "children": [] + }, + { + "label": "Kviteseid", + "value": "4028", + "children": [] + }, + { + "label": "Nissedal", + "value": "4030", + "children": [] + }, + { + "label": "Fyresdal", + "value": "4032", + "children": [] + }, + { + "label": "Tokke", + "value": "4034", + "children": [] + }, + { + "label": "Vinje", + "value": "4036", + "children": [] + } + ] + }, + { + "label": "Agder", + "value": "42", + "children": [ + { + "label": "Risør", + "value": "4201", + "children": [] + }, + { + "label": "Grimstad", + "value": "4202", + "children": [] + }, + { + "label": "Arendal", + "value": "4203", + "children": [] + }, + { + "label": "Kristiansand", + "value": "4204", + "children": [] + }, + { + "label": "Lindesnes", + "value": "4205", + "children": [] + }, + { + "label": "Farsund", + "value": "4206", + "children": [] + }, + { + 
"label": "Flekkefjord", + "value": "4207", + "children": [] + }, + { + "label": "Gjerstad", + "value": "4211", + "children": [] + }, + { + "label": "Vegårshei", + "value": "4212", + "children": [] + }, + { + "label": "Tvedestrand", + "value": "4213", + "children": [] + }, + { + "label": "Froland", + "value": "4214", + "children": [] + }, + { + "label": "Lillesand", + "value": "4215", + "children": [] + }, + { + "label": "Birkenes", + "value": "4216", + "children": [] + }, + { + "label": "Åmli", + "value": "4217", + "children": [] + }, + { + "label": "Iveland", + "value": "4218", + "children": [] + }, + { + "label": "Evje og Hornnes", + "value": "4219", + "children": [] + }, + { + "label": "Bygland", + "value": "4220", + "children": [] + }, + { + "label": "Valle", + "value": "4221", + "children": [] + }, + { + "label": "Bykle", + "value": "4222", + "children": [] + }, + { + "label": "Vennesla", + "value": "4223", + "children": [] + }, + { + "label": "Åseral", + "value": "4224", + "children": [] + }, + { + "label": "Lyngdal", + "value": "4225", + "children": [] + }, + { + "label": "Hægebostad", + "value": "4226", + "children": [] + }, + { + "label": "Kvinesdal", + "value": "4227", + "children": [] + }, + { + "label": "Sirdal", + "value": "4228", + "children": [] + } + ] + }, + { + "label": "Rogaland", + "value": "11", + "children": [ + { + "label": "Eigersund", + "value": "1101", + "children": [] + }, + { + "label": "Stavanger", + "value": "1103", + "children": [ + { + "label": "Hundvåg kommunedel", + "value": "110301", + "children": [] + }, + { + "label": "Tasta kommunedel", + "value": "110302", + "children": [] + }, + { + "label": "Eiganes og Våland", + "value": "110303", + "children": [] + }, + { + "label": "Madla kommunedel", + "value": "110304", + "children": [] + }, + { + "label": "Storhaug kommunedel", + "value": "110305", + "children": [] + }, + { + "label": "Hillevåg kommunedel", + "value": "110306", + "children": [] + }, + { + "label": "Hinna kommunedel", + 
"value": "110307", + "children": [] + }, + { + "label": "Finnøy kommunedel", + "value": "110308", + "children": [] + }, + { + "label": "Rennesøy kommunedel", + "value": "110309", + "children": [] + } + ] + }, + { + "label": "Haugesund", + "value": "1106", + "children": [] + }, + { + "label": "Sandnes", + "value": "1108", + "children": [] + }, + { + "label": "Sokndal", + "value": "1111", + "children": [] + }, + { + "label": "Lund", + "value": "1112", + "children": [] + }, + { + "label": "Bjerkreim", + "value": "1114", + "children": [] + }, + { + "label": "Hå", + "value": "1119", + "children": [] + }, + { + "label": "Klepp", + "value": "1120", + "children": [] + }, + { + "label": "Time", + "value": "1121", + "children": [] + }, + { + "label": "Gjesdal", + "value": "1122", + "children": [] + }, + { + "label": "Sola", + "value": "1124", + "children": [] + }, + { + "label": "Randaberg", + "value": "1127", + "children": [] + }, + { + "label": "Strand", + "value": "1130", + "children": [] + }, + { + "label": "Hjelmeland", + "value": "1133", + "children": [] + }, + { + "label": "Suldal", + "value": "1134", + "children": [] + }, + { + "label": "Sauda", + "value": "1135", + "children": [] + }, + { + "label": "Kvitsøy", + "value": "1144", + "children": [] + }, + { + "label": "Bokn", + "value": "1145", + "children": [] + }, + { + "label": "Tysvær", + "value": "1146", + "children": [] + }, + { + "label": "Karmøy", + "value": "1149", + "children": [] + }, + { + "label": "Utsira", + "value": "1151", + "children": [] + }, + { + "label": "Vindafjord", + "value": "1160", + "children": [] + } + ] + }, + { + "label": "Vestland", + "value": "46", + "children": [ + { + "label": "Bergen", + "value": "4601", + "children": [ + { + "label": "Arna bydel", + "value": "460101", + "children": [] + }, + { + "label": "Bergenhus bydel", + "value": "460102", + "children": [] + }, + { + "label": "Fana bydel", + "value": "460103", + "children": [] + }, + { + "label": "Fyllingsdalen bydel", + "value": 
"460104", + "children": [] + }, + { + "label": "Laksevåg bydel", + "value": "460105", + "children": [] + }, + { + "label": "Ytrebygda bydel", + "value": "460106", + "children": [] + }, + { + "label": "Årstad bydel", + "value": "460107", + "children": [] + }, + { + "label": "Åsane bydel", + "value": "460108", + "children": [] + } + ] + }, + { + "label": "Kinn", + "value": "4602", + "children": [] + }, + { + "label": "Etne", + "value": "4611", + "children": [] + }, + { + "label": "Sveio", + "value": "4612", + "children": [] + }, + { + "label": "Bømlo", + "value": "4613", + "children": [] + }, + { + "label": "Stord", + "value": "4614", + "children": [] + }, + { + "label": "Fitjar", + "value": "4615", + "children": [] + }, + { + "label": "Tysnes", + "value": "4616", + "children": [] + }, + { + "label": "Kvinnherad", + "value": "4617", + "children": [] + }, + { + "label": "Ullensvang", + "value": "4618", + "children": [] + }, + { + "label": "Eidfjord", + "value": "4619", + "children": [] + }, + { + "label": "Ulvik", + "value": "4620", + "children": [] + }, + { + "label": "Voss", + "value": "4621", + "children": [] + }, + { + "label": "Kvam", + "value": "4622", + "children": [] + }, + { + "label": "Samnanger", + "value": "4623", + "children": [] + }, + { + "label": "Bjørnafjorden", + "value": "4624", + "children": [] + }, + { + "label": "Austevoll", + "value": "4625", + "children": [] + }, + { + "label": "Øygarden", + "value": "4626", + "children": [] + }, + { + "label": "Askøy", + "value": "4627", + "children": [] + }, + { + "label": "Vaksdal", + "value": "4628", + "children": [] + }, + { + "label": "Modalen", + "value": "4629", + "children": [] + }, + { + "label": "Osterøy", + "value": "4630", + "children": [] + }, + { + "label": "Alver", + "value": "4631", + "children": [] + }, + { + "label": "Austrheim", + "value": "4632", + "children": [] + }, + { + "label": "Fedje", + "value": "4633", + "children": [] + }, + { + "label": "Masfjorden", + "value": "4634", + 
"children": [] + }, + { + "label": "Gulen", + "value": "4635", + "children": [] + }, + { + "label": "Solund", + "value": "4636", + "children": [] + }, + { + "label": "Hyllestad", + "value": "4637", + "children": [] + }, + { + "label": "Høyanger", + "value": "4638", + "children": [] + }, + { + "label": "Vik", + "value": "4639", + "children": [] + }, + { + "label": "Sogndal", + "value": "4640", + "children": [] + }, + { + "label": "Aurland", + "value": "4641", + "children": [] + }, + { + "label": "Lærdal", + "value": "4642", + "children": [] + }, + { + "label": "Årdal", + "value": "4643", + "children": [] + }, + { + "label": "Luster", + "value": "4644", + "children": [] + }, + { + "label": "Askvoll", + "value": "4645", + "children": [] + }, + { + "label": "Fjaler", + "value": "4646", + "children": [] + }, + { + "label": "Sunnfjord", + "value": "4647", + "children": [] + }, + { + "label": "Bremanger", + "value": "4648", + "children": [] + }, + { + "label": "Stad", + "value": "4649", + "children": [] + }, + { + "label": "Gloppen", + "value": "4650", + "children": [] + }, + { + "label": "Stryn", + "value": "4651", + "children": [] + } + ] + }, + { + "label": "Møre og Romsdal", + "value": "15", + "children": [ + { + "label": "Kristiansund", + "value": "1505", + "children": [] + }, + { + "label": "Molde", + "value": "1506", + "children": [] + }, + { + "label": "Ålesund", + "value": "1508", + "children": [] + }, + { + "label": "Vanylven", + "value": "1511", + "children": [] + }, + { + "label": "Sande", + "value": "1514", + "children": [] + }, + { + "label": "Herøy (Møre og Romsdal)", + "value": "1515", + "children": [] + }, + { + "label": "Ulstein", + "value": "1516", + "children": [] + }, + { + "label": "Hareid", + "value": "1517", + "children": [] + }, + { + "label": "Ørsta", + "value": "1520", + "children": [] + }, + { + "label": "Stranda", + "value": "1525", + "children": [] + }, + { + "label": "Sykkylven", + "value": "1528", + "children": [] + }, + { + "label": 
"Sula", + "value": "1531", + "children": [] + }, + { + "label": "Giske", + "value": "1532", + "children": [] + }, + { + "label": "Vestnes", + "value": "1535", + "children": [] + }, + { + "label": "Rauma", + "value": "1539", + "children": [] + }, + { + "label": "Aukra", + "value": "1547", + "children": [] + }, + { + "label": "Averøy", + "value": "1554", + "children": [] + }, + { + "label": "Gjemnes", + "value": "1557", + "children": [] + }, + { + "label": "Tingvoll", + "value": "1560", + "children": [] + }, + { + "label": "Sunndal", + "value": "1563", + "children": [] + }, + { + "label": "Surnadal", + "value": "1566", + "children": [] + }, + { + "label": "Smøla", + "value": "1573", + "children": [] + }, + { + "label": "Aure", + "value": "1576", + "children": [] + }, + { + "label": "Volda", + "value": "1577", + "children": [] + }, + { + "label": "Fjord", + "value": "1578", + "children": [] + }, + { + "label": "Hustadvika", + "value": "1579", + "children": [] + }, + { + "label": "Haram", + "value": "1580", + "children": [] + } + ] + }, + { + "label": "Trøndelag Trööndelage", + "value": "50", + "children": [ + { + "label": "Trondheim Tråante", + "value": "5001", + "children": [ + { + "label": "Midtbyen bydel", + "value": "500101", + "children": [] + }, + { + "label": "Østbyen bydel", + "value": "500102", + "children": [] + }, + { + "label": "Lerkendal bydel", + "value": "500103", + "children": [] + }, + { + "label": "Heimdal bydel", + "value": "500104", + "children": [] + } + ] + }, + { + "label": "Steinkjer", + "value": "5006", + "children": [] + }, + { + "label": "Namsos Nåavmesjenjaelmie", + "value": "5007", + "children": [] + }, + { + "label": "Frøya", + "value": "5014", + "children": [] + }, + { + "label": "Osen", + "value": "5020", + "children": [] + }, + { + "label": "Oppdal", + "value": "5021", + "children": [] + }, + { + "label": "Rennebu", + "value": "5022", + "children": [] + }, + { + "label": "Røros Rossen", + "value": "5025", + "children": [] + }, + { + 
"label": "Holtålen", + "value": "5026", + "children": [] + }, + { + "label": "Midtre Gauldal", + "value": "5027", + "children": [] + }, + { + "label": "Melhus", + "value": "5028", + "children": [] + }, + { + "label": "Skaun", + "value": "5029", + "children": [] + }, + { + "label": "Malvik", + "value": "5031", + "children": [] + }, + { + "label": "Selbu", + "value": "5032", + "children": [] + }, + { + "label": "Tydal", + "value": "5033", + "children": [] + }, + { + "label": "Meråker", + "value": "5034", + "children": [] + }, + { + "label": "Stjørdal", + "value": "5035", + "children": [] + }, + { + "label": "Frosta", + "value": "5036", + "children": [] + }, + { + "label": "Levanger", + "value": "5037", + "children": [] + }, + { + "label": "Verdal", + "value": "5038", + "children": [] + }, + { + "label": "Snåase Snåsa", + "value": "5041", + "children": [] + }, + { + "label": "Lierne", + "value": "5042", + "children": [] + }, + { + "label": "Raarvihke Røyrvik", + "value": "5043", + "children": [] + }, + { + "label": "Namsskogan", + "value": "5044", + "children": [] + }, + { + "label": "Grong", + "value": "5045", + "children": [] + }, + { + "label": "Høylandet", + "value": "5046", + "children": [] + }, + { + "label": "Overhalla", + "value": "5047", + "children": [] + }, + { + "label": "Flatanger", + "value": "5049", + "children": [] + }, + { + "label": "Leka", + "value": "5052", + "children": [] + }, + { + "label": "Inderøy", + "value": "5053", + "children": [] + }, + { + "label": "Indre Fosen", + "value": "5054", + "children": [] + }, + { + "label": "Heim", + "value": "5055", + "children": [] + }, + { + "label": "Hitra", + "value": "5056", + "children": [] + }, + { + "label": "Ørland", + "value": "5057", + "children": [] + }, + { + "label": "Åfjord", + "value": "5058", + "children": [] + }, + { + "label": "Orkland", + "value": "5059", + "children": [] + }, + { + "label": "Nærøysund", + "value": "5060", + "children": [] + }, + { + "label": "Rindal", + "value": "5061", + 
"children": [] + } + ] + }, + { + "label": "Nordland Nordlánnda", + "value": "18", + "children": [ + { + "label": "Bodø", + "value": "1804", + "children": [] + }, + { + "label": "Narvik", + "value": "1806", + "children": [] + }, + { + "label": "Bindal", + "value": "1811", + "children": [] + }, + { + "label": "Sømna", + "value": "1812", + "children": [] + }, + { + "label": "Brønnøy", + "value": "1813", + "children": [] + }, + { + "label": "Vega", + "value": "1815", + "children": [] + }, + { + "label": "Vevelstad", + "value": "1816", + "children": [] + }, + { + "label": "Herøy (Nordland)", + "value": "1818", + "children": [] + }, + { + "label": "Alstahaug", + "value": "1820", + "children": [] + }, + { + "label": "Leirfjord", + "value": "1822", + "children": [] + }, + { + "label": "Vefsn", + "value": "1824", + "children": [] + }, + { + "label": "Grane", + "value": "1825", + "children": [] + }, + { + "label": "Aarborte Hattfjelldal", + "value": "1826", + "children": [] + }, + { + "label": "Dønna", + "value": "1827", + "children": [] + }, + { + "label": "Nesna", + "value": "1828", + "children": [] + }, + { + "label": "Hemnes", + "value": "1832", + "children": [] + }, + { + "label": "Rana", + "value": "1833", + "children": [] + }, + { + "label": "Lurøy", + "value": "1834", + "children": [] + }, + { + "label": "Træna", + "value": "1835", + "children": [] + }, + { + "label": "Rødøy", + "value": "1836", + "children": [] + }, + { + "label": "Meløy", + "value": "1837", + "children": [] + }, + { + "label": "Gildeskål", + "value": "1838", + "children": [] + }, + { + "label": "Beiarn", + "value": "1839", + "children": [] + }, + { + "label": "Saltdal", + "value": "1840", + "children": [] + }, + { + "label": "Fauske Fuossko", + "value": "1841", + "children": [] + }, + { + "label": "Sørfold", + "value": "1845", + "children": [] + }, + { + "label": "Steigen", + "value": "1848", + "children": [] + }, + { + "label": "Lødingen", + "value": "1851", + "children": [] + }, + { + "label": 
"Evenes Evenášši", + "value": "1853", + "children": [] + }, + { + "label": "Røst", + "value": "1856", + "children": [] + }, + { + "label": "Værøy", + "value": "1857", + "children": [] + }, + { + "label": "Flakstad", + "value": "1859", + "children": [] + }, + { + "label": "Vestvågøy", + "value": "1860", + "children": [] + }, + { + "label": "Vågan", + "value": "1865", + "children": [] + }, + { + "label": "Hadsel", + "value": "1866", + "children": [] + }, + { + "label": "Bø", + "value": "1867", + "children": [] + }, + { + "label": "Øksnes", + "value": "1868", + "children": [] + }, + { + "label": "Sortland Suortá", + "value": "1870", + "children": [] + }, + { + "label": "Andøy", + "value": "1871", + "children": [] + }, + { + "label": "Moskenes", + "value": "1874", + "children": [] + }, + { + "label": "Hábmer Hamarøy", + "value": "1875", + "children": [] + } + ] + }, + { + "label": "Troms Romsa Tromssa", + "value": "55", + "children": [ + { + "label": "Tromsø", + "value": "5501", + "children": [] + }, + { + "label": "Harstad Hárstták", + "value": "5503", + "children": [] + }, + { + "label": "Kvæfjord", + "value": "5510", + "children": [] + }, + { + "label": "Dielddanuorri Tjeldsund", + "value": "5512", + "children": [] + }, + { + "label": "Ibestad", + "value": "5514", + "children": [] + }, + { + "label": "Gratangen", + "value": "5516", + "children": [] + }, + { + "label": "Loabák Lavangen", + "value": "5518", + "children": [] + }, + { + "label": "Bardu", + "value": "5520", + "children": [] + }, + { + "label": "Salangen", + "value": "5522", + "children": [] + }, + { + "label": "Målselv", + "value": "5524", + "children": [] + }, + { + "label": "Sørreisa", + "value": "5526", + "children": [] + }, + { + "label": "Dyrøy", + "value": "5528", + "children": [] + }, + { + "label": "Senja", + "value": "5530", + "children": [] + }, + { + "label": "Balsfjord", + "value": "5532", + "children": [] + }, + { + "label": "Karlsøy", + "value": "5534", + "children": [] + }, + { + "label": 
"Lyngen", + "value": "5536", + "children": [] + }, + { + "label": "Storfjord Omasvuotna Omasvuono", + "value": "5538", + "children": [] + }, + { + "label": "Gáivuotna Kåfjord Kaivuono", + "value": "5540", + "children": [] + }, + { + "label": "Skjervøy", + "value": "5542", + "children": [] + }, + { + "label": "Nordreisa Ráisa Raisi", + "value": "5544", + "children": [] + }, + { + "label": "Kvænangen", + "value": "5546", + "children": [] + } + ] + }, + { + "label": "Finnmark Finnmárku Finmarkku", + "value": "56", + "children": [ + { + "label": "Alta", + "value": "5601", + "children": [] + }, + { + "label": "Hammerfest Hámmerfeasta", + "value": "5603", + "children": [] + }, + { + "label": "Sør-Varanger", + "value": "5605", + "children": [] + }, + { + "label": "Vadsø", + "value": "5607", + "children": [] + }, + { + "label": "Kárášjohka Karasjok", + "value": "5610", + "children": [] + }, + { + "label": "Guovdageaidnu Kautokeino", + "value": "5612", + "children": [] + }, + { + "label": "Loppa", + "value": "5614", + "children": [] + }, + { + "label": "Hasvik", + "value": "5616", + "children": [] + }, + { + "label": "Måsøy", + "value": "5618", + "children": [] + }, + { + "label": "Nordkapp", + "value": "5620", + "children": [] + }, + { + "label": "Porsanger Porsángu Porsanki", + "value": "5622", + "children": [] + }, + { + "label": "Lebesby", + "value": "5624", + "children": [] + }, + { + "label": "Gamvik", + "value": "5626", + "children": [] + }, + { + "label": "Deatnu Tana", + "value": "5628", + "children": [] + }, + { + "label": "Berlevåg", + "value": "5630", + "children": [] + }, + { + "label": "Båtsfjord", + "value": "5632", + "children": [] + }, + { + "label": "Vardø", + "value": "5634", + "children": [] + }, + { + "label": "Unjárga Nesseby", + "value": "5636", + "children": [] + } + ] + } + ] + } + ] + }, + { + "code": "AAR", + "label": "År", + "categories": [ + { + "label": "2002", + "value": "2002_2002", + "children": [] + }, + { + "label": "2003", + "value": 
"2003_2003", + "children": [] + }, + { + "label": "2004", + "value": "2004_2004", + "children": [] + }, + { + "label": "2005", + "value": "2005_2005", + "children": [] + }, + { + "label": "2006", + "value": "2006_2006", + "children": [] + }, + { + "label": "2007", + "value": "2007_2007", + "children": [] + }, + { + "label": "2008", + "value": "2008_2008", + "children": [] + }, + { + "label": "2009", + "value": "2009_2009", + "children": [] + }, + { + "label": "2010", + "value": "2010_2010", + "children": [] + }, + { + "label": "2011", + "value": "2011_2011", + "children": [] + }, + { + "label": "2012", + "value": "2012_2012", + "children": [] + }, + { + "label": "2013", + "value": "2013_2013", + "children": [] + }, + { + "label": "2014", + "value": "2014_2014", + "children": [] + }, + { + "label": "2015", + "value": "2015_2015", + "children": [] + }, + { + "label": "2016", + "value": "2016_2016", + "children": [] + }, + { + "label": "2017", + "value": "2017_2017", + "children": [] + }, + { + "label": "2018", + "value": "2018_2018", + "children": [] + }, + { + "label": "2019", + "value": "2019_2019", + "children": [] + }, + { + "label": "2020", + "value": "2020_2020", + "children": [] + }, + { + "label": "2021", + "value": "2021_2021", + "children": [] + }, + { + "label": "2022", + "value": "2022_2022", + "children": [] + }, + { + "label": "2023", + "value": "2023_2023", + "children": [] + }, + { + "label": "2024", + "value": "2024_2024", + "children": [] + } + ] + }, + { + "code": "KJONN", + "label": "Kjønn", + "categories": [ + { + "label": "kjønn samlet", + "value": "0", + "children": [] + } + ] + }, + { + "code": "ALDER", + "label": "Alder", + "categories": [ + { + "label": "alle aldre", + "value": "0_120", + "children": [] + } + ] + }, + { + "code": "MEASURE_TYPE", + "label": "Måltall", + "categories": [ + { + "label": "antall", + "value": "TELLER", + "children": null + }, + { + "label": "prosent vekst", + "value": "RATE", + "children": null + } + ] + } + ] +} 
diff --git a/tests/fixtures/flags_185.json b/tests/fixtures/flags_185.json new file mode 100644 index 0000000..1e24519 --- /dev/null +++ b/tests/fixtures/flags_185.json @@ -0,0 +1,7 @@ +[ + { + "value": 0, + "symbol": "", + "description": "Verdi finnes i tabellen" + } +] diff --git a/tests/fixtures/metadata_185.json b/tests/fixtures/metadata_185.json new file mode 100644 index 0000000..2542e9a --- /dev/null +++ b/tests/fixtures/metadata_185.json @@ -0,0 +1,58 @@ +{ + "name": "Befolkningsvekst", + "isOfficialStatistics": false, + "paragraphs": [ + { + "header": "Beskrivelse", + "content": "

Differansen mellom befolkningsmengden i slutten av året (målt 1. januar året etter) og i begynnelsen av året (1. januar). Statistikken vises for kommune- og fylkesinndeling per 1.1.2024.

To måltall er tilgjengelig:

  1. Antall
  2. Prosent vekst = prosentvis vekst i folketallet, i prosent av folketall ved inngangen av året
" + }, + { + "header": "Begrunnelse for valg av indikatoren", + "content": "

Mange av indikatorene i statistikkbanken er relatert til befolkningstall og -sammensetning, og befolkningsveksten i en kommune bidrar til informasjon om dette. Omtrent en tredjedel av veksten i Norge skyldes fødselsoverskudd, mens resten skyldes netto innvandring.

Kilde:

FHIs folkehelserapport: Befolkningen i Norge

" + }, + { + "header": "Kildeinstitusjon", + "content": "Statistisk sentralbyrå (SSB)" + }, + { + "header": "Innsamling", + "content": "Statistikken beregnes fra Statistisk sentralbyrås befolkningsstatistikk som bygger på folkeregisteropplysninger." + }, + { + "header": "Tolking og feilkilder", + "content": "Det er tatt hensyn til mindre grensejusteringer mellom kommuner i statistikken." + }, + { + "header": "Datakvalitet", + "content": "En del feil ved innsamling og bearbeiding av dataene er uunngåelig. Det kan være kodefeil, revisjonsfeil, etc. Det er utført et omfattende arbeid for å minimalisere disse feilene, og disse feiltypene anses for å være relativt ubetydelige." + }, + { + "header": "Oppdateringsfrekvens", + "content": "Årlig" + }, + { + "header": "Nøkkelord", + "content": "Befolkning,Befolkningsvekst" + }, + { + "header": "Relatert materiale", + "content": "

FHIs folkehelserapport: Befolkningen i Norge

" + }, + { + "header": "Geografi", + "content": "

Hele landet, fylker og kommuner. Bydeler i Oslo, Bergen, Stavanger og Trondheim.

" + }, + { + "header": "År", + "content": "2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 og 2024" + }, + { + "header": "Kjønn", + "content": "kjønn samlet" + }, + { + "header": "Alder", + "content": "alle aldre" + } + ] +} diff --git a/tests/fixtures/query_185.json b/tests/fixtures/query_185.json new file mode 100644 index 0000000..92e7bfd --- /dev/null +++ b/tests/fixtures/query_185.json @@ -0,0 +1,474 @@ +{ + "dimensions": [ + { + "code": "GEO", + "filter": "item", + "values": [ + "0", + "3436", + "460104", + "4611", + "4617", + "4647", + "5027", + "1851", + "1857", + "1865", + "1866", + "1867", + "1868", + "1870", + "1871", + "1874", + "1875", + "5501", + "5510", + "5512", + "5514", + "5516", + "5518", + "5520", + "5522", + "5524", + "5526", + "5528", + "5530", + "5532", + "5534", + "5536", + "5540", + "5542", + "5544", + "5546", + "5601", + "5603", + "5605", + "3911", + "5026", + "5029", + "1856", + "030105", + "030106", + "3430", + "3336", + "3338", + "3401", + "3403", + "3407", + "3411", + "3412", + "3413", + "3414", + "1806", + "1811", + "1813", + "1815", + "1816", + "1818", + "1820", + "1822", + "1824", + "1825", + "1827", + "1828", + "1832", + "1833", + "15", + "50", + "18", + "55", + "56", + "03", + "0301", + "030108", + "030109", + "030110", + "030111", + "030112", + "030113", + "030114", + "030115", + "31", + "3101", + "3103", + "32", + "33", + "34", + "39", + "40", + "42", + "11", + "110301", + "110302", + "110304", + "110305", + "46", + "4217", + "4218", + "1834", + "1835", + "1836", + "1837", + "1839", + "1840", + "1841", + "1845", + "1848", + "5031", + "5033", + "5034", + "5035", + "5036", + "5037", + "5038", + "5041", + "5042", + "5044", + "5045", + "5046", + "5047", + "5049", + "5052", + "5053", + "5054", + "5055", + "5056", + "5058", + "5059", + "5060", + "5061", + "1804", + "030101", + "030102", + "030103", + "030104", + "3434", + "4204", + "110306", + "110307", 
+ "110308", + "110309", + "460101", + "460102", + "460103", + "460105", + "460106", + "460107", + "4629", + "4650", + "5028", + "1853", + "1859", + "5614", + "5616", + "5618", + "5620", + "5622", + "5624", + "5626", + "5628", + "5630", + "5632", + "5634", + "3105", + "3107", + "4214", + "4215", + "4216", + "4219", + "4220", + "4221", + "4222", + "4224", + "4225", + "4226", + "4227", + "4228", + "1101", + "1103", + "1106", + "1108", + "1111", + "1112", + "1114", + "1120", + "1121", + "1122", + "1124", + "1127", + "1130", + "1133", + "1134", + "1144", + "1145", + "1146", + "1149", + "1151", + "1160", + "4601", + "1505", + "1506", + "1508", + "1511", + "1514", + "1516", + "1517", + "1520", + "1525", + "1528", + "1531", + "1532", + "1535", + "1539", + "1547", + "1560", + "1563", + "1566", + "1573", + "1577", + "1578", + "1579", + "030107", + "3112", + "3205", + "3330", + "3405", + "3419", + "3437", + "3447", + "3901", + "4212", + "110303", + "1119", + "1135", + "460108", + "4637", + "1515", + "1557", + "500101", + "500102", + "500103", + "500104", + "5057", + "1812", + "1826", + "1838", + "5607", + "5612", + "5636", + "3110", + "3226", + "3314", + "3429", + "4012", + "4030", + "4036", + "4223", + "4627", + "1554", + "1576", + "5007", + "5032", + "5043", + "1860", + "5503", + "5538", + "5610", + "3114", + "3116", + "3118", + "3120", + "3122", + "3124", + "3201", + "3203", + "3207", + "3209", + "3212", + "3214", + "3216", + "3218", + "3220", + "3222", + "3224", + "3228", + "3230", + "3232", + "3234", + "3236", + "3238", + "3240", + "3242", + "3301", + "3303", + "3305", + "3310", + "3312", + "3316", + "3318", + "3320", + "3322", + "3324", + "3326", + "3328", + "3332", + "3334", + "3415", + "3416", + "3417", + "3418", + "3420", + "3421", + "3422", + "3423", + "3424", + "3425", + "3426", + "3427", + "3428", + "3431", + "3432", + "3433", + "3435", + "3438", + "3439", + "3440", + "3441", + "3442", + "3443", + "3446", + "3448", + "3449", + "3450", + "3451", + "3452", + "3453", 
+ "3454", + "3903", + "3905", + "3907", + "3909", + "4001", + "4003", + "4005", + "4010", + "4014", + "4016", + "4018", + "4020", + "4022", + "4024", + "4026", + "4028", + "4032", + "4034", + "4201", + "4202", + "4203", + "4205", + "4206", + "4207", + "4211", + "4213", + "4602", + "4612", + "4613", + "4614", + "4615", + "4616", + "4618", + "4619", + "4620", + "4621", + "4622", + "4623", + "4624", + "4625", + "4626", + "4628", + "4630", + "4631", + "4632", + "4633", + "4634", + "4635", + "4636", + "4638", + "4639", + "4640", + "4641", + "4642", + "4643", + "4644", + "4645", + "4646", + "4648", + "4649", + "4651", + "1580", + "5001", + "5006", + "5014", + "5020", + "5021", + "5022", + "5025" + ] + }, + { + "code": "AAR", + "filter": "item", + "values": [ + "2002_2002", + "2003_2003", + "2004_2004", + "2005_2005", + "2006_2006", + "2007_2007", + "2008_2008", + "2009_2009", + "2010_2010", + "2011_2011", + "2012_2012", + "2013_2013", + "2014_2014", + "2015_2015", + "2016_2016", + "2017_2017", + "2018_2018", + "2019_2019", + "2020_2020", + "2021_2021", + "2022_2022", + "2023_2023", + "2024_2024" + ] + }, + { + "code": "KJONN", + "filter": "item", + "values": [ + "0" + ] + }, + { + "code": "ALDER", + "filter": "item", + "values": [ + "0_120" + ] + }, + { + "code": "MEASURE_TYPE", + "filter": "item", + "values": [ + "TELLER", + "RATE" + ] + } + ], + "response": { + "format": "json-stat2", + "maxRowCount": 50000 + } +} diff --git a/tests/fixtures/sources.json b/tests/fixtures/sources.json new file mode 100644 index 0000000..d75791a --- /dev/null +++ b/tests/fixtures/sources.json @@ -0,0 +1,93 @@ +[ + { + "id": "nokkel", + "title": "Folkehelsestatistikk", + "description": "Statistikk om befolkning, oppvekst og levekår, miljø, skader, helserelatert atferd og helsetilstand.", + "aboutUrl": "https://www.helsedirektoratet.no/forebygging-diagnose-og-behandling/forebygging-og-levevaner/folkehelsestatistikk-og-profiler", + "publishedBy": "Helsedirektoratet" + }, + { + "id": "ngs", 
+ "title": "Mikrobiologisk genomovervåkning", + "description": "Data fra helgenomsekvesering og andre genetiske analyser av bakterier og virus utført ved FHIs referanselaboratorier.", + "aboutUrl": "https://www.fhi.no/hd/laboratorie-analyser", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "mfr", + "title": "Medisinsk fødselsregister", + "description": "Om svangerskap, fødsler og nyfødte i Norge fra 1967 til i dag.", + "aboutUrl": "https://www.fhi.no/op/mfr", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "abr", + "title": "Abortregisteret", + "description": "Om svangerskapsbrudd i Norge fra 1979 til i dag.", + "aboutUrl": "https://www.fhi.no/op/abortregisteret", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "sysvak", + "title": "Nasjonalt vaksinasjonsregister SYSVAK", + "description": "Data for influensa- og koronavaksinasjoner og for Barnevaksinasjonsprogrammet (dekningsstatistikk).", + "aboutUrl": "https://www.fhi.no/va/sysvak/", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "daar", + "title": "Dødsårsakregisteret", + "description": "Om dødsfall og dødsårsaker i Norge fra 1951 til i dag.", + "aboutUrl": "https://www.fhi.no/op/dodsarsaksregisteret/", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "msis", + "title": "Meldingssystem for smittsomme sykdommer (MSIS) ", + "description": "Oversikt over meldingspliktige sykdommer fra 1977 til i dag.", + "aboutUrl": "https://allvis.fhi.no/msis", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "lmr", + "title": "Legemiddelregisteret", + "description": "Her finner du oversikt over legemidler som er utlevert på resept i Norge (fra og med 2004).", + "aboutUrl": "https://www.fhi.no/he/legemiddelregisteret/", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "gs", + "title": "Grossiststatistikk", + "description": "Salg av reseptbelagte og reseptfrie legemidler fra grossister til blant annet apotek, institusjoner og dagligvare.", + "aboutUrl": 
"https://www.fhi.no/he/legemiddelbruk/om-den-grossistbaserte-legemiddelfo/#statistikkbanker-med-data-om-legemiddelforbruk", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "npr", + "title": "Norsk pasientregister", + "description": "Statistikk fra somatiske fagområder, psykisk helsevern og tverrfaglig spesialisert rusbehandling.", + "aboutUrl": "https://www.fhi.no/he/npr", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "kpr", + "title": "Kommunalt pasient- og brukerregister", + "description": "Statistikk om bruk av helse- og omsorgstjenester i kommunene.", + "aboutUrl": "https://www.fhi.no/he/kpr/statistikk-og-rapporter", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "hkr", + "title": "Hjerte- og karsykdommer", + "description": "Opplysninger fra 2012 til i dag om personer med sykdommer i hjertet og blodårene, og om behandlingen av disse sykdommene.", + "aboutUrl": "https://www.fhi.no/is/hjertekar2/", + "publishedBy": "Folkehelseinstituttet" + }, + { + "id": "skast", + "title": "Skadedyrstatistikk", + "description": "Skadedyrstatistikken gir oversikt over hvor ofte utvalgte skadedyr bekjempes i Norge. 
Statistikken bygger på månedlige rapporter fra skadedyrfirmaer og viser utvikling mellom år og sesongvariasjoner.", + "aboutUrl": "https://www.fhi.no/sk/skadedyrbekjempelse/statistikk-om-skadedyr", + "publishedBy": "Folkehelseinstituttet" + } +] diff --git a/tests/fixtures/table_185.json b/tests/fixtures/table_185.json new file mode 100644 index 0000000..9b93986 --- /dev/null +++ b/tests/fixtures/table_185.json @@ -0,0 +1,6 @@ +{ + "tableId": 185, + "title": "Befolkningsvekst", + "publishedAt": "2025-10-21T08:56:39.806397Z", + "modifiedAt": "2025-10-21T08:56:39.806397Z" +} diff --git a/tests/fixtures/tables_nokkel.json b/tests/fixtures/tables_nokkel.json new file mode 100644 index 0000000..4ae7443 --- /dev/null +++ b/tests/fixtures/tables_nokkel.json @@ -0,0 +1,728 @@ +[ + { + "tableId": 334, + "title": "Antibiotikaresepter", + "publishedAt": "2024-11-04T19:48:29.225776Z", + "modifiedAt": "2024-11-04T19:48:29.225776Z" + }, + { + "tableId": 364, + "title": "Hjerte- og karregisteret_3aarigLFKB", + "publishedAt": "2024-10-29T12:57:13.733322Z", + "modifiedAt": "2024-10-29T12:57:13.733322Z" + }, + { + "tableId": 365, + "title": "Hjerte- og karregisteret_1aarigLHF", + "publishedAt": "2024-12-18T12:13:02.962319Z", + "modifiedAt": "2025-01-09T06:58:42.47032Z" + }, + { + "tableId": 385, + "title": "Legemidler til behandling av type-2 diabetes_3aarigLFK", + "publishedAt": "2024-10-29T13:09:17.201712Z", + "modifiedAt": "2024-10-29T13:09:17.201712Z" + }, + { + "tableId": 403, + "title": "Vedvarende_lavinntekt_kommunegrense", + "publishedAt": "2025-04-24T12:16:59.538284Z", + "modifiedAt": "2025-04-24T12:16:59.538285Z" + }, + { + "tableId": 601, + "title": "Mediebruk_DataTVspill_Ungdata_KH", + "publishedAt": "2025-05-21T11:28:47.455265Z", + "modifiedAt": "2025-05-21T11:43:20.96496Z" + }, + { + "tableId": 602, + "title": "Mediebruk_SOME_Ungdata_KH", + "publishedAt": "2025-05-21T11:45:16.001373Z", + "modifiedAt": "2025-05-21T11:45:16.001374Z" + }, + { + "tableId": 171, + "title": 
"Befolkningsframskriving", + "publishedAt": "2025-02-25T14:42:59.238949Z", + "modifiedAt": "2025-07-27T07:07:07.001823Z" + }, + { + "tableId": 358, + "title": "Gjeld", + "publishedAt": "2025-02-26T11:51:11.007021Z", + "modifiedAt": "2025-02-26T11:51:11.007021Z" + }, + { + "tableId": 362, + "title": "Grunnskolepoeng_UTDANN", + "publishedAt": "2026-01-08T15:38:55.208332Z", + "modifiedAt": "2026-01-08T15:38:55.208332Z" + }, + { + "tableId": 377, + "title": "Mobbing, 7. og 10. klasse, 3-årige tall", + "publishedAt": "2025-03-07T13:27:29.115965Z", + "modifiedAt": "2025-03-07T13:27:29.115965Z" + }, + { + "tableId": 336, + "title": "Barnehagekvalitet_bemanning", + "publishedAt": "2025-04-01T10:00:58.015678Z", + "modifiedAt": "2025-04-01T10:00:58.015678Z" + }, + { + "tableId": 355, + "title": "Fremtidsoptimisme_Ungdata_KH", + "publishedAt": "2025-04-23T08:48:45.229583Z", + "modifiedAt": "2025-04-23T08:48:45.229583Z" + }, + { + "tableId": 392, + "title": "Stønad_livsopphold", + "publishedAt": "2025-05-09T12:50:19.326568Z", + "modifiedAt": "2025-05-13T14:39:57.70227Z" + }, + { + "tableId": 664, + "title": "Sosialhjelpsmottakere", + "publishedAt": "2025-06-06T06:36:12.451102Z", + "modifiedAt": "2025-06-06T06:36:12.451103Z" + }, + { + "tableId": 667, + "title": "Mediebruk_underhold_ungdata", + "publishedAt": "2025-05-22T10:24:12.167179Z", + "modifiedAt": "2026-02-11T10:21:53.318047Z" + }, + { + "tableId": 685, + "title": "Regneferd_UTDANN_3", + "publishedAt": "2026-02-12T14:31:30.453553Z", + "modifiedAt": "2026-02-12T14:31:30.453553Z" + }, + { + "tableId": 688, + "title": "Forventede funksjonsfriske leveår_7", + "publishedAt": "2025-06-23T09:07:30.416427Z", + "modifiedAt": "2025-06-23T09:14:39.112096Z" + }, + { + "tableId": 606, + "title": "Tannhelse_DMFT=0_MED_DEKNING", + "publishedAt": "2025-09-04T14:44:50.755457Z", + "modifiedAt": "2025-09-04T14:44:50.755457Z" + }, + { + "tableId": 401, + "title": "Valgdeltakelse", + "publishedAt": "2025-09-26T06:51:55.50102Z", + 
"modifiedAt": "2025-09-26T06:51:55.50102Z" + }, + { + "tableId": 338, + "title": "Befolkningssammensetning_antall_andel", + "publishedAt": "2025-10-21T08:50:38.184798Z", + "modifiedAt": "2025-10-21T08:50:38.184798Z" + }, + { + "tableId": 185, + "title": "Befolkningsvekst", + "publishedAt": "2025-10-21T08:56:39.806397Z", + "modifiedAt": "2025-10-21T08:56:39.806397Z" + }, + { + "tableId": 367, + "title": "Overvekt, kvinner, MFR", + "publishedAt": "2025-10-21T09:02:08.952188Z", + "modifiedAt": "2025-10-21T09:02:08.952188Z" + }, + { + "tableId": 699, + "title": "NPR_1", + "publishedAt": "2025-10-21T09:15:04.540704Z", + "modifiedAt": "2025-10-21T09:15:04.540704Z" + }, + { + "tableId": 714, + "title": "NPR_3", + "publishedAt": "2025-10-21T10:06:51.087779Z", + "modifiedAt": "2026-01-26T16:14:30.406547Z" + }, + { + "tableId": 752, + "title": "Sosialhjelpsmottakere, ettårig", + "publishedAt": "2025-12-02T13:27:15.205756Z", + "modifiedAt": "2025-12-02T13:27:15.205756Z" + }, + { + "tableId": 369, + "title": "KPR_3", + "publishedAt": "2025-11-05T12:47:10.174154Z", + "modifiedAt": "2026-02-17T07:03:21.906223Z" + }, + { + "tableId": 370, + "title": "KPR_1", + "publishedAt": "2025-11-05T12:46:34.40957Z", + "modifiedAt": "2026-02-17T07:19:49.707508Z" + }, + { + "tableId": 187, + "title": "Personer som bor alene", + "publishedAt": "2025-11-18T09:20:23.692617Z", + "modifiedAt": "2025-12-08T09:10:42.87317Z" + }, + { + "tableId": 181, + "title": "Utdanningsnivå", + "publishedAt": "2025-11-18T12:35:21.349944Z", + "modifiedAt": "2025-11-18T12:35:21.349944Z" + }, + { + "tableId": 511, + "title": "Utdanningsforskjeller i forventet levealder_7aarigLF", + "publishedAt": "2025-12-02T20:40:11.468561Z", + "modifiedAt": "2025-12-03T09:43:16.729127Z" + }, + { + "tableId": 510, + "title": "Utdanningsforskjeller i forventet levealder_15aarigLFKB", + "publishedAt": "2025-12-02T20:42:56.504929Z", + "modifiedAt": "2025-12-03T09:55:11.706007Z" + }, + { + "tableId": 509, + "title": "Forventet 
levealder_årligetall_ettårALDER_UTD", + "publishedAt": "2025-12-02T20:56:33.43415Z", + "modifiedAt": "2025-12-03T09:35:51.995297Z" + }, + { + "tableId": 507, + "title": "Forventet_levealder_TOT_og_utdn_7aarigLF", + "publishedAt": "2025-12-02T21:06:52.621887Z", + "modifiedAt": "2025-12-03T09:38:09.598497Z" + }, + { + "tableId": 508, + "title": "Forventet levealder etter utdn_15aarigLFKB", + "publishedAt": "2025-12-02T21:13:26.471895Z", + "modifiedAt": "2025-12-03T09:39:34.354203Z" + }, + { + "tableId": 660, + "title": "Forventet levealder totalt og etter utdanning, 25-årig", + "publishedAt": "2025-12-02T21:17:47.139173Z", + "modifiedAt": "2025-12-03T09:41:53.907439Z" + }, + { + "tableId": 186, + "title": "Eierstatus", + "publishedAt": "2025-12-08T09:24:12.660856Z", + "modifiedAt": "2025-12-08T09:24:12.660858Z" + }, + { + "tableId": 359, + "title": "Gjennomforing i videregående skole_innvand_3", + "publishedAt": "2025-12-15T10:25:56.152534Z", + "modifiedAt": "2025-12-15T10:25:56.152534Z" + }, + { + "tableId": 677, + "title": "Gjennomforing i videregående skole_innvand_1", + "publishedAt": "2025-12-15T10:20:14.864234Z", + "modifiedAt": "2025-12-15T10:20:14.864236Z" + }, + { + "tableId": 342, + "title": "Dødsårsaker tiårig", + "publishedAt": "2026-01-12T09:40:25.065037Z", + "modifiedAt": "2026-01-12T09:40:25.065038Z" + }, + { + "tableId": 344, + "title": "Selvmord femårig", + "publishedAt": "2026-01-12T09:39:58.096599Z", + "modifiedAt": "2026-01-12T09:39:58.096601Z" + }, + { + "tableId": 345, + "title": "Trafikkulykker, femårige tall", + "publishedAt": "2026-01-12T09:39:32.447777Z", + "modifiedAt": "2026-01-12T09:39:32.447779Z" + }, + { + "tableId": 343, + "title": "Dødsårsaker-nøkkeltall-1990-ettårig", + "publishedAt": "2026-01-12T09:40:55.35502Z", + "modifiedAt": "2026-01-12T09:40:55.35502Z" + }, + { + "tableId": 623, + "title": " Gjennomforing_VGO_utdann_1", + "publishedAt": "2026-01-09T11:02:05.09823Z", + "modifiedAt": "2026-01-09T11:02:05.098231Z" + }, + { + 
"tableId": 360, + "title": "Gjennomforing i videregående skole_utdann_3", + "publishedAt": "2026-01-09T11:02:53.166341Z", + "modifiedAt": "2026-01-09T11:02:53.166342Z" + }, + { + "tableId": 361, + "title": "Grunnskolepoeng_INNVKAT", + "publishedAt": "2026-01-08T15:38:28.022599Z", + "modifiedAt": "2026-01-08T15:38:28.022599Z" + }, + { + "tableId": 341, + "title": "Drikkevannsforsyning", + "publishedAt": "2026-01-12T08:32:55.070565Z", + "modifiedAt": "2026-01-12T08:32:55.070566Z" + }, + { + "tableId": 619, + "title": "Kreft, nye tilfeller_ettårig_LFKB", + "publishedAt": "2026-01-18T13:01:46.594781Z", + "modifiedAt": "2026-01-18T13:01:46.594781Z" + }, + { + "tableId": 368, + "title": "Kreft, nye tilfeller_10aarigLFKB", + "publishedAt": "2026-01-18T13:01:08.097792Z", + "modifiedAt": "2026-01-18T13:01:08.097792Z" + }, + { + "tableId": 396, + "title": "Vaks_dekning_Influensa", + "publishedAt": "2026-01-13T10:35:14.248503Z", + "modifiedAt": "2026-01-13T10:51:57.052147Z" + }, + { + "tableId": 394, + "title": "Vaksinasjonsdekning_1", + "publishedAt": "2026-01-14T14:23:21.353955Z", + "modifiedAt": "2026-01-14T14:23:21.353955Z" + }, + { + "tableId": 395, + "title": "Vaksinasjonsdekning_5aar", + "publishedAt": "2026-01-14T14:28:11.323274Z", + "modifiedAt": "2026-01-14T14:28:11.323274Z" + }, + { + "tableId": 795, + "title": "Trangbodd_INNVAND", + "publishedAt": "2026-01-20T09:35:28.257245Z", + "modifiedAt": "2026-01-20T09:35:28.257246Z" + }, + { + "tableId": 388, + "title": "Overvekt_verneplikt_4", + "publishedAt": "2026-01-16T14:53:42.245577Z", + "modifiedAt": "2026-01-16T14:53:42.245577Z" + }, + { + "tableId": 387, + "title": "Overvekt_verneplikt_1", + "publishedAt": "2026-01-16T14:54:33.539299Z", + "modifiedAt": "2026-01-19T13:19:13.71667Z" + }, + { + "tableId": 794, + "title": "Trangbodd_UTDANN", + "publishedAt": "2026-01-19T11:04:07.466643Z", + "modifiedAt": "2026-01-19T11:04:07.466643Z" + }, + { + "tableId": 353, + "title": "Trening_forsvaret_sesjon1_1", + "publishedAt": 
"2026-01-20T09:19:15.088638Z", + "modifiedAt": "2026-01-20T09:19:15.088638Z" + }, + { + "tableId": 352, + "title": "Trening_forsvaret_sesjon1_3", + "publishedAt": "2026-01-20T09:18:53.707281Z", + "modifiedAt": "2026-01-20T09:18:53.707282Z" + }, + { + "tableId": 332, + "title": "Alkohol_Ungdata_KH", + "publishedAt": "2026-01-20T21:37:20.273043Z", + "modifiedAt": "2026-01-21T13:13:23.231866Z" + }, + { + "tableId": 339, + "title": "Depressive symptomer_Ungdata_KH", + "publishedAt": "2026-01-20T21:41:49.907708Z", + "modifiedAt": "2026-01-21T13:16:36.980776Z" + }, + { + "tableId": 348, + "title": "Ensomhet_Ungdata_KH", + "publishedAt": "2026-01-20T21:45:04.111339Z", + "modifiedAt": "2026-01-21T13:17:10.836688Z" + }, + { + "tableId": 349, + "title": "Fornoyd_helsa_Ungdata_KH", + "publishedAt": "2026-01-20T21:47:33.009601Z", + "modifiedAt": "2026-01-21T13:17:36.956513Z" + }, + { + "tableId": 354, + "title": "FORTROLIGVENN_Ungdata_KH", + "publishedAt": "2026-01-20T21:50:04.241821Z", + "modifiedAt": "2026-01-21T13:18:06.442229Z" + }, + { + "tableId": 356, + "title": "Fritidsorg_deltak_ungdata", + "publishedAt": "2026-01-21T13:20:34.839291Z", + "modifiedAt": "2026-01-21T14:32:48.194358Z" + }, + { + "tableId": 357, + "title": "Fysisk_inakt_Ungdata_KH", + "publishedAt": "2026-01-21T14:31:57.063384Z", + "modifiedAt": "2026-01-21T14:31:57.063385Z" + }, + { + "tableId": 363, + "title": "Hasjbruk_Ungdata_KH", + "publishedAt": "2026-01-21T14:44:06.197311Z", + "modifiedAt": "2026-02-10T14:25:43.456118Z" + }, + { + "tableId": 335, + "title": "Barn av sosialhjelpsmottakere", + "publishedAt": "2026-02-10T09:44:35.054662Z", + "modifiedAt": "2026-02-10T09:44:35.054664Z" + }, + { + "tableId": 800, + "title": "Luftforurensning, grenseverdi", + "publishedAt": "2026-02-05T07:45:33.161833Z", + "modifiedAt": "2026-02-05T07:45:33.161834Z" + }, + { + "tableId": 787, + "title": "Uføre_UTDANN_1", + "publishedAt": "2026-01-27T14:35:55.553669Z", + "modifiedAt": "2026-02-17T13:02:41.116533Z" + }, + { 
+ "tableId": 788, + "title": "Uføre_UTDANN_3", + "publishedAt": "2026-01-27T14:40:35.018853Z", + "modifiedAt": "2026-02-17T13:02:26.012611Z" + }, + { + "tableId": 790, + "title": "AAP_UTDANN_1", + "publishedAt": "2026-01-27T14:44:59.329574Z", + "modifiedAt": "2026-01-27T14:44:59.329574Z" + }, + { + "tableId": 791, + "title": "AAP_UTDANN_3", + "publishedAt": "2026-01-27T14:47:49.169494Z", + "modifiedAt": "2026-01-30T15:54:29.736474Z" + }, + { + "tableId": 803, + "title": "Luftforurensning, PWC (ny)", + "publishedAt": "2026-02-05T07:45:49.979013Z", + "modifiedAt": "2026-02-05T07:45:49.979015Z" + }, + { + "tableId": 397, + "title": "TRIVSEL_1", + "publishedAt": "2026-02-03T14:50:45.287376Z", + "modifiedAt": "2026-02-04T11:35:45.312498Z" + }, + { + "tableId": 378, + "title": "Mobbing, 7. og 10. klasse_1", + "publishedAt": "2026-02-03T15:08:19.411738Z", + "modifiedAt": "2026-02-03T15:08:19.411738Z" + }, + { + "tableId": 805, + "title": "TRIVSEL_3", + "publishedAt": "2026-02-04T11:35:10.518412Z", + "modifiedAt": "2026-02-10T14:13:24.307206Z" + }, + { + "tableId": 806, + "title": "MOBBING_3", + "publishedAt": "2026-02-04T11:50:48.897641Z", + "modifiedAt": "2026-02-04T11:50:48.897642Z" + }, + { + "tableId": 373, + "title": "Livskvalitet_Ungdata_KH", + "publishedAt": "2026-02-04T12:25:46.313579Z", + "modifiedAt": "2026-02-04T12:25:46.313579Z" + }, + { + "tableId": 366, + "title": "Inntektsulikhet", + "publishedAt": "2026-02-06T09:20:40.904633Z", + "modifiedAt": "2026-02-06T09:20:40.904633Z" + }, + { + "tableId": 804, + "title": "Sysselsatte_UTDANN_ettårig", + "publishedAt": "2026-02-06T10:22:45.587531Z", + "modifiedAt": "2026-02-06T10:22:45.587531Z" + }, + { + "tableId": 807, + "title": "Sysselsatte_INNVKAT_ettårig", + "publishedAt": "2026-02-06T10:22:38.256208Z", + "modifiedAt": "2026-02-06T10:22:38.256208Z" + }, + { + "tableId": 175, + "title": "Innvandrere og norskfødte med innv.foreldre _LANDBAK", + "publishedAt": "2026-02-06T14:57:46.6586Z", + "modifiedAt": 
"2026-02-06T14:57:46.6586Z" + }, + { + "tableId": 374, + "title": "Nærmiljø_Ungdata_KH", + "publishedAt": "2026-02-09T11:18:59.078553Z", + "modifiedAt": "2026-02-09T11:18:59.078555Z" + }, + { + "tableId": 375, + "title": "Treffsteder for unge_Ungdata_KH", + "publishedAt": "2026-02-09T11:26:27.884848Z", + "modifiedAt": "2026-02-09T11:26:27.88485Z" + }, + { + "tableId": 380, + "title": "Regelbrudd_Ungdata_KH", + "publishedAt": "2026-02-09T12:34:43.499082Z", + "modifiedAt": "2026-02-09T12:34:43.499083Z" + }, + { + "tableId": 389, + "title": "Skjermtid_Ungdata_KH", + "publishedAt": "2026-02-09T12:38:50.920167Z", + "modifiedAt": "2026-02-09T12:38:50.920169Z" + }, + { + "tableId": 390, + "title": "Smertestillende_ungdata", + "publishedAt": "2026-02-09T12:42:44.0123Z", + "modifiedAt": "2026-02-09T12:42:44.012302Z" + }, + { + "tableId": 399, + "title": "Trygghet_ungdata", + "publishedAt": "2026-02-09T12:47:30.435923Z", + "modifiedAt": "2026-02-09T12:47:30.435925Z" + }, + { + "tableId": 650, + "title": "INNVAND_INNVKAT", + "publishedAt": "2026-02-10T09:09:07.520257Z", + "modifiedAt": "2026-02-10T09:09:07.520259Z" + }, + { + "tableId": 371, + "title": "Leseferdighet_UTDANN_1", + "publishedAt": "2026-02-10T10:35:48.263727Z", + "modifiedAt": "2026-02-10T11:17:59.825692Z" + }, + { + "tableId": 670, + "title": "Leseferdighet_utdann_3", + "publishedAt": "2026-02-10T10:42:04.095025Z", + "modifiedAt": "2026-02-10T10:42:04.095025Z" + }, + { + "tableId": 689, + "title": "Regneferd_INNVKAT_1", + "publishedAt": "2026-02-10T11:17:08.620557Z", + "modifiedAt": "2026-02-10T11:17:08.620557Z" + }, + { + "tableId": 641, + "title": "Regneferd_INNVKAT_3", + "publishedAt": "2026-02-10T11:25:22.449729Z", + "modifiedAt": "2026-02-10T11:25:22.449729Z" + }, + { + "tableId": 809, + "title": "NEET_UTDANN", + "publishedAt": "2026-02-10T19:20:19.064Z", + "modifiedAt": "2026-02-17T13:02:00.715683Z" + }, + { + "tableId": 810, + "title": "Andrenarko_Ungdata", + "publishedAt": "2026-03-09T15:21:38.643178Z", 
+ "modifiedAt": "2026-03-10T13:02:04.859057Z" + }, + { + "tableId": 811, + "title": "Hasjtilbud_Ungdata", + "publishedAt": "2026-03-09T15:21:24.178186Z", + "modifiedAt": "2026-03-09T15:21:24.178187Z" + }, + { + "tableId": 600, + "title": "Kollektivtilbud, ungdom", + "publishedAt": "2026-02-10T16:22:04.374466Z", + "modifiedAt": "2026-02-11T09:51:32.652556Z" + }, + { + "tableId": 812, + "title": "Røyk_Ungdata", + "publishedAt": "2026-03-09T15:21:08.614525Z", + "modifiedAt": "2026-03-09T15:21:08.614526Z" + }, + { + "tableId": 813, + "title": "Skulketskolen_Ungdata", + "publishedAt": "2026-03-09T13:40:23.13494Z", + "modifiedAt": "2026-03-09T13:40:23.134986Z" + }, + { + "tableId": 814, + "title": "Snus_Ungdata", + "publishedAt": "2026-03-09T15:20:44.524352Z", + "modifiedAt": "2026-03-09T15:20:44.524353Z" + }, + { + "tableId": 815, + "title": "Vape_Ungdata", + "publishedAt": "2026-03-09T15:20:20.568002Z", + "modifiedAt": "2026-03-09T15:20:20.568003Z" + }, + { + "tableId": 607, + "title": "Venner_inne_Ungdata", + "publishedAt": "2026-02-10T20:23:12.81944Z", + "modifiedAt": "2026-02-11T09:52:40.767342Z" + }, + { + "tableId": 379, + "title": "NEET_INNVKAT", + "publishedAt": "2026-02-10T19:44:15.283028Z", + "modifiedAt": "2026-02-10T19:44:15.283028Z" + }, + { + "tableId": 608, + "title": "Venner_ute_Ungdata", + "publishedAt": "2026-02-10T20:26:35.178965Z", + "modifiedAt": "2026-02-11T09:56:08.051697Z" + }, + { + "tableId": 672, + "title": "Regneferd_UTDANN_1", + "publishedAt": "2026-02-12T14:28:00.419856Z", + "modifiedAt": "2026-02-12T14:28:00.419856Z" + }, + { + "tableId": 350, + "title": "Forsvaret_Svomming_3", + "publishedAt": "2026-02-12T13:26:54.37389Z", + "modifiedAt": "2026-02-12T13:26:54.37389Z" + }, + { + "tableId": 671, + "title": "Leseferdighet_innvkat_1", + "publishedAt": "2026-02-16T12:22:31.91486Z", + "modifiedAt": "2026-02-16T12:22:31.91486Z" + }, + { + "tableId": 351, + "title": "Forsvaret_Svomming_1", + "publishedAt": "2026-02-16T13:58:22.362566Z", + 
"modifiedAt": "2026-02-16T13:58:22.362566Z" + }, + { + "tableId": 372, + "title": "Leseferdighet_INNVKAT_3", + "publishedAt": "2026-02-17T12:15:42.934827Z", + "modifiedAt": "2026-02-17T12:15:42.934827Z" + }, + { + "tableId": 826, + "title": "SYKEFRAV_SSB", + "publishedAt": "2026-02-23T07:05:48.431171Z", + "modifiedAt": "2026-02-23T07:49:54.660396Z" + }, + { + "tableId": 583, + "title": "Vedvarende lavinntekt INNVKAT", + "publishedAt": "2026-02-23T08:44:50.032799Z", + "modifiedAt": "2026-02-23T08:44:50.032808Z" + }, + { + "tableId": 839, + "title": "ROYK_STATBANK", + "publishedAt": "2026-03-04T12:48:25.930929Z", + "modifiedAt": "2026-03-04T12:48:25.93093Z" + }, + { + "tableId": 346, + "title": "Barn av enslige forsørgere_3aarigLFKB", + "publishedAt": "2026-03-09T08:48:17.960139Z", + "modifiedAt": "2026-03-09T08:48:17.960139Z" + }, + { + "tableId": 391, + "title": "Søvnproblemer_Ungdata_KH", + "publishedAt": "2026-03-11T11:11:19.625366Z", + "modifiedAt": "2026-03-11T11:11:19.625366Z" + }, + { + "tableId": 852, + "title": "Andrenarko_Ungdata_VGO", + "publishedAt": "2026-03-23T10:36:27.084294Z", + "modifiedAt": "2026-03-23T10:36:27.084296Z" + }, + { + "tableId": 859, + "title": "Hasjbruk_Ungdata_VGO", + "publishedAt": "2026-03-23T10:35:53.428246Z", + "modifiedAt": "2026-03-23T10:35:53.428247Z" + }, + { + "tableId": 860, + "title": "Røyk_Ungdata_VGO", + "publishedAt": "2026-03-23T10:39:47.298722Z", + "modifiedAt": "2026-03-23T10:39:47.298723Z" + }, + { + "tableId": 862, + "title": "Snus_Ungdata_VGO", + "publishedAt": "2026-03-23T10:34:30.503178Z", + "modifiedAt": "2026-03-23T10:34:30.503179Z" + }, + { + "tableId": 863, + "title": "Vape_Ungdata_VGO", + "publishedAt": "2026-03-23T10:37:45.600809Z", + "modifiedAt": "2026-03-23T10:37:45.600809Z" + }, + { + "tableId": 337, + "title": "Barnevern_tiltak", + "publishedAt": "2026-03-24T09:13:05.651799Z", + "modifiedAt": "2026-03-24T09:13:05.651799Z" + }, + { + "tableId": 393, + "title": "Røyking_MFR", + "publishedAt": 
"2026-03-24T09:23:03.575448Z", + "modifiedAt": "2026-03-24T09:23:03.575448Z" + } +] diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 0000000..f2673b7 --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,48 @@ +"""Tests for cache module.""" + +import time + +from fhi_statistikk_mcp.cache import TTLCache + + +def test_set_and_get(): + cache = TTLCache() + cache.set("key", "value", 60) + assert cache.get("key") == "value" + + +def test_get_missing_key(): + cache = TTLCache() + assert cache.get("nonexistent") is None + + +def test_expiry(): + cache = TTLCache() + cache.set("key", "value", 0.1) + time.sleep(0.15) + assert cache.get("key") is None + + +def test_clear(): + cache = TTLCache() + cache.set("a", 1, 60) + cache.set("b", 2, 60) + cache.clear() + assert cache.get("a") is None + assert cache.get("b") is None + + +def test_overwrite(): + cache = TTLCache() + cache.set("key", "old", 60) + cache.set("key", "new", 60) + assert cache.get("key") == "new" + + +def test_different_ttls(): + cache = TTLCache() + cache.set("short", "value", 0.1) + cache.set("long", "value", 60) + time.sleep(0.15) + assert cache.get("short") is None + assert cache.get("long") == "value" diff --git a/tests/test_transformers.py b/tests/test_transformers.py new file mode 100644 index 0000000..b4850b7 --- /dev/null +++ b/tests/test_transformers.py @@ -0,0 +1,366 @@ +"""Tests for transformers module.""" + +import pytest + +from fhi_statistikk_mcp.transformers import ( + complete_query_dimensions, + extract_metadata_fields, + flatten_categories, + is_year_dimension, + matches_search, + navigate_hierarchy, + normalize_for_search, + normalize_year_value, + parse_csv_to_rows, + strip_html, + summarize_dimensions, +) + + +# --- strip_html --- + +def test_strip_html_removes_tags(): + assert strip_html("

Hello world

") == "Hello world" + + +def test_strip_html_preserves_plain_text(): + assert strip_html("No tags here") == "No tags here" + + +def test_strip_html_handles_empty(): + assert strip_html("") == "" + assert strip_html(None) is None + + +def test_strip_html_handles_links(): + assert strip_html('link') == "link" + + +def test_strip_html_decodes_entities(): + assert strip_html("& <b>  ") == "& " + + +# --- normalize_for_search / matches_search --- + +def test_normalize_strips_accents(): + assert normalize_for_search("Tromsø") == "tromso" + assert normalize_for_search("Bærum") == "barum" + assert normalize_for_search("Ålesund") == "alesund" + + +def test_normalize_lowercases(): + assert normalize_for_search("OSLO") == "oslo" + + +def test_matches_search_single_word(): + assert matches_search("Befolkningsvekst", "befolkning") + assert not matches_search("Befolkningsvekst", "helse") + + +def test_matches_search_multiple_words(): + assert matches_search("Befolkningsvekst Oslo", "befolkning oslo") + assert not matches_search("Befolkningsvekst", "befolkning oslo") + + +def test_matches_search_accent_insensitive(): + assert matches_search("Tromsø kommune", "tromso") + assert matches_search("Bærum", "barum") + + +# --- normalize_year_value --- + +def test_normalize_year_short(): + assert normalize_year_value("2020") == "2020_2020" + + +def test_normalize_year_already_full(): + assert normalize_year_value("2020_2020") == "2020_2020" + + +def test_normalize_year_non_numeric(): + assert normalize_year_value("all") == "all" + + +# --- flatten_categories --- + +NESTED_TREE = [ + { + "value": "0", + "label": "Hele landet", + "children": [ + { + "value": "03", + "label": "Oslo (fylke)", + "children": [ + {"value": "0301", "label": "Oslo", "children": []}, + ], + }, + { + "value": "18", + "label": "Nordland", + "children": [ + {"value": "1804", "label": "Bodø", "children": []}, + {"value": "1806", "label": "Narvik", "children": []}, + ], + }, + ], + }, +] + + +def 
test_flatten_categories_count(): + flat = flatten_categories(NESTED_TREE) + assert len(flat) == 6 + + +def test_flatten_categories_parent_values(): + flat = flatten_categories(NESTED_TREE) + by_value = {c["value"]: c for c in flat} + assert by_value["0"]["parent_value"] is None + assert by_value["03"]["parent_value"] == "0" + assert by_value["0301"]["parent_value"] == "03" + assert by_value["1804"]["parent_value"] == "18" + + +def test_flatten_categories_empty(): + assert flatten_categories([]) == [] + + +# --- navigate_hierarchy --- + +def test_navigate_top_level(): + result = navigate_hierarchy(NESTED_TREE) + assert len(result) == 1 + assert result[0]["value"] == "0" + assert result[0]["child_count"] == 2 + + +def test_navigate_children(): + result = navigate_hierarchy(NESTED_TREE, parent_value="18") + assert len(result) == 2 + values = {r["value"] for r in result} + assert values == {"1804", "1806"} + + +def test_navigate_search(): + result = navigate_hierarchy(NESTED_TREE, search="bodø") + assert len(result) == 1 + assert result[0]["value"] == "1804" + + +def test_navigate_search_accent_insensitive(): + result = navigate_hierarchy(NESTED_TREE, search="bodo") + assert len(result) == 1 + assert result[0]["label"] == "Bodø" + + +# --- summarize_dimensions --- + +def test_summarize_fixed_dimension(): + dims = [{"code": "KJONN", "label": "Kjønn", "categories": [ + {"value": "0", "label": "kjønn samlet", "children": []} + ]}] + result = summarize_dimensions(dims) + assert len(result) == 1 + assert result[0]["is_fixed"] is True + assert result[0]["total_categories"] == 1 + + +def test_summarize_year_dimension(): + cats = [{"value": f"{y}_{y}", "label": str(y), "children": []} + for y in range(2020, 2025)] + dims = [{"code": "AAR", "label": "År", "categories": cats}] + result = summarize_dimensions(dims) + assert result[0]["value_format"] == "YYYY_YYYY (e.g. 
2020_2020)" + assert result[0]["range"] == "2020..2024" + + +def test_summarize_hierarchical_large(): + children = [{"value": str(i), "label": f"Municipality {i}", "children": []} + for i in range(1, 30)] + cats = [{"value": "0", "label": "Hele landet", "children": children}] + dims = [{"code": "GEO", "label": "Geografi", "categories": cats}] + result = summarize_dimensions(dims) + assert result[0]["is_hierarchical"] is True + assert "top_level_values" in result[0] + assert result[0]["top_level_values"][0]["child_count"] == 29 + + +def test_summarize_small_dimension(): + cats = [ + {"value": "TELLER", "label": "antall", "children": []}, + {"value": "RATE", "label": "prosent", "children": []}, + ] + dims = [{"code": "MEASURE_TYPE", "label": "Måltall", "categories": cats}] + result = summarize_dimensions(dims) + assert len(result[0]["values"]) == 2 + assert result[0]["values"][0] == {"value": "TELLER", "label": "antall"} + + +# --- extract_metadata_fields --- + +def test_extract_metadata_dict(): + meta = { + "name": "Test", + "isOfficialStatistics": True, + "paragraphs": [ + {"header": "Beskrivelse", "content": "

Some description

"}, + {"header": "Oppdateringsfrekvens", "content": "Årlig"}, + {"header": "Nøkkelord", "content": "Helse,Data"}, + {"header": "Kildeinstitusjon", "content": "FHI"}, + ], + } + fields = extract_metadata_fields(meta) + assert fields["is_official_statistics"] is True + assert fields["description"] == "Some description" + assert fields["update_frequency"] == "Årlig" + assert fields["keywords"] == ["Helse", "Data"] + assert fields["source_institution"] == "FHI" + + +def test_extract_metadata_strips_html(): + meta = { + "paragraphs": [ + {"header": "Beskrivelse", "content": "

Text with link

"}, + ], + } + fields = extract_metadata_fields(meta) + assert fields["description"] == "Text with link" + + +# --- parse_csv_to_rows --- + +def test_parse_csv_basic(): + csv_text = '"Col A";"Col B"\n"Oslo";"123"\n"Bergen";"456"\n' + result = parse_csv_to_rows(csv_text) + assert result["total_rows"] == 2 + assert result["truncated"] is False + assert result["rows"][0]["Col A"] == "Oslo" + assert result["rows"][0]["Col B"] == 123 + + +def test_parse_csv_truncation(): + csv_text = '"X"\n"a"\n"b"\n"c"\n' + result = parse_csv_to_rows(csv_text, max_rows=2) + assert result["total_rows"] == 3 + assert result["truncated"] is True + assert len(result["rows"]) == 2 + + +def test_parse_csv_numeric_conversion(): + csv_text = '"int";"float";"missing";"text"\n"42";"3.14";"..";"hello"\n' + result = parse_csv_to_rows(csv_text) + row = result["rows"][0] + assert row["int"] == 42 + assert row["float"] == 3.14 + assert row["missing"] is None + assert row["text"] == "hello" + + +def test_parse_csv_comma_decimal(): + csv_text = '"val"\n"1,5"\n' + result = parse_csv_to_rows(csv_text) + assert result["rows"][0]["val"] == 1.5 + + +# --- is_year_dimension --- + +def test_is_year_by_code(): + assert is_year_dimension("AAR", []) is True + assert is_year_dimension("YEAR", []) is True + assert is_year_dimension("GEO", []) is False + + +def test_is_year_by_value_format(): + flat = [{"value": "2020_2020", "label": "2020", "parent_value": None}] + assert is_year_dimension("CUSTOM", flat) is True + + +# --- complete_query_dimensions --- + +SAMPLE_DIMS = [ + {"code": "GEO", "label": "Geografi", "categories": NESTED_TREE}, + {"code": "AAR", "label": "År", "categories": [ + {"value": "2023_2023", "label": "2023", "children": []}, + {"value": "2024_2024", "label": "2024", "children": []}, + ]}, + {"code": "KJONN", "label": "Kjønn", "categories": [ + {"value": "0", "label": "kjønn samlet", "children": []}, + ]}, + {"code": "ALDER", "label": "Alder", "categories": [ + {"value": "0_120", "label": "alle 
aldre", "children": []}, + ]}, + {"code": "MEASURE_TYPE", "label": "Måltall", "categories": [ + {"value": "TELLER", "label": "antall", "children": []}, + {"value": "RATE", "label": "prosent", "children": []}, + ]}, +] + + +def test_complete_dims_fixed_auto_included(): + user_dims = [ + {"code": "GEO", "filter": "item", "values": ["0301"]}, + {"code": "AAR", "filter": "bottom", "values": ["1"]}, + ] + result = complete_query_dimensions(SAMPLE_DIMS, user_dims) + codes = {d["code"] for d in result} + assert "KJONN" in codes + assert "ALDER" in codes + kjonn = next(d for d in result if d["code"] == "KJONN") + assert kjonn["values"] == ["0"] + + +def test_complete_dims_measure_type_defaults_to_all(): + user_dims = [ + {"code": "GEO", "filter": "item", "values": ["0"]}, + {"code": "AAR", "filter": "item", "values": ["2024"]}, + ] + result = complete_query_dimensions(SAMPLE_DIMS, user_dims) + mt = next(d for d in result if d["code"] == "MEASURE_TYPE") + assert mt["filter"] == "all" + assert mt["values"] == ["*"] + + +def test_complete_dims_year_normalization(): + user_dims = [ + {"code": "GEO", "filter": "item", "values": ["0"]}, + {"code": "AAR", "filter": "item", "values": ["2024"]}, + ] + result = complete_query_dimensions(SAMPLE_DIMS, user_dims) + aar = next(d for d in result if d["code"] == "AAR") + assert aar["values"] == ["2024_2024"] + + +def test_complete_dims_missing_required_raises(): + user_dims = [ + {"code": "AAR", "filter": "item", "values": ["2024"]}, + ] + with pytest.raises(ValueError, match="Missing required dimensions.*GEO"): + complete_query_dimensions(SAMPLE_DIMS, user_dims) + + +def test_complete_dims_missing_code_key_raises(): + user_dims = [{"filter": "item", "values": ["0"]}] + with pytest.raises(ValueError, match="missing 'code' key"): + complete_query_dimensions(SAMPLE_DIMS, user_dims) + + +def test_complete_dims_case_insensitive(): + user_dims = [ + {"code": "geo", "filter": "item", "values": ["0"]}, + {"code": "aar", "filter": "item", 
"values": ["2024"]}, + ] + result = complete_query_dimensions(SAMPLE_DIMS, user_dims) + codes = [d["code"] for d in result] + assert "GEO" in codes + assert "AAR" in codes + + +def test_complete_dims_no_year_normalization_for_top_filter(): + user_dims = [ + {"code": "GEO", "filter": "item", "values": ["0"]}, + {"code": "AAR", "filter": "top", "values": ["3"]}, + ] + result = complete_query_dimensions(SAMPLE_DIMS, user_dims) + aar = next(d for d in result if d["code"] == "AAR") + assert aar["values"] == ["3"] # not "3_3"