final implementation
Some checks failed
Build and Push Docker Image / build-and-push (push) Has been cancelled

This commit is contained in:
Torbjørn Lindahl
2026-03-27 23:38:52 +01:00
parent 4b2e7376bd
commit 3b3721091b
31 changed files with 5107 additions and 0 deletions

View File

@@ -0,0 +1,74 @@
# Build the production Docker image, push it to Azure Container Registry,
# and notify the GitOps repository so the deployed image tag is updated.
name: Build and Push Docker Image

on:
  # Manual runs plus pushes to main that touch build-relevant files.
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - 'src/**'
      - 'Dockerfile'
      - 'pyproject.toml'
      - '.github/workflows/docker-build-push.yaml'

env:
  ACR_NAME: crfhiskybert
  IMAGE: crfhiskybert.azurecr.io/fida/ki/statistikk-mcp

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # OIDC token for azure/login federated credentials
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # The short SHA doubles as the image tag reported to the GitOps repo.
      - name: Get short SHA
        id: sha
        run: echo "short=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Login to Azure using Federated Identity
        uses: azure/login@v2
        with:
          client-id: ${{ vars.AZURE_CLIENT_ID }}
          tenant-id: ${{ vars.AZURE_TENANT_ID }}
          allow-no-subscriptions: true

      - name: Login to ACR
        run: az acr login --name ${{ env.ACR_NAME }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Tags: bare short SHA (no prefix), plus "latest" on the default branch.
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.IMAGE }}
          tags: |
            type=sha,prefix=
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          target: prod
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # repository_dispatch to the GitOps repo so it rolls the "test"
      # environment to the freshly pushed tag.
      - name: Trigger GitOps tag update
        run: |
          curl -sS -f -L \
            -X POST \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer ${{ secrets.GITOPS_PAT }}" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            "https://api.github.com/repos/${{ vars.GITOPS_REPO }}/dispatches" \
            -d '{"event_type":"update_tag","client_payload":{"env":"test","updates":[{"repository":"fida/ki/statistikk-mcp","tag":"${{ steps.sha.outputs.short }}"}]}}'

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
.venv
.mcp.json

8
.mcp.json.local Normal file
View File

@@ -0,0 +1,8 @@
{
"mcpServers": {
"fhi-statistikk": {
"type": "sse",
"url": "http://localhost:8000/sse"
}
}
}

8
.mcp.json.public Normal file
View File

@@ -0,0 +1,8 @@
{
"mcpServers": {
"fhi-statistikk": {
"type": "sse",
"url": "https://statistikk-mcp.sky.fhi.no/sse"
}
}
}

10
Dockerfile Normal file
View File

@@ -0,0 +1,10 @@
# Minimal production image for the FHI Statistikk MCP server.
FROM python:3.12-slim AS base
WORKDIR /app

# Production stage: install the package straight from pyproject + src.
FROM base AS prod
COPY pyproject.toml .
COPY src/ src/
RUN pip install --no-cache-dir .
EXPOSE 8000
# Serve over SSE on all interfaces; port matches EXPOSE above.
CMD ["fhi-statistikk-mcp", "--transport", "sse", "--host", "0.0.0.0", "--port", "8000"]

20
Makefile Normal file
View File

@@ -0,0 +1,20 @@
# Container build/push helpers for the FHI Statistikk MCP server image.
# NOTE(review): the image path here ("fida/ki/fhi-statistikk-mcp") differs
# from the CI workflow's "fida/ki/statistikk-mcp" -- confirm which is canonical.
ACR := crfhiskybert.azurecr.io
IMAGE := $(ACR)/fida/ki/fhi-statistikk-mcp
TAG := $(shell git rev-parse --short HEAD)

# Declare every command target phony ("run" was previously missing, so a
# file named "run" in the repo root would have silently disabled the target).
.PHONY: docker-build docker-push docker acr-login run

acr-login:
	az acr login --name crfhiskybert

docker-build:
	docker build --target prod -t $(IMAGE):$(TAG) -t $(IMAGE):latest .

docker-push:
	docker push $(IMAGE):$(TAG)
	docker push $(IMAGE):latest

# Full pipeline: authenticate, build, push.
docker: acr-login docker-build docker-push

# Local smoke test: host port 18000 -> container port 8000.
run:
	docker run --rm -p 18000:8000 $(IMAGE):latest

29
pyproject.toml Normal file
View File

@@ -0,0 +1,29 @@
# Packaging metadata for the FHI Statistikk MCP server (hatchling build).

[project]
name = "fhi-statistikk-mcp"
version = "0.1.0"
description = "MCP server for FHI Statistikk Open API"
requires-python = ">=3.12"
dependencies = [
    "mcp>=1.0.0",
    "uvicorn>=0.30",
    "httpx>=0.27",
]

# Development-only extras: install with `pip install .[dev]`.
[project.optional-dependencies]
dev = [
    "pytest>=8.0",
    "pytest-asyncio>=0.24",
]

# Console entry point -- also used as the container CMD in the Dockerfile.
[project.scripts]
fhi-statistikk-mcp = "fhi_statistikk_mcp.server:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src/fhi_statistikk_mcp"]

# Let pytest import the package from src/ without installing it first.
[tool.pytest.ini_options]
pythonpath = ["src"]

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,253 @@
"""Async HTTP client for FHI Statistikk Open API."""
import asyncio
import logging
import time
import httpx
from .cache import TTLCache
logger = logging.getLogger(__name__)
# Base URL of the FHI Statistikk Open API (v1).
BASE_URL = "https://statistikk-data.fhi.no/api/open/v1"

# Cache TTLs in seconds -- sources change rarely (daily), table lists hourly,
# per-table metadata every six hours.
TTL_SOURCES = 24 * 3600
TTL_TABLES = 3600
TTL_DIMENSIONS = 6 * 3600
TTL_METADATA = 6 * 3600
TTL_FLAGS = 6 * 3600
TTL_QUERY = 6 * 3600

MIN_REQUEST_INTERVAL = 0.1  # 100ms between requests
class ApiError(Exception):
    """Raised when an FHI API call fails.

    Attributes:
        status_code: HTTP status code; 0 signals a network-level failure.
        detail: Human-readable description of the failure.
    """

    def __init__(self, status_code: int, detail: str) -> None:
        self.status_code = status_code
        self.detail = detail
        message = f"API {status_code}: {detail}"
        super().__init__(message)
class ApiClient:
    """Async client wrapping the FHI Statistikk REST API with caching and rate limiting."""

    def __init__(self) -> None:
        # Shared connection pool rooted at the API base URL.
        self._client = httpx.AsyncClient(base_url=BASE_URL, timeout=30.0)
        self._cache = TTLCache()
        # At most 5 requests in flight concurrently.
        self._semaphore = asyncio.Semaphore(5)
        # Protects the rate-limiter bookkeeping (_last_request_time).
        self._request_lock = asyncio.Lock()
        self._last_request_time = 0.0

    async def close(self) -> None:
        """Close the underlying HTTP connection pool."""
        await self._client.aclose()

    async def _request(
        self,
        method: str,
        path: str,
        *,
        accept: str = "application/json",
        json_body: dict | None = None,
    ) -> httpx.Response:
        """Rate-limited HTTP request with retry on 429/503/timeout.

        Args:
            method: HTTP verb, e.g. "GET" or "POST".
            path: Path relative to BASE_URL.
            accept: Value for the Accept header.
            json_body: Optional JSON payload for POST requests.

        Returns:
            The successful httpx.Response.

        Raises:
            ApiError: for HTTP >= 400 responses, non-timeout network errors
                (status_code 0), or after all retries are exhausted.
        """
        async with self._semaphore:
            # Compute wait atomically, release lock before sleeping
            async with self._request_lock:
                now = time.monotonic()
                wait = MIN_REQUEST_INTERVAL - (now - self._last_request_time)
                # Reserve the next slot so concurrent callers space out by
                # MIN_REQUEST_INTERVAL each instead of piling onto "now".
                self._last_request_time = max(
                    now, self._last_request_time + MIN_REQUEST_INTERVAL
                )
            if wait > 0:
                await asyncio.sleep(wait)
            last_exc: Exception | None = None
            resp: httpx.Response | None = None
            # Up to 3 attempts; timeouts and 429/503 are retried with linear
            # backoff (2s, 4s). Other network errors raise immediately.
            for attempt in range(3):
                try:
                    resp = await self._client.request(
                        method, path, headers={"Accept": accept}, json=json_body,
                    )
                except httpx.TimeoutException as exc:
                    last_exc = exc
                    delay = (attempt + 1) * 2
                    logger.warning(
                        "Timeout (attempt %d), retrying in %ds", attempt + 1, delay
                    )
                    await asyncio.sleep(delay)
                    continue
                except httpx.RequestError as exc:
                    # Non-timeout network failure: not retried.
                    raise ApiError(0, f"Network error: {exc}") from exc
                if resp.status_code in (429, 503) and attempt < 2:
                    delay = (attempt + 1) * 2
                    logger.warning(
                        "Got %d, retrying in %ds", resp.status_code, delay
                    )
                    await asyncio.sleep(delay)
                    continue
                if resp.status_code >= 400:
                    raise ApiError(resp.status_code, _extract_error(resp))
                return resp
            # All retries exhausted
            if last_exc is not None:
                raise ApiError(
                    0, "API request timed out. Try reducing query scope."
                ) from last_exc
            if resp is not None:
                # Note: resp may be stale from an earlier attempt if the
                # final attempt timed out -- last_exc branch covers that case.
                raise ApiError(resp.status_code, _extract_error(resp))
            raise ApiError(0, "API request failed after retries.")

    async def _get_json(self, path: str) -> dict | list:
        """GET `path` and return the decoded JSON body."""
        resp = await self._request("GET", path)
        return resp.json()

    # --- Cached endpoints ---

    async def get_sources(self) -> list[dict]:
        """List all data sources; cached for 24h."""
        cached = self._cache.get("sources")
        if cached is not None:
            return cached
        data = await self._get_json("/Common/source")
        self._cache.set("sources", data, TTL_SOURCES)
        return data

    async def get_tables(
        self, source_id: str, modified_after: str | None = None,
    ) -> list[dict]:
        """List tables for a source; cached for 1h unless filtered by date."""
        cache_key = f"tables:{source_id}"
        cached = self._cache.get(cache_key)
        # Date-filtered requests bypass the cache in both directions.
        if cached is not None and modified_after is None:
            return cached
        path = f"/{source_id}/Table"
        if modified_after:
            path += f"?modifiedAfter={modified_after}"
        data = await self._get_json(path)
        if modified_after is None:
            self._cache.set(cache_key, data, TTL_TABLES)
        return data

    async def get_table_info(self, source_id: str, table_id: int) -> dict:
        """Fetch a table's basic info; cached for 6h."""
        cache_key = f"table_info:{source_id}:{table_id}"
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached
        data = await self._get_json(f"/{source_id}/Table/{table_id}")
        self._cache.set(cache_key, data, TTL_METADATA)
        return data

    async def get_dimensions(self, source_id: str, table_id: int) -> list[dict]:
        """Fetch a table's dimensions; cached for 6h."""
        cache_key = f"dims:{source_id}:{table_id}"
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached
        data = await self._get_json(f"/{source_id}/Table/{table_id}/dimension")
        # Some responses wrap the list in {"dimensions": [...]}.
        if isinstance(data, dict):
            data = data.get("dimensions", [])
        self._cache.set(cache_key, data, TTL_DIMENSIONS)
        return data

    async def get_metadata(self, source_id: str, table_id: int) -> dict:
        """Fetch a table's descriptive metadata; cached for 6h."""
        cache_key = f"meta:{source_id}:{table_id}"
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached
        data = await self._get_json(f"/{source_id}/Table/{table_id}/metadata")
        self._cache.set(cache_key, data, TTL_METADATA)
        return data

    async def get_flags(self, source_id: str, table_id: int) -> list[dict]:
        """Fetch a table's data-quality flags; cached for 6h."""
        cache_key = f"flags:{source_id}:{table_id}"
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached
        data = await self._get_json(f"/{source_id}/Table/{table_id}/flag")
        self._cache.set(cache_key, data, TTL_FLAGS)
        return data

    async def get_query_template(self, source_id: str, table_id: int) -> dict:
        """Fetch a table's query template; cached for 6h."""
        cache_key = f"query:{source_id}:{table_id}"
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached
        data = await self._get_json(f"/{source_id}/Table/{table_id}/query")
        self._cache.set(cache_key, data, TTL_QUERY)
        return data

    async def post_data(
        self,
        source_id: str,
        table_id: int,
        body: dict,
        max_row_count: int = 50000,
    ) -> str:
        """Post a data query, return raw CSV text."""
        # Copy to avoid mutating the caller's dict; force CSV output.
        request_body = {**body}
        request_body["response"] = {
            "format": "csv2",
            "maxRowCount": max_row_count,
        }
        resp = await self._request(
            "POST",
            f"/{source_id}/Table/{table_id}/data",
            accept="text/csv",
            json_body=request_body,
        )
        return resp.text
# --- Module-level lifecycle management ---

_client: ApiClient | None = None


def init_client() -> ApiClient:
    """Create the shared client. Call from server lifespan."""
    global _client
    _client = ApiClient()
    return _client


async def close_client() -> None:
    """Close the shared client. Call from server lifespan shutdown."""
    global _client
    if _client is None:
        return
    await _client.close()
    _client = None


def get_client() -> ApiClient:
    """Get (or lazily create) the shared client."""
    global _client
    if _client is None:
        _client = ApiClient()
    return _client
def _extract_error(resp: httpx.Response) -> str:
    """Extract human-readable error from API response (RFC 7807 ProblemDetails).

    Joins title, detail, and per-field validation messages with " | ";
    falls back to the raw body text (truncated) or the status code.
    """
    try:
        body = resp.json()
        if isinstance(body, dict):
            pieces: list[str] = []
            for field in ("title", "detail"):
                text = body.get(field, "")
                if text:
                    pieces.append(text)
            errors = body.get("errors", {})
            if isinstance(errors, dict):
                for messages in errors.values():
                    if isinstance(messages, list):
                        pieces.extend(str(m) for m in messages)
            if pieces:
                return " | ".join(pieces)
            return str(body)
    except Exception:
        pass
    # Non-JSON (or non-dict) body: return a truncated raw excerpt.
    return resp.text[:500] if resp.text else f"HTTP {resp.status_code}"

View File

@@ -0,0 +1,27 @@
"""Simple in-memory TTL cache."""
import time
from typing import Any
class TTLCache:
"""Dict-based cache with per-key TTL expiry."""
def __init__(self) -> None:
self._store: dict[str, tuple[float, Any]] = {}
def get(self, key: str) -> Any | None:
entry = self._store.get(key)
if entry is None:
return None
expires_at, value = entry
if time.monotonic() > expires_at:
del self._store[key]
return None
return value
def set(self, key: str, value: Any, ttl_seconds: float) -> None:
self._store[key] = (time.monotonic() + ttl_seconds, value)
def clear(self) -> None:
self._store.clear()

View File

@@ -0,0 +1,311 @@
"""MCP server exposing FHI Statistikk Open API as agent-friendly tools."""
import argparse
import asyncio
import logging
import sys
from contextlib import asynccontextmanager
from typing import Optional
# Route all framework logging to stderr with a uniform format, replacing any
# handlers the libraries installed -- presumably so stdout stays free for the
# MCP stdio transport (TODO confirm).
for _name in ("uvicorn", "uvicorn.error", "uvicorn.access", "mcp", "fastmcp"):
    _log = logging.getLogger(_name)
    _log.handlers = []
    _handler = logging.StreamHandler(sys.stderr)
    _handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
    _log.addHandler(_handler)
    _log.propagate = False

# Application logger, same stderr destination and format.
logger = logging.getLogger("fhi_statistikk_mcp")
logger.setLevel(logging.INFO)
_h = logging.StreamHandler(sys.stderr)
_h.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
logger.addHandler(_h)
from mcp.server.fastmcp import FastMCP
from .api_client import ApiError, close_client, get_client, init_client
from .transformers import (
complete_query_dimensions,
extract_metadata_fields,
matches_search,
navigate_hierarchy,
parse_csv_to_rows,
strip_html,
summarize_dimensions,
)
# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------
async def list_sources() -> list[dict]:
    """List all available FHI data sources.

    Returns an array of sources with id, title, description, and publisher.
    This is the entry point for discovering what data is available.

    Sources include public health statistics (Folkehelsestatistikk),
    vaccination registry (SYSVAK), cause of death registry, and more.
    All data is open (CC BY 4.0), no authentication required.
    """
    api = get_client()
    raw = await api.get_sources()
    sources = []
    for entry in raw:
        sources.append({
            "id": entry.get("id", ""),
            "title": entry.get("title", ""),
            "description": strip_html(entry.get("description", "")),
            "published_by": entry.get("publishedBy", ""),
        })
    return sources
async def list_tables(
    source_id: str,
    search: Optional[str] = None,
    modified_after: Optional[str] = None,
) -> list[dict]:
    """List tables within a data source, with optional keyword search.

    Args:
        source_id: Source identifier, e.g. "nokkel", "msis", "daar".
            Use list_sources to find available source IDs.
        search: Case-insensitive keyword filter on table title.
            Multiple words must all match. Example: "befolkning vekst"
        modified_after: ISO-8601 datetime. Only return tables modified after
            this date. Example: "2025-01-01"

    Returns tables sorted by modification date (newest first).
    """
    api = get_client()
    raw = await api.get_tables(source_id, modified_after)
    tables = []
    for entry in raw:
        record = {
            "table_id": entry.get("tableId"),
            "title": entry.get("title", ""),
            "published_at": entry.get("publishedAt", ""),
            "modified_at": entry.get("modifiedAt", ""),
        }
        if not search or matches_search(record["title"], search):
            tables.append(record)
    return sorted(tables, key=lambda t: t["modified_at"] or "", reverse=True)
async def describe_table(source_id: str, table_id: int) -> dict:
    """Get complete table structure: dimensions, metadata, and flags.

    This is the primary tool for understanding a table before querying data.
    Returns everything needed to construct a query_data call.

    Args:
        source_id: Source identifier, e.g. "nokkel"
        table_id: Numeric table ID from list_tables

    The response includes:
    - title, dates, description, keywords, update frequency
    - dimensions with their codes, labels, and available values
    - flags (symbols for missing/suppressed data)

    Large dimensions (GEO with 400+ entries) show only top-level values.
    Use get_dimension_values to drill into sub-levels.
    Fixed dimensions (single value, like KJONN="kjønn samlet") are marked
    with is_fixed=true -- query_data auto-includes these.
    """
    api = get_client()
    # Fetch the four table resources concurrently.
    info, dims, meta, flags = await asyncio.gather(
        api.get_table_info(source_id, table_id),
        api.get_dimensions(source_id, table_id),
        api.get_metadata(source_id, table_id),
        api.get_flags(source_id, table_id),
    )
    dim_list = dims if isinstance(dims, list) else []
    flag_list = flags if isinstance(flags, list) else []
    return {
        "title": info.get("title", ""),
        "published_at": info.get("publishedAt", ""),
        "modified_at": info.get("modifiedAt", ""),
        **extract_metadata_fields(meta),
        "dimensions": summarize_dimensions(dim_list),
        "flags": [
            {"symbol": f.get("symbol", ""), "description": f.get("description", "")}
            for f in flag_list
        ],
    }
async def get_dimension_values(
    source_id: str,
    table_id: int,
    dimension_code: str,
    parent_value: Optional[str] = None,
    search: Optional[str] = None,
) -> list[dict]:
    """Drill into dimension values, especially for large hierarchical dimensions like GEO.

    Args:
        source_id: Source identifier
        table_id: Table ID
        dimension_code: Dimension code, e.g. "GEO", "AAR", "ALDER"
        parent_value: Return only children of this category.
            Example: "18" for Nordland county municipalities.
            If omitted, returns top-level categories.
        search: Case-insensitive search on category labels.
            Accent-insensitive: "tromso" matches "Tromsø".
            Example: "bodø", "oslo", "bergen"

    Returns array of {value, label, child_count}.
    """
    api = get_client()
    dims = await api.get_dimensions(source_id, table_id)
    dim_list = dims if isinstance(dims, list) else []

    # Code matching is case-insensitive.
    wanted = dimension_code.upper()
    target = next(
        (d for d in dim_list if d.get("code", "").upper() == wanted),
        None,
    )
    if target is None:
        codes = ", ".join(d.get("code", "") for d in dim_list)
        raise ValueError(
            f"Dimension '{dimension_code}' not found. "
            f"Available: {codes}"
        )
    return navigate_hierarchy(target.get("categories", []), parent_value, search)
async def query_data(
    source_id: str,
    table_id: int,
    dimensions: list[dict],
    max_rows: int = 1000,
) -> dict:
    """Fetch statistical data from an FHI table.

    Before calling this, use describe_table to understand the table's
    dimensions and available values.

    Args:
        source_id: Source identifier
        table_id: Table ID
        dimensions: Array of dimension filters. Each element:
            - code (str): Dimension code, e.g. "GEO"
            - filter (str): "item" (exact), "all" (wildcard), "top" (first N), "bottom" (last N)
            - values (list[str]): Filter values

            You only need to specify dimensions you care about.
            Fixed dimensions (single-valued) are auto-included.
            If you omit MEASURE_TYPE, all measures are returned.
            All other dimensions MUST be specified or a ValueError is raised.
        max_rows: Max rows to return (default 1000, 0 for unlimited)

    Year values: use "2020" (auto-translated to "2020_2020") or full format.

    Returns labeled rows with truncation info. Check "truncated" field.
    """
    api = get_client()
    raw_dims = await api.get_dimensions(source_id, table_id)
    dim_list = raw_dims if isinstance(raw_dims, list) else []
    query_dims = complete_query_dimensions(dim_list, dimensions)

    try:
        csv_text = await api.post_data(
            source_id, table_id, {"dimensions": query_dims}
        )
    except ApiError as e:
        # Surface API-side validation failures as tool-level ValueErrors.
        raise ValueError(f"API error: {e.detail}") from e

    result = parse_csv_to_rows(csv_text, max_rows)
    result["dimensions_used"] = {
        d["code"]: {"filter": d["filter"], "values": d["values"]}
        for d in query_dims
    }
    # Table title is best-effort decoration; ignore any failure.
    try:
        info = await api.get_table_info(source_id, table_id)
        result["table"] = info.get("title", "")
    except Exception:
        pass
    return result
async def get_query_template(source_id: str, table_id: int) -> dict:
    """Get the raw query template for a table.

    Returns the exact JSON body the API expects for data queries.
    Useful when query_data auto-completion isn't behaving as expected,
    or to see all available values for every dimension.

    Args:
        source_id: Source identifier
        table_id: Table ID
    """
    client = get_client()
    template = await client.get_query_template(source_id, table_id)
    return template
# ---------------------------------------------------------------------------
# Server construction and entry point
# ---------------------------------------------------------------------------
def _build_mcp(host: str, port: int) -> FastMCP:
    """Construct the FastMCP server with a lifespan-managed HTTP client.

    Args:
        host: Bind address used by network transports.
        port: Bind port used by network transports.
    """
    @asynccontextmanager
    async def _lifespan(_server: FastMCP):
        # Create the shared API client at startup, close it at shutdown.
        init_client()
        logger.info("HTTP client initialized")
        try:
            yield
        finally:
            await close_client()
            logger.info("HTTP client closed")
    server = FastMCP("fhi-statistikk", host=host, port=port, lifespan=_lifespan)
    # Register each coroutine as an MCP tool under its public name.
    server.tool(name="list_sources")(list_sources)
    server.tool(name="list_tables")(list_tables)
    server.tool(name="describe_table")(describe_table)
    server.tool(name="get_dimension_values")(get_dimension_values)
    server.tool(name="query_data")(query_data)
    server.tool(name="get_query_template")(get_query_template)
    return server
def main():
    """CLI entry point: parse arguments and run the MCP server."""
    parser = argparse.ArgumentParser(description="FHI Statistikk MCP Server")
    parser.add_argument(
        "--transport",
        default="sse",
        choices=["stdio", "sse", "streamable-http"],
    )
    parser.add_argument("--host", default="0.0.0.0")
    parser.add_argument("--port", type=int, default=8000)
    args = parser.parse_args()

    logger.info("Starting FHI Statistikk MCP server")
    logger.info(" API: %s", "https://statistikk-data.fhi.no/api/open/v1/")
    logger.info(" Transport: %s on %s:%d", args.transport, args.host, args.port)

    server = _build_mcp(args.host, args.port)
    server.run(transport=args.transport)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,347 @@
"""Data transformation utilities for FHI API responses."""
import csv
import io
import re
import unicodedata
# --- HTML stripping ---

_HTML_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")


def strip_html(text: str) -> str:
    """Remove HTML tags, decode entities, collapse whitespace."""
    if not text:
        return text
    result = _HTML_TAG_RE.sub(" ", text)
    # Order matters: "&amp;" is decoded first, matching sequential
    # replacement (so "&amp;lt;" ultimately becomes "<").
    for entity, char in (
        ("&amp;", "&"),
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&nbsp;", " "),
        ("&quot;", '"'),
        ("&#39;", "'"),
    ):
        result = result.replace(entity, char)
    return _WHITESPACE_RE.sub(" ", result).strip()
# --- Accent-insensitive search ---

_NORDIC_MAP = str.maketrans({
    "æ": "a", "ø": "o", "å": "a",
    "ü": "u",
})


def normalize_for_search(text: str) -> str:
    """Normalize for accent-insensitive comparison.

    Handles Nordic characters (æøå) and combining accents.
    "tromso" matches "Tromsø", "barum" matches "Bærum".
    """
    folded = text.lower().translate(_NORDIC_MAP)
    # NFD decomposition splits accented letters so combining marks (Mn)
    # can be dropped.
    decomposed = unicodedata.normalize("NFD", folded)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
    return "".join(kept)


def matches_search(text: str, query: str) -> bool:
    """Check if all query words appear in text (accent-insensitive)."""
    haystack = normalize_for_search(text)
    for word in normalize_for_search(query).split():
        if word not in haystack:
            return False
    return True
# --- Year normalization ---

def normalize_year_value(value: str) -> str:
    """Convert "2020" to "2020_2020" if not already in that format."""
    is_plain_year = value.isdigit() and "_" not in value
    return f"{value}_{value}" if is_plain_year else value
# --- Category tree operations ---

def flatten_categories(categories: list[dict]) -> list[dict]:
    """Flatten a nested category tree into a flat list with parent_value refs.

    Input: [{value, label, children: [{...}]}]
    Output: [{value, label, parent_value: str|None}, ...]
    """
    flat: list[dict] = []
    # Iterative preorder DFS; siblings are pushed reversed so the leftmost
    # one is popped first, matching recursive traversal order.
    stack: list[tuple[dict, str | None]] = [
        (node, None) for node in reversed(categories)
    ]
    while stack:
        node, parent = stack.pop()
        value = node.get("value", "")
        flat.append({
            "value": value,
            "label": node.get("label", ""),
            "parent_value": parent,
        })
        children = node.get("children") or []
        for child in reversed(children):
            stack.append((child, value))
    return flat
def navigate_hierarchy(
    raw_categories: list[dict],
    parent_value: str | None = None,
    search: str | None = None,
) -> list[dict]:
    """Navigate a nested category tree. Returns [{value, label, child_count}]."""
    flat = flatten_categories(raw_categories)

    def _entry(cat: dict) -> dict:
        # Shared projection: value/label plus number of direct children.
        return {
            "value": cat["value"],
            "label": cat["label"],
            "child_count": _count_children(cat["value"], flat),
        }

    if search:
        # Search matches across the whole tree, ignoring parent_value.
        return [_entry(c) for c in flat if matches_search(c["label"], search)]
    # No search: list direct children of parent_value (None -> top level).
    selected = [c for c in flat if c["parent_value"] == parent_value]
    return [_entry(c) for c in selected]
# --- Dimension summarization ---
def summarize_dimensions(dimensions: list[dict]) -> list[dict]:
    """Transform raw dimension data into agent-friendly summaries.

    Every summary has code, label, total_categories, and is_hierarchical.
    The remaining fields depend on the dimension's shape, checked in order:

    1. exactly one category  -> is_fixed with the single value
    2. hierarchical and >20  -> top-level values only, plus hierarchy_depth
    3. year-like             -> value_format hint and range (values if <= 50)
    4. otherwise             -> full value/label list
    """
    result = []
    for dim in dimensions:
        code = dim.get("code", "")
        label = dim.get("label", "")
        raw_categories = dim.get("categories", [])
        flat = flatten_categories(raw_categories)
        summary: dict = {
            "code": code,
            "label": label,
            "total_categories": len(flat),
        }
        # Hierarchical if any category has a parent.
        has_hierarchy = any(c["parent_value"] is not None for c in flat)
        summary["is_hierarchical"] = has_hierarchy
        if len(flat) == 1:
            # Fixed dimension: callers never need to choose a value.
            summary["is_fixed"] = True
            summary["values"] = [
                {"value": flat[0]["value"], "label": flat[0]["label"]}
            ]
            summary["note"] = "Single-valued, auto-included in queries"
        elif has_hierarchy and len(flat) > 20:
            # Large hierarchy (e.g. GEO): list only top-level entries.
            top_level = [c for c in flat if c["parent_value"] is None]
            summary["hierarchy_depth"] = _compute_depth(flat)
            summary["top_level_values"] = [
                {
                    "value": c["value"],
                    "label": c["label"],
                    "child_count": _count_children(c["value"], flat),
                }
                for c in top_level
            ]
            summary["note"] = "Use get_dimension_values to drill into sub-levels"
        elif is_year_dimension(code, flat):
            values = [c["value"] for c in flat]
            years = _extract_year_range(values)
            # Years are presented flat with an explicit format hint.
            summary["is_hierarchical"] = False
            summary["value_format"] = "YYYY_YYYY (e.g. 2020_2020)"
            if years:
                summary["range"] = f"{years[0]}..{years[-1]}"
            if len(flat) <= 50:
                summary["values"] = values
        else:
            summary["values"] = [
                {"value": c["value"], "label": c["label"]}
                for c in flat
            ]
        result.append(summary)
    return result
def extract_metadata_fields(metadata: dict) -> dict:
    """Extract key fields from metadata response.

    API returns: {name, isOfficialStatistics, paragraphs: [{header, content}]}
    """
    fields: dict = {}
    if isinstance(metadata, dict):
        official = metadata.get("isOfficialStatistics")
        if official is not None:
            fields["is_official_statistics"] = official
        paragraphs = metadata.get("paragraphs", [])
    elif isinstance(metadata, list):
        # Some responses are a bare paragraph list.
        paragraphs = metadata
    else:
        paragraphs = []

    for section in paragraphs:
        header = (section.get("header") or "").lower()
        content = strip_html(section.get("content") or "")
        if not content:
            continue
        # Map Norwegian/English section headers to stable field names;
        # first matching branch wins.
        if any(k in header for k in ("beskrivelse", "description")):
            fields["description"] = content
        elif any(k in header for k in ("oppdater", "frekvens")):
            fields["update_frequency"] = content
        elif any(k in header for k in ("nøkkelord", "keyword", "emneord")):
            fields["keywords"] = [part.strip() for part in content.split(",")]
        elif any(k in header for k in ("kilde", "source", "institusjon")):
            fields["source_institution"] = content
    return fields
# --- Query dimension completion ---

def complete_query_dimensions(
    dim_definitions: list[dict],
    user_dimensions: list[dict],
) -> list[dict]:
    """Build complete query dimension list from user input and table definitions.

    - User-provided dimensions pass through (with year normalization for "item" filter).
    - Fixed dimensions (1 category) are auto-included.
    - MEASURE_TYPE defaults to filter="all", values=["*"].
    - Missing non-fixed dimensions raise ValueError.
    """
    for entry in user_dimensions:
        if "code" not in entry:
            raise ValueError(f"Dimension entry missing 'code' key: {entry}")
    provided = {entry["code"].upper(): entry for entry in user_dimensions}

    query_dims: list[dict] = []
    missing: list[str] = []
    for dim_def in dim_definitions:
        code = dim_def.get("code", "")
        flat = flatten_categories(dim_def.get("categories", []))
        key = code.upper()
        if key in provided:
            user = provided[key]
            filt = user.get("filter", "item")
            vals = user.get("values", [])
            # Let callers write bare years for exact-match year filters.
            if filt == "item" and is_year_dimension(code, flat):
                vals = [normalize_year_value(v) for v in vals]
            query_dims.append({"code": code, "filter": filt, "values": vals})
        elif len(flat) == 1:
            # Single-valued dimension: auto-include its only value.
            query_dims.append(
                {"code": code, "filter": "item", "values": [flat[0]["value"]]}
            )
        elif key == "MEASURE_TYPE":
            # Default to every measure when the caller didn't pick one.
            query_dims.append({"code": code, "filter": "all", "values": ["*"]})
        else:
            missing.append(code)

    if missing:
        raise ValueError(
            f"Missing required dimensions: {', '.join(missing)}. "
            "Specify these or use filter='all' with values=['*'] to include all."
        )
    return query_dims
# --- CSV parsing ---

def parse_csv_to_rows(csv_text: str, max_rows: int = 1000) -> dict:
    """Parse semicolon-delimited CSV response into structured rows."""
    rows: list[dict] = []
    total = 0
    for record in csv.DictReader(io.StringIO(csv_text), delimiter=";"):
        total += 1
        if max_rows > 0 and len(rows) >= max_rows:
            continue  # past the cap: keep counting, stop collecting
        cleaned = {
            key.strip(): _try_numeric(val.strip() if val else "")
            for key, val in record.items()
            if key is not None
        }
        rows.append(cleaned)
    return {
        "rows": rows,
        "total_rows": total,
        "truncated": total > len(rows),
    }
# --- Internal helpers ---
def _count_children(value: str, flat: list[dict]) -> int:
return sum(1 for c in flat if c["parent_value"] == value)
def _compute_depth(flat: list[dict]) -> int:
"""Compute hierarchy depth with cycle detection."""
parent_map = {c["value"]: c["parent_value"] for c in flat}
max_depth = 1
for val in parent_map:
depth = 1
current = val
seen: set[str] = set()
while parent_map.get(current) and current not in seen:
seen.add(current)
current = parent_map[current]
depth += 1
max_depth = max(max_depth, depth)
return max_depth
def is_year_dimension(code: str, flat: list[dict]) -> bool:
    """True if a dimension represents years: by code, or by YYYY_YYYY values."""
    if code.upper() in ("AAR", "YEAR"):
        return True
    return bool(flat) and re.match(r"^\d{4}_\d{4}$", flat[0]["value"]) is not None
def _extract_year_range(values: list[str]) -> list[int]:
years = []
for v in values:
m = re.match(r"^(\d{4})(?:_\d{4})?$", v)
if m:
years.append(int(m.group(1)))
return sorted(years)
def _try_numeric(value: str):
"""Try to convert a string to int or float. Returns None for missing-data symbols."""
if not value or value in ("..", ":", "-"):
return None
try:
if "." in value or "," in value:
return float(value.replace(",", "."))
return int(value)
except ValueError:
return value

1
tests/__init__.py Normal file
View File

@@ -0,0 +1 @@

Binary file not shown.

2
tests/fixtures/data_185.csv vendored Normal file
View File

@@ -0,0 +1,2 @@
"Geografi";"År";"Kjønn";"Alder";"antall";"prosent vekst";"FLAGG"
"Oslo";"2024";"kjønn samlet";"alle aldre";"6580";"0.916804837608505";""
1 Geografi År Kjønn Alder antall prosent vekst FLAGG
2 Oslo 2024 kjønn samlet alle aldre 6580 0.916804837608505

2234
tests/fixtures/dimensions_185.json vendored Normal file

File diff suppressed because it is too large Load Diff

7
tests/fixtures/flags_185.json vendored Normal file
View File

@@ -0,0 +1,7 @@
[
{
"value": 0,
"symbol": "",
"description": "Verdi finnes i tabellen"
}
]

58
tests/fixtures/metadata_185.json vendored Normal file
View File

@@ -0,0 +1,58 @@
{
"name": "Befolkningsvekst",
"isOfficialStatistics": false,
"paragraphs": [
{
"header": "Beskrivelse",
"content": "<p>Differansen mellom befolkningsmengden i slutten av året (målt 1. januar året etter) og i begynnelsen av året (1. januar). Statistikken vises for kommune- og fylkesinndeling per 1.1.2024.</p><p></p><p></p><p>To måltall er tilgjengelig:</p><ol><li>Antall</li><li>Prosent vekst = prosentvis vekst i folketallet, i prosent av folketall ved inngangen av året</li></ol>"
},
{
"header": "Begrunnelse for valg av indikatoren",
"content": "<p>Mange av indikatorene i statistikkbanken er relatert til befolkningstall og -sammensetning, og befolkningsveksten i en kommune bidrar til informasjon om dette. Omtrent en tredjedel av veksten i Norge skyldes fødselsoverskudd, mens resten skyldes netto innvandring.</p><p></p><p>Kilde: </p><p>FHIs folkehelserapport: <a href=\"https://www.fhi.no/he/folkehelserapporten/samfunn/befolkningen/\" rel=\"noopener noreferrer\" target=\"_blank\">Befolkningen i Norge</a></p>"
},
{
"header": "Kildeinstitusjon",
"content": "Statistisk sentralbyrå (SSB)"
},
{
"header": "Innsamling",
"content": "Statistikken beregnes fra Statistisk sentralbyrås befolkningsstatistikk som bygger på folkeregisteropplysninger."
},
{
"header": "Tolking og feilkilder",
"content": "Det er tatt hensyn til mindre grensejusteringer mellom kommuner i statistikken."
},
{
"header": "Datakvalitet",
"content": "En del feil ved innsamling og bearbeiding av dataene er uunngåelig. Det kan være kodefeil, revisjonsfeil, etc. Det er utført et omfattende arbeid for å minimalisere disse feilene, og disse feiltypene anses for å være relativt ubetydelige."
},
{
"header": "Oppdateringsfrekvens",
"content": "Årlig"
},
{
"header": "Nøkkelord",
"content": "Befolkning,Befolkningsvekst"
},
{
"header": "Relatert materiale",
"content": "<p>FHIs folkehelserapport: <a href=\"https://www.fhi.no/he/folkehelserapporten/samfunn/befolkningen/\" rel=\"noopener noreferrer\" target=\"_blank\">Befolkningen i Norge</a> </p>"
},
{
"header": "Geografi",
"content": "<p>Hele landet, fylker og kommuner. Bydeler i Oslo, Bergen, Stavanger og Trondheim. </p>"
},
{
"header": "År",
"content": "2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 og 2024"
},
{
"header": "Kjønn",
"content": "kjønn samlet"
},
{
"header": "Alder",
"content": "alle aldre"
}
]
}

474
tests/fixtures/query_185.json vendored Normal file
View File

@@ -0,0 +1,474 @@
{
"dimensions": [
{
"code": "GEO",
"filter": "item",
"values": [
"0",
"3436",
"460104",
"4611",
"4617",
"4647",
"5027",
"1851",
"1857",
"1865",
"1866",
"1867",
"1868",
"1870",
"1871",
"1874",
"1875",
"5501",
"5510",
"5512",
"5514",
"5516",
"5518",
"5520",
"5522",
"5524",
"5526",
"5528",
"5530",
"5532",
"5534",
"5536",
"5540",
"5542",
"5544",
"5546",
"5601",
"5603",
"5605",
"3911",
"5026",
"5029",
"1856",
"030105",
"030106",
"3430",
"3336",
"3338",
"3401",
"3403",
"3407",
"3411",
"3412",
"3413",
"3414",
"1806",
"1811",
"1813",
"1815",
"1816",
"1818",
"1820",
"1822",
"1824",
"1825",
"1827",
"1828",
"1832",
"1833",
"15",
"50",
"18",
"55",
"56",
"03",
"0301",
"030108",
"030109",
"030110",
"030111",
"030112",
"030113",
"030114",
"030115",
"31",
"3101",
"3103",
"32",
"33",
"34",
"39",
"40",
"42",
"11",
"110301",
"110302",
"110304",
"110305",
"46",
"4217",
"4218",
"1834",
"1835",
"1836",
"1837",
"1839",
"1840",
"1841",
"1845",
"1848",
"5031",
"5033",
"5034",
"5035",
"5036",
"5037",
"5038",
"5041",
"5042",
"5044",
"5045",
"5046",
"5047",
"5049",
"5052",
"5053",
"5054",
"5055",
"5056",
"5058",
"5059",
"5060",
"5061",
"1804",
"030101",
"030102",
"030103",
"030104",
"3434",
"4204",
"110306",
"110307",
"110308",
"110309",
"460101",
"460102",
"460103",
"460105",
"460106",
"460107",
"4629",
"4650",
"5028",
"1853",
"1859",
"5614",
"5616",
"5618",
"5620",
"5622",
"5624",
"5626",
"5628",
"5630",
"5632",
"5634",
"3105",
"3107",
"4214",
"4215",
"4216",
"4219",
"4220",
"4221",
"4222",
"4224",
"4225",
"4226",
"4227",
"4228",
"1101",
"1103",
"1106",
"1108",
"1111",
"1112",
"1114",
"1120",
"1121",
"1122",
"1124",
"1127",
"1130",
"1133",
"1134",
"1144",
"1145",
"1146",
"1149",
"1151",
"1160",
"4601",
"1505",
"1506",
"1508",
"1511",
"1514",
"1516",
"1517",
"1520",
"1525",
"1528",
"1531",
"1532",
"1535",
"1539",
"1547",
"1560",
"1563",
"1566",
"1573",
"1577",
"1578",
"1579",
"030107",
"3112",
"3205",
"3330",
"3405",
"3419",
"3437",
"3447",
"3901",
"4212",
"110303",
"1119",
"1135",
"460108",
"4637",
"1515",
"1557",
"500101",
"500102",
"500103",
"500104",
"5057",
"1812",
"1826",
"1838",
"5607",
"5612",
"5636",
"3110",
"3226",
"3314",
"3429",
"4012",
"4030",
"4036",
"4223",
"4627",
"1554",
"1576",
"5007",
"5032",
"5043",
"1860",
"5503",
"5538",
"5610",
"3114",
"3116",
"3118",
"3120",
"3122",
"3124",
"3201",
"3203",
"3207",
"3209",
"3212",
"3214",
"3216",
"3218",
"3220",
"3222",
"3224",
"3228",
"3230",
"3232",
"3234",
"3236",
"3238",
"3240",
"3242",
"3301",
"3303",
"3305",
"3310",
"3312",
"3316",
"3318",
"3320",
"3322",
"3324",
"3326",
"3328",
"3332",
"3334",
"3415",
"3416",
"3417",
"3418",
"3420",
"3421",
"3422",
"3423",
"3424",
"3425",
"3426",
"3427",
"3428",
"3431",
"3432",
"3433",
"3435",
"3438",
"3439",
"3440",
"3441",
"3442",
"3443",
"3446",
"3448",
"3449",
"3450",
"3451",
"3452",
"3453",
"3454",
"3903",
"3905",
"3907",
"3909",
"4001",
"4003",
"4005",
"4010",
"4014",
"4016",
"4018",
"4020",
"4022",
"4024",
"4026",
"4028",
"4032",
"4034",
"4201",
"4202",
"4203",
"4205",
"4206",
"4207",
"4211",
"4213",
"4602",
"4612",
"4613",
"4614",
"4615",
"4616",
"4618",
"4619",
"4620",
"4621",
"4622",
"4623",
"4624",
"4625",
"4626",
"4628",
"4630",
"4631",
"4632",
"4633",
"4634",
"4635",
"4636",
"4638",
"4639",
"4640",
"4641",
"4642",
"4643",
"4644",
"4645",
"4646",
"4648",
"4649",
"4651",
"1580",
"5001",
"5006",
"5014",
"5020",
"5021",
"5022",
"5025"
]
},
{
"code": "AAR",
"filter": "item",
"values": [
"2002_2002",
"2003_2003",
"2004_2004",
"2005_2005",
"2006_2006",
"2007_2007",
"2008_2008",
"2009_2009",
"2010_2010",
"2011_2011",
"2012_2012",
"2013_2013",
"2014_2014",
"2015_2015",
"2016_2016",
"2017_2017",
"2018_2018",
"2019_2019",
"2020_2020",
"2021_2021",
"2022_2022",
"2023_2023",
"2024_2024"
]
},
{
"code": "KJONN",
"filter": "item",
"values": [
"0"
]
},
{
"code": "ALDER",
"filter": "item",
"values": [
"0_120"
]
},
{
"code": "MEASURE_TYPE",
"filter": "item",
"values": [
"TELLER",
"RATE"
]
}
],
"response": {
"format": "json-stat2",
"maxRowCount": 50000
}
}

93
tests/fixtures/sources.json vendored Normal file
View File

@@ -0,0 +1,93 @@
[
{
"id": "nokkel",
"title": "Folkehelsestatistikk",
"description": "Statistikk om befolkning, oppvekst og levekår, miljø, skader, helserelatert atferd og helsetilstand.",
"aboutUrl": "https://www.helsedirektoratet.no/forebygging-diagnose-og-behandling/forebygging-og-levevaner/folkehelsestatistikk-og-profiler",
"publishedBy": "Helsedirektoratet"
},
{
"id": "ngs",
"title": "Mikrobiologisk genomovervåkning",
"description": "Data fra helgenomsekvesering og andre genetiske analyser av bakterier og virus utført ved FHIs referanselaboratorier.",
"aboutUrl": "https://www.fhi.no/hd/laboratorie-analyser",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "mfr",
"title": "Medisinsk fødselsregister",
"description": "Om svangerskap, fødsler og nyfødte i Norge fra 1967 til i dag.",
"aboutUrl": "https://www.fhi.no/op/mfr",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "abr",
"title": "Abortregisteret",
"description": "Om svangerskapsbrudd i Norge fra 1979 til i dag.",
"aboutUrl": "https://www.fhi.no/op/abortregisteret",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "sysvak",
"title": "Nasjonalt vaksinasjonsregister SYSVAK",
"description": "Data for influensa- og koronavaksinasjoner og for Barnevaksinasjonsprogrammet (dekningsstatistikk).",
"aboutUrl": "https://www.fhi.no/va/sysvak/",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "daar",
"title": "Dødsårsakregisteret",
"description": "Om dødsfall og dødsårsaker i Norge fra 1951 til i dag.",
"aboutUrl": "https://www.fhi.no/op/dodsarsaksregisteret/",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "msis",
"title": "Meldingssystem for smittsomme sykdommer (MSIS) ",
"description": "Oversikt over meldingspliktige sykdommer fra 1977 til i dag.",
"aboutUrl": "https://allvis.fhi.no/msis",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "lmr",
"title": "Legemiddelregisteret",
"description": "Her finner du oversikt over legemidler som er utlevert på resept i Norge (fra og med 2004).",
"aboutUrl": "https://www.fhi.no/he/legemiddelregisteret/",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "gs",
"title": "Grossiststatistikk",
"description": "Salg av reseptbelagte og reseptfrie legemidler fra grossister til blant annet apotek, institusjoner og dagligvare.",
"aboutUrl": "https://www.fhi.no/he/legemiddelbruk/om-den-grossistbaserte-legemiddelfo/#statistikkbanker-med-data-om-legemiddelforbruk",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "npr",
"title": "Norsk pasientregister",
"description": "Statistikk fra somatiske fagområder, psykisk helsevern og tverrfaglig spesialisert rusbehandling.",
"aboutUrl": "https://www.fhi.no/he/npr",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "kpr",
"title": "Kommunalt pasient- og brukerregister",
"description": "Statistikk om bruk av helse- og omsorgstjenester i kommunene.",
"aboutUrl": "https://www.fhi.no/he/kpr/statistikk-og-rapporter",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "hkr",
"title": "Hjerte- og karsykdommer",
"description": "Opplysninger fra 2012 til i dag om personer med sykdommer i hjertet og blodårene, og om behandlingen av disse sykdommene.",
"aboutUrl": "https://www.fhi.no/is/hjertekar2/",
"publishedBy": "Folkehelseinstituttet"
},
{
"id": "skast",
"title": "Skadedyrstatistikk",
"description": "Skadedyrstatistikken gir oversikt over hvor ofte utvalgte skadedyr bekjempes i Norge. Statistikken bygger på månedlige rapporter fra skadedyrfirmaer og viser utvikling mellom år og sesongvariasjoner.",
"aboutUrl": "https://www.fhi.no/sk/skadedyrbekjempelse/statistikk-om-skadedyr",
"publishedBy": "Folkehelseinstituttet"
}
]

6
tests/fixtures/table_185.json vendored Normal file
View File

@@ -0,0 +1,6 @@
{
"tableId": 185,
"title": "Befolkningsvekst",
"publishedAt": "2025-10-21T08:56:39.806397Z",
"modifiedAt": "2025-10-21T08:56:39.806397Z"
}

728
tests/fixtures/tables_nokkel.json vendored Normal file
View File

@@ -0,0 +1,728 @@
[
{
"tableId": 334,
"title": "Antibiotikaresepter",
"publishedAt": "2024-11-04T19:48:29.225776Z",
"modifiedAt": "2024-11-04T19:48:29.225776Z"
},
{
"tableId": 364,
"title": "Hjerte- og karregisteret_3aarigLFKB",
"publishedAt": "2024-10-29T12:57:13.733322Z",
"modifiedAt": "2024-10-29T12:57:13.733322Z"
},
{
"tableId": 365,
"title": "Hjerte- og karregisteret_1aarigLHF",
"publishedAt": "2024-12-18T12:13:02.962319Z",
"modifiedAt": "2025-01-09T06:58:42.47032Z"
},
{
"tableId": 385,
"title": "Legemidler til behandling av type-2 diabetes_3aarigLFK",
"publishedAt": "2024-10-29T13:09:17.201712Z",
"modifiedAt": "2024-10-29T13:09:17.201712Z"
},
{
"tableId": 403,
"title": "Vedvarende_lavinntekt_kommunegrense",
"publishedAt": "2025-04-24T12:16:59.538284Z",
"modifiedAt": "2025-04-24T12:16:59.538285Z"
},
{
"tableId": 601,
"title": "Mediebruk_DataTVspill_Ungdata_KH",
"publishedAt": "2025-05-21T11:28:47.455265Z",
"modifiedAt": "2025-05-21T11:43:20.96496Z"
},
{
"tableId": 602,
"title": "Mediebruk_SOME_Ungdata_KH",
"publishedAt": "2025-05-21T11:45:16.001373Z",
"modifiedAt": "2025-05-21T11:45:16.001374Z"
},
{
"tableId": 171,
"title": "Befolkningsframskriving",
"publishedAt": "2025-02-25T14:42:59.238949Z",
"modifiedAt": "2025-07-27T07:07:07.001823Z"
},
{
"tableId": 358,
"title": "Gjeld",
"publishedAt": "2025-02-26T11:51:11.007021Z",
"modifiedAt": "2025-02-26T11:51:11.007021Z"
},
{
"tableId": 362,
"title": "Grunnskolepoeng_UTDANN",
"publishedAt": "2026-01-08T15:38:55.208332Z",
"modifiedAt": "2026-01-08T15:38:55.208332Z"
},
{
"tableId": 377,
"title": "Mobbing, 7. og 10. klasse, 3-årige tall",
"publishedAt": "2025-03-07T13:27:29.115965Z",
"modifiedAt": "2025-03-07T13:27:29.115965Z"
},
{
"tableId": 336,
"title": "Barnehagekvalitet_bemanning",
"publishedAt": "2025-04-01T10:00:58.015678Z",
"modifiedAt": "2025-04-01T10:00:58.015678Z"
},
{
"tableId": 355,
"title": "Fremtidsoptimisme_Ungdata_KH",
"publishedAt": "2025-04-23T08:48:45.229583Z",
"modifiedAt": "2025-04-23T08:48:45.229583Z"
},
{
"tableId": 392,
"title": "Stønad_livsopphold",
"publishedAt": "2025-05-09T12:50:19.326568Z",
"modifiedAt": "2025-05-13T14:39:57.70227Z"
},
{
"tableId": 664,
"title": "Sosialhjelpsmottakere",
"publishedAt": "2025-06-06T06:36:12.451102Z",
"modifiedAt": "2025-06-06T06:36:12.451103Z"
},
{
"tableId": 667,
"title": "Mediebruk_underhold_ungdata",
"publishedAt": "2025-05-22T10:24:12.167179Z",
"modifiedAt": "2026-02-11T10:21:53.318047Z"
},
{
"tableId": 685,
"title": "Regneferd_UTDANN_3",
"publishedAt": "2026-02-12T14:31:30.453553Z",
"modifiedAt": "2026-02-12T14:31:30.453553Z"
},
{
"tableId": 688,
"title": "Forventede funksjonsfriske leveår_7",
"publishedAt": "2025-06-23T09:07:30.416427Z",
"modifiedAt": "2025-06-23T09:14:39.112096Z"
},
{
"tableId": 606,
"title": "Tannhelse_DMFT=0_MED_DEKNING",
"publishedAt": "2025-09-04T14:44:50.755457Z",
"modifiedAt": "2025-09-04T14:44:50.755457Z"
},
{
"tableId": 401,
"title": "Valgdeltakelse",
"publishedAt": "2025-09-26T06:51:55.50102Z",
"modifiedAt": "2025-09-26T06:51:55.50102Z"
},
{
"tableId": 338,
"title": "Befolkningssammensetning_antall_andel",
"publishedAt": "2025-10-21T08:50:38.184798Z",
"modifiedAt": "2025-10-21T08:50:38.184798Z"
},
{
"tableId": 185,
"title": "Befolkningsvekst",
"publishedAt": "2025-10-21T08:56:39.806397Z",
"modifiedAt": "2025-10-21T08:56:39.806397Z"
},
{
"tableId": 367,
"title": "Overvekt, kvinner, MFR",
"publishedAt": "2025-10-21T09:02:08.952188Z",
"modifiedAt": "2025-10-21T09:02:08.952188Z"
},
{
"tableId": 699,
"title": "NPR_1",
"publishedAt": "2025-10-21T09:15:04.540704Z",
"modifiedAt": "2025-10-21T09:15:04.540704Z"
},
{
"tableId": 714,
"title": "NPR_3",
"publishedAt": "2025-10-21T10:06:51.087779Z",
"modifiedAt": "2026-01-26T16:14:30.406547Z"
},
{
"tableId": 752,
"title": "Sosialhjelpsmottakere, ettårig",
"publishedAt": "2025-12-02T13:27:15.205756Z",
"modifiedAt": "2025-12-02T13:27:15.205756Z"
},
{
"tableId": 369,
"title": "KPR_3",
"publishedAt": "2025-11-05T12:47:10.174154Z",
"modifiedAt": "2026-02-17T07:03:21.906223Z"
},
{
"tableId": 370,
"title": "KPR_1",
"publishedAt": "2025-11-05T12:46:34.40957Z",
"modifiedAt": "2026-02-17T07:19:49.707508Z"
},
{
"tableId": 187,
"title": "Personer som bor alene",
"publishedAt": "2025-11-18T09:20:23.692617Z",
"modifiedAt": "2025-12-08T09:10:42.87317Z"
},
{
"tableId": 181,
"title": "Utdanningsnivå",
"publishedAt": "2025-11-18T12:35:21.349944Z",
"modifiedAt": "2025-11-18T12:35:21.349944Z"
},
{
"tableId": 511,
"title": "Utdanningsforskjeller i forventet levealder_7aarigLF",
"publishedAt": "2025-12-02T20:40:11.468561Z",
"modifiedAt": "2025-12-03T09:43:16.729127Z"
},
{
"tableId": 510,
"title": "Utdanningsforskjeller i forventet levealder_15aarigLFKB",
"publishedAt": "2025-12-02T20:42:56.504929Z",
"modifiedAt": "2025-12-03T09:55:11.706007Z"
},
{
"tableId": 509,
"title": "Forventet levealder_årligetall_ettårALDER_UTD",
"publishedAt": "2025-12-02T20:56:33.43415Z",
"modifiedAt": "2025-12-03T09:35:51.995297Z"
},
{
"tableId": 507,
"title": "Forventet_levealder_TOT_og_utdn_7aarigLF",
"publishedAt": "2025-12-02T21:06:52.621887Z",
"modifiedAt": "2025-12-03T09:38:09.598497Z"
},
{
"tableId": 508,
"title": "Forventet levealder etter utdn_15aarigLFKB",
"publishedAt": "2025-12-02T21:13:26.471895Z",
"modifiedAt": "2025-12-03T09:39:34.354203Z"
},
{
"tableId": 660,
"title": "Forventet levealder totalt og etter utdanning, 25-årig",
"publishedAt": "2025-12-02T21:17:47.139173Z",
"modifiedAt": "2025-12-03T09:41:53.907439Z"
},
{
"tableId": 186,
"title": "Eierstatus",
"publishedAt": "2025-12-08T09:24:12.660856Z",
"modifiedAt": "2025-12-08T09:24:12.660858Z"
},
{
"tableId": 359,
"title": "Gjennomforing i videregående skole_innvand_3",
"publishedAt": "2025-12-15T10:25:56.152534Z",
"modifiedAt": "2025-12-15T10:25:56.152534Z"
},
{
"tableId": 677,
"title": "Gjennomforing i videregående skole_innvand_1",
"publishedAt": "2025-12-15T10:20:14.864234Z",
"modifiedAt": "2025-12-15T10:20:14.864236Z"
},
{
"tableId": 342,
"title": "Dødsårsaker tiårig",
"publishedAt": "2026-01-12T09:40:25.065037Z",
"modifiedAt": "2026-01-12T09:40:25.065038Z"
},
{
"tableId": 344,
"title": "Selvmord femårig",
"publishedAt": "2026-01-12T09:39:58.096599Z",
"modifiedAt": "2026-01-12T09:39:58.096601Z"
},
{
"tableId": 345,
"title": "Trafikkulykker, femårige tall",
"publishedAt": "2026-01-12T09:39:32.447777Z",
"modifiedAt": "2026-01-12T09:39:32.447779Z"
},
{
"tableId": 343,
"title": "Dødsårsaker-nøkkeltall-1990-ettårig",
"publishedAt": "2026-01-12T09:40:55.35502Z",
"modifiedAt": "2026-01-12T09:40:55.35502Z"
},
{
"tableId": 623,
"title": " Gjennomforing_VGO_utdann_1",
"publishedAt": "2026-01-09T11:02:05.09823Z",
"modifiedAt": "2026-01-09T11:02:05.098231Z"
},
{
"tableId": 360,
"title": "Gjennomforing i videregående skole_utdann_3",
"publishedAt": "2026-01-09T11:02:53.166341Z",
"modifiedAt": "2026-01-09T11:02:53.166342Z"
},
{
"tableId": 361,
"title": "Grunnskolepoeng_INNVKAT",
"publishedAt": "2026-01-08T15:38:28.022599Z",
"modifiedAt": "2026-01-08T15:38:28.022599Z"
},
{
"tableId": 341,
"title": "Drikkevannsforsyning",
"publishedAt": "2026-01-12T08:32:55.070565Z",
"modifiedAt": "2026-01-12T08:32:55.070566Z"
},
{
"tableId": 619,
"title": "Kreft, nye tilfeller_ettårig_LFKB",
"publishedAt": "2026-01-18T13:01:46.594781Z",
"modifiedAt": "2026-01-18T13:01:46.594781Z"
},
{
"tableId": 368,
"title": "Kreft, nye tilfeller_10aarigLFKB",
"publishedAt": "2026-01-18T13:01:08.097792Z",
"modifiedAt": "2026-01-18T13:01:08.097792Z"
},
{
"tableId": 396,
"title": "Vaks_dekning_Influensa",
"publishedAt": "2026-01-13T10:35:14.248503Z",
"modifiedAt": "2026-01-13T10:51:57.052147Z"
},
{
"tableId": 394,
"title": "Vaksinasjonsdekning_1",
"publishedAt": "2026-01-14T14:23:21.353955Z",
"modifiedAt": "2026-01-14T14:23:21.353955Z"
},
{
"tableId": 395,
"title": "Vaksinasjonsdekning_5aar",
"publishedAt": "2026-01-14T14:28:11.323274Z",
"modifiedAt": "2026-01-14T14:28:11.323274Z"
},
{
"tableId": 795,
"title": "Trangbodd_INNVAND",
"publishedAt": "2026-01-20T09:35:28.257245Z",
"modifiedAt": "2026-01-20T09:35:28.257246Z"
},
{
"tableId": 388,
"title": "Overvekt_verneplikt_4",
"publishedAt": "2026-01-16T14:53:42.245577Z",
"modifiedAt": "2026-01-16T14:53:42.245577Z"
},
{
"tableId": 387,
"title": "Overvekt_verneplikt_1",
"publishedAt": "2026-01-16T14:54:33.539299Z",
"modifiedAt": "2026-01-19T13:19:13.71667Z"
},
{
"tableId": 794,
"title": "Trangbodd_UTDANN",
"publishedAt": "2026-01-19T11:04:07.466643Z",
"modifiedAt": "2026-01-19T11:04:07.466643Z"
},
{
"tableId": 353,
"title": "Trening_forsvaret_sesjon1_1",
"publishedAt": "2026-01-20T09:19:15.088638Z",
"modifiedAt": "2026-01-20T09:19:15.088638Z"
},
{
"tableId": 352,
"title": "Trening_forsvaret_sesjon1_3",
"publishedAt": "2026-01-20T09:18:53.707281Z",
"modifiedAt": "2026-01-20T09:18:53.707282Z"
},
{
"tableId": 332,
"title": "Alkohol_Ungdata_KH",
"publishedAt": "2026-01-20T21:37:20.273043Z",
"modifiedAt": "2026-01-21T13:13:23.231866Z"
},
{
"tableId": 339,
"title": "Depressive symptomer_Ungdata_KH",
"publishedAt": "2026-01-20T21:41:49.907708Z",
"modifiedAt": "2026-01-21T13:16:36.980776Z"
},
{
"tableId": 348,
"title": "Ensomhet_Ungdata_KH",
"publishedAt": "2026-01-20T21:45:04.111339Z",
"modifiedAt": "2026-01-21T13:17:10.836688Z"
},
{
"tableId": 349,
"title": "Fornoyd_helsa_Ungdata_KH",
"publishedAt": "2026-01-20T21:47:33.009601Z",
"modifiedAt": "2026-01-21T13:17:36.956513Z"
},
{
"tableId": 354,
"title": "FORTROLIGVENN_Ungdata_KH",
"publishedAt": "2026-01-20T21:50:04.241821Z",
"modifiedAt": "2026-01-21T13:18:06.442229Z"
},
{
"tableId": 356,
"title": "Fritidsorg_deltak_ungdata",
"publishedAt": "2026-01-21T13:20:34.839291Z",
"modifiedAt": "2026-01-21T14:32:48.194358Z"
},
{
"tableId": 357,
"title": "Fysisk_inakt_Ungdata_KH",
"publishedAt": "2026-01-21T14:31:57.063384Z",
"modifiedAt": "2026-01-21T14:31:57.063385Z"
},
{
"tableId": 363,
"title": "Hasjbruk_Ungdata_KH",
"publishedAt": "2026-01-21T14:44:06.197311Z",
"modifiedAt": "2026-02-10T14:25:43.456118Z"
},
{
"tableId": 335,
"title": "Barn av sosialhjelpsmottakere",
"publishedAt": "2026-02-10T09:44:35.054662Z",
"modifiedAt": "2026-02-10T09:44:35.054664Z"
},
{
"tableId": 800,
"title": "Luftforurensning, grenseverdi",
"publishedAt": "2026-02-05T07:45:33.161833Z",
"modifiedAt": "2026-02-05T07:45:33.161834Z"
},
{
"tableId": 787,
"title": "Uføre_UTDANN_1",
"publishedAt": "2026-01-27T14:35:55.553669Z",
"modifiedAt": "2026-02-17T13:02:41.116533Z"
},
{
"tableId": 788,
"title": "Uføre_UTDANN_3",
"publishedAt": "2026-01-27T14:40:35.018853Z",
"modifiedAt": "2026-02-17T13:02:26.012611Z"
},
{
"tableId": 790,
"title": "AAP_UTDANN_1",
"publishedAt": "2026-01-27T14:44:59.329574Z",
"modifiedAt": "2026-01-27T14:44:59.329574Z"
},
{
"tableId": 791,
"title": "AAP_UTDANN_3",
"publishedAt": "2026-01-27T14:47:49.169494Z",
"modifiedAt": "2026-01-30T15:54:29.736474Z"
},
{
"tableId": 803,
"title": "Luftforurensning, PWC (ny)",
"publishedAt": "2026-02-05T07:45:49.979013Z",
"modifiedAt": "2026-02-05T07:45:49.979015Z"
},
{
"tableId": 397,
"title": "TRIVSEL_1",
"publishedAt": "2026-02-03T14:50:45.287376Z",
"modifiedAt": "2026-02-04T11:35:45.312498Z"
},
{
"tableId": 378,
"title": "Mobbing, 7. og 10. klasse_1",
"publishedAt": "2026-02-03T15:08:19.411738Z",
"modifiedAt": "2026-02-03T15:08:19.411738Z"
},
{
"tableId": 805,
"title": "TRIVSEL_3",
"publishedAt": "2026-02-04T11:35:10.518412Z",
"modifiedAt": "2026-02-10T14:13:24.307206Z"
},
{
"tableId": 806,
"title": "MOBBING_3",
"publishedAt": "2026-02-04T11:50:48.897641Z",
"modifiedAt": "2026-02-04T11:50:48.897642Z"
},
{
"tableId": 373,
"title": "Livskvalitet_Ungdata_KH",
"publishedAt": "2026-02-04T12:25:46.313579Z",
"modifiedAt": "2026-02-04T12:25:46.313579Z"
},
{
"tableId": 366,
"title": "Inntektsulikhet",
"publishedAt": "2026-02-06T09:20:40.904633Z",
"modifiedAt": "2026-02-06T09:20:40.904633Z"
},
{
"tableId": 804,
"title": "Sysselsatte_UTDANN_ettårig",
"publishedAt": "2026-02-06T10:22:45.587531Z",
"modifiedAt": "2026-02-06T10:22:45.587531Z"
},
{
"tableId": 807,
"title": "Sysselsatte_INNVKAT_ettårig",
"publishedAt": "2026-02-06T10:22:38.256208Z",
"modifiedAt": "2026-02-06T10:22:38.256208Z"
},
{
"tableId": 175,
"title": "Innvandrere og norskfødte med innv.foreldre _LANDBAK",
"publishedAt": "2026-02-06T14:57:46.6586Z",
"modifiedAt": "2026-02-06T14:57:46.6586Z"
},
{
"tableId": 374,
"title": "Nærmiljø_Ungdata_KH",
"publishedAt": "2026-02-09T11:18:59.078553Z",
"modifiedAt": "2026-02-09T11:18:59.078555Z"
},
{
"tableId": 375,
"title": "Treffsteder for unge_Ungdata_KH",
"publishedAt": "2026-02-09T11:26:27.884848Z",
"modifiedAt": "2026-02-09T11:26:27.88485Z"
},
{
"tableId": 380,
"title": "Regelbrudd_Ungdata_KH",
"publishedAt": "2026-02-09T12:34:43.499082Z",
"modifiedAt": "2026-02-09T12:34:43.499083Z"
},
{
"tableId": 389,
"title": "Skjermtid_Ungdata_KH",
"publishedAt": "2026-02-09T12:38:50.920167Z",
"modifiedAt": "2026-02-09T12:38:50.920169Z"
},
{
"tableId": 390,
"title": "Smertestillende_ungdata",
"publishedAt": "2026-02-09T12:42:44.0123Z",
"modifiedAt": "2026-02-09T12:42:44.012302Z"
},
{
"tableId": 399,
"title": "Trygghet_ungdata",
"publishedAt": "2026-02-09T12:47:30.435923Z",
"modifiedAt": "2026-02-09T12:47:30.435925Z"
},
{
"tableId": 650,
"title": "INNVAND_INNVKAT",
"publishedAt": "2026-02-10T09:09:07.520257Z",
"modifiedAt": "2026-02-10T09:09:07.520259Z"
},
{
"tableId": 371,
"title": "Leseferdighet_UTDANN_1",
"publishedAt": "2026-02-10T10:35:48.263727Z",
"modifiedAt": "2026-02-10T11:17:59.825692Z"
},
{
"tableId": 670,
"title": "Leseferdighet_utdann_3",
"publishedAt": "2026-02-10T10:42:04.095025Z",
"modifiedAt": "2026-02-10T10:42:04.095025Z"
},
{
"tableId": 689,
"title": "Regneferd_INNVKAT_1",
"publishedAt": "2026-02-10T11:17:08.620557Z",
"modifiedAt": "2026-02-10T11:17:08.620557Z"
},
{
"tableId": 641,
"title": "Regneferd_INNVKAT_3",
"publishedAt": "2026-02-10T11:25:22.449729Z",
"modifiedAt": "2026-02-10T11:25:22.449729Z"
},
{
"tableId": 809,
"title": "NEET_UTDANN",
"publishedAt": "2026-02-10T19:20:19.064Z",
"modifiedAt": "2026-02-17T13:02:00.715683Z"
},
{
"tableId": 810,
"title": "Andrenarko_Ungdata",
"publishedAt": "2026-03-09T15:21:38.643178Z",
"modifiedAt": "2026-03-10T13:02:04.859057Z"
},
{
"tableId": 811,
"title": "Hasjtilbud_Ungdata",
"publishedAt": "2026-03-09T15:21:24.178186Z",
"modifiedAt": "2026-03-09T15:21:24.178187Z"
},
{
"tableId": 600,
"title": "Kollektivtilbud, ungdom",
"publishedAt": "2026-02-10T16:22:04.374466Z",
"modifiedAt": "2026-02-11T09:51:32.652556Z"
},
{
"tableId": 812,
"title": "Røyk_Ungdata",
"publishedAt": "2026-03-09T15:21:08.614525Z",
"modifiedAt": "2026-03-09T15:21:08.614526Z"
},
{
"tableId": 813,
"title": "Skulketskolen_Ungdata",
"publishedAt": "2026-03-09T13:40:23.13494Z",
"modifiedAt": "2026-03-09T13:40:23.134986Z"
},
{
"tableId": 814,
"title": "Snus_Ungdata",
"publishedAt": "2026-03-09T15:20:44.524352Z",
"modifiedAt": "2026-03-09T15:20:44.524353Z"
},
{
"tableId": 815,
"title": "Vape_Ungdata",
"publishedAt": "2026-03-09T15:20:20.568002Z",
"modifiedAt": "2026-03-09T15:20:20.568003Z"
},
{
"tableId": 607,
"title": "Venner_inne_Ungdata",
"publishedAt": "2026-02-10T20:23:12.81944Z",
"modifiedAt": "2026-02-11T09:52:40.767342Z"
},
{
"tableId": 379,
"title": "NEET_INNVKAT",
"publishedAt": "2026-02-10T19:44:15.283028Z",
"modifiedAt": "2026-02-10T19:44:15.283028Z"
},
{
"tableId": 608,
"title": "Venner_ute_Ungdata",
"publishedAt": "2026-02-10T20:26:35.178965Z",
"modifiedAt": "2026-02-11T09:56:08.051697Z"
},
{
"tableId": 672,
"title": "Regneferd_UTDANN_1",
"publishedAt": "2026-02-12T14:28:00.419856Z",
"modifiedAt": "2026-02-12T14:28:00.419856Z"
},
{
"tableId": 350,
"title": "Forsvaret_Svomming_3",
"publishedAt": "2026-02-12T13:26:54.37389Z",
"modifiedAt": "2026-02-12T13:26:54.37389Z"
},
{
"tableId": 671,
"title": "Leseferdighet_innvkat_1",
"publishedAt": "2026-02-16T12:22:31.91486Z",
"modifiedAt": "2026-02-16T12:22:31.91486Z"
},
{
"tableId": 351,
"title": "Forsvaret_Svomming_1",
"publishedAt": "2026-02-16T13:58:22.362566Z",
"modifiedAt": "2026-02-16T13:58:22.362566Z"
},
{
"tableId": 372,
"title": "Leseferdighet_INNVKAT_3",
"publishedAt": "2026-02-17T12:15:42.934827Z",
"modifiedAt": "2026-02-17T12:15:42.934827Z"
},
{
"tableId": 826,
"title": "SYKEFRAV_SSB",
"publishedAt": "2026-02-23T07:05:48.431171Z",
"modifiedAt": "2026-02-23T07:49:54.660396Z"
},
{
"tableId": 583,
"title": "Vedvarende lavinntekt INNVKAT",
"publishedAt": "2026-02-23T08:44:50.032799Z",
"modifiedAt": "2026-02-23T08:44:50.032808Z"
},
{
"tableId": 839,
"title": "ROYK_STATBANK",
"publishedAt": "2026-03-04T12:48:25.930929Z",
"modifiedAt": "2026-03-04T12:48:25.93093Z"
},
{
"tableId": 346,
"title": "Barn av enslige forsørgere_3aarigLFKB",
"publishedAt": "2026-03-09T08:48:17.960139Z",
"modifiedAt": "2026-03-09T08:48:17.960139Z"
},
{
"tableId": 391,
"title": "Søvnproblemer_Ungdata_KH",
"publishedAt": "2026-03-11T11:11:19.625366Z",
"modifiedAt": "2026-03-11T11:11:19.625366Z"
},
{
"tableId": 852,
"title": "Andrenarko_Ungdata_VGO",
"publishedAt": "2026-03-23T10:36:27.084294Z",
"modifiedAt": "2026-03-23T10:36:27.084296Z"
},
{
"tableId": 859,
"title": "Hasjbruk_Ungdata_VGO",
"publishedAt": "2026-03-23T10:35:53.428246Z",
"modifiedAt": "2026-03-23T10:35:53.428247Z"
},
{
"tableId": 860,
"title": "Røyk_Ungdata_VGO",
"publishedAt": "2026-03-23T10:39:47.298722Z",
"modifiedAt": "2026-03-23T10:39:47.298723Z"
},
{
"tableId": 862,
"title": "Snus_Ungdata_VGO",
"publishedAt": "2026-03-23T10:34:30.503178Z",
"modifiedAt": "2026-03-23T10:34:30.503179Z"
},
{
"tableId": 863,
"title": "Vape_Ungdata_VGO",
"publishedAt": "2026-03-23T10:37:45.600809Z",
"modifiedAt": "2026-03-23T10:37:45.600809Z"
},
{
"tableId": 337,
"title": "Barnevern_tiltak",
"publishedAt": "2026-03-24T09:13:05.651799Z",
"modifiedAt": "2026-03-24T09:13:05.651799Z"
},
{
"tableId": 393,
"title": "Røyking_MFR",
"publishedAt": "2026-03-24T09:23:03.575448Z",
"modifiedAt": "2026-03-24T09:23:03.575448Z"
}
]

48
tests/test_cache.py Normal file
View File

@@ -0,0 +1,48 @@
"""Tests for cache module."""
import time
from fhi_statistikk_mcp.cache import TTLCache
def test_set_and_get():
    """A freshly stored value can be read back within its TTL."""
    c = TTLCache()
    c.set("key", "value", 60)
    assert c.get("key") == "value"


def test_get_missing_key():
    """Looking up a key that was never stored yields None."""
    assert TTLCache().get("nonexistent") is None


def test_expiry():
    """A value becomes unavailable once its TTL has elapsed."""
    c = TTLCache()
    c.set("key", "value", 0.1)
    time.sleep(0.15)
    assert c.get("key") is None


def test_clear():
    """clear() removes every stored entry at once."""
    c = TTLCache()
    c.set("a", 1, 60)
    c.set("b", 2, 60)
    c.clear()
    assert c.get("a") is None
    assert c.get("b") is None


def test_overwrite():
    """Re-setting an existing key replaces the old value."""
    c = TTLCache()
    c.set("key", "old", 60)
    c.set("key", "new", 60)
    assert c.get("key") == "new"


def test_different_ttls():
    """Entries expire independently, each according to its own TTL."""
    c = TTLCache()
    c.set("short", "value", 0.1)
    c.set("long", "value", 60)
    time.sleep(0.15)
    assert c.get("short") is None
    assert c.get("long") == "value"

366
tests/test_transformers.py Normal file
View File

@@ -0,0 +1,366 @@
"""Tests for transformers module."""
import pytest
from fhi_statistikk_mcp.transformers import (
complete_query_dimensions,
extract_metadata_fields,
flatten_categories,
is_year_dimension,
matches_search,
navigate_hierarchy,
normalize_for_search,
normalize_year_value,
parse_csv_to_rows,
strip_html,
summarize_dimensions,
)
# --- strip_html ---
def test_strip_html_removes_tags():
    """Markup elements are dropped, leaving only the text content."""
    cleaned = strip_html("<p>Hello <b>world</b></p>")
    assert cleaned == "Hello world"


def test_strip_html_preserves_plain_text():
    """Input without any markup passes through unchanged."""
    assert strip_html("No tags here") == "No tags here"


def test_strip_html_handles_empty():
    """Empty string stays empty and None is returned as-is."""
    assert strip_html("") == ""
    assert strip_html(None) is None


def test_strip_html_handles_links():
    """Anchor tags are removed while their link text is kept."""
    assert strip_html('<a href="http://example.com">link</a>') == "link"


def test_strip_html_decodes_entities():
    """HTML entities are decoded to their literal characters."""
    assert strip_html("&amp; &lt;b&gt; &nbsp;") == "& <b>"
# --- normalize_for_search / matches_search ---
def test_normalize_strips_accents():
    """Norwegian characters fold to their plain-ASCII counterparts."""
    folded = [normalize_for_search(s) for s in ("Tromsø", "Bærum", "Ålesund")]
    assert folded == ["tromso", "barum", "alesund"]


def test_normalize_lowercases():
    """Upper-case input is lowered."""
    assert normalize_for_search("OSLO") == "oslo"


def test_matches_search_single_word():
    """A single search token matches only titles containing it."""
    assert matches_search("Befolkningsvekst", "befolkning")
    assert not matches_search("Befolkningsvekst", "helse")


def test_matches_search_multiple_words():
    """Every token in the query must be present for a match."""
    assert matches_search("Befolkningsvekst Oslo", "befolkning oslo")
    assert not matches_search("Befolkningsvekst", "befolkning oslo")


def test_matches_search_accent_insensitive():
    """Accents in the haystack do not prevent an ASCII query from matching."""
    assert matches_search("Tromsø kommune", "tromso")
    assert matches_search("Bærum", "barum")
# --- normalize_year_value ---
def test_normalize_year_short():
    """A bare four-digit year is expanded to the YYYY_YYYY form."""
    assert normalize_year_value("2020") == "2020_2020"


def test_normalize_year_already_full():
    """A value already in YYYY_YYYY form passes through unchanged."""
    assert normalize_year_value("2020_2020") == "2020_2020"


def test_normalize_year_non_numeric():
    """Non-numeric values are left untouched."""
    assert normalize_year_value("all") == "all"
# --- flatten_categories ---
# Three-level geography fixture: country -> county -> municipality.
NESTED_TREE = [
    {
        "value": "0",
        "label": "Hele landet",
        "children": [
            {
                "value": "03",
                "label": "Oslo (fylke)",
                "children": [
                    {"value": "0301", "label": "Oslo", "children": []},
                ],
            },
            {
                "value": "18",
                "label": "Nordland",
                "children": [
                    {"value": "1804", "label": "Bodø", "children": []},
                    {"value": "1806", "label": "Narvik", "children": []},
                ],
            },
        ],
    },
]


def test_flatten_categories_count():
    """Flattening yields one entry per node anywhere in the tree."""
    assert len(flatten_categories(NESTED_TREE)) == 6


def test_flatten_categories_parent_values():
    """Each flattened entry records its parent's value (None at the root)."""
    lookup = {entry["value"]: entry for entry in flatten_categories(NESTED_TREE)}
    expected_parents = {"0": None, "03": "0", "0301": "03", "1804": "18"}
    for value, parent in expected_parents.items():
        assert lookup[value]["parent_value"] == parent


def test_flatten_categories_empty():
    """An empty tree flattens to an empty list."""
    assert flatten_categories([]) == []
# --- navigate_hierarchy ---
def test_navigate_top_level():
    """With no parent given, only the root node is returned, with a child count."""
    top = navigate_hierarchy(NESTED_TREE)
    assert len(top) == 1
    root = top[0]
    assert root["value"] == "0"
    assert root["child_count"] == 2


def test_navigate_children():
    """Passing a parent value lists exactly that node's children."""
    children = navigate_hierarchy(NESTED_TREE, parent_value="18")
    assert len(children) == 2
    assert {entry["value"] for entry in children} == {"1804", "1806"}


def test_navigate_search():
    """A search term narrows the result to matching nodes."""
    hits = navigate_hierarchy(NESTED_TREE, search="bodø")
    assert len(hits) == 1
    assert hits[0]["value"] == "1804"


def test_navigate_search_accent_insensitive():
    """An ASCII search term still finds accented labels."""
    hits = navigate_hierarchy(NESTED_TREE, search="bodo")
    assert len(hits) == 1
    assert hits[0]["label"] == "Bodø"
# --- summarize_dimensions ---
def test_summarize_fixed_dimension():
    """A single-category dimension is summarized as fixed."""
    dims = [{
        "code": "KJONN",
        "label": "Kjønn",
        "categories": [{"value": "0", "label": "kjønn samlet", "children": []}],
    }]
    summary = summarize_dimensions(dims)
    assert len(summary) == 1
    assert summary[0]["is_fixed"] is True
    assert summary[0]["total_categories"] == 1


def test_summarize_year_dimension():
    """Year dimensions report the YYYY_YYYY value format and a year range."""
    categories = [
        {"value": f"{year}_{year}", "label": str(year), "children": []}
        for year in range(2020, 2025)
    ]
    summary = summarize_dimensions(
        [{"code": "AAR", "label": "År", "categories": categories}]
    )
    assert summary[0]["value_format"] == "YYYY_YYYY (e.g. 2020_2020)"
    assert summary[0]["range"] == "2020..2024"


def test_summarize_hierarchical_large():
    """A large nested dimension is flagged hierarchical with top-level values."""
    municipalities = [
        {"value": str(i), "label": f"Municipality {i}", "children": []}
        for i in range(1, 30)
    ]
    root = [{"value": "0", "label": "Hele landet", "children": municipalities}]
    summary = summarize_dimensions(
        [{"code": "GEO", "label": "Geografi", "categories": root}]
    )
    assert summary[0]["is_hierarchical"] is True
    assert "top_level_values" in summary[0]
    assert summary[0]["top_level_values"][0]["child_count"] == 29


def test_summarize_small_dimension():
    """Small flat dimensions list their values inline."""
    categories = [
        {"value": "TELLER", "label": "antall", "children": []},
        {"value": "RATE", "label": "prosent", "children": []},
    ]
    summary = summarize_dimensions(
        [{"code": "MEASURE_TYPE", "label": "Måltall", "categories": categories}]
    )
    assert len(summary[0]["values"]) == 2
    assert summary[0]["values"][0] == {"value": "TELLER", "label": "antall"}
# --- extract_metadata_fields ---
def test_extract_metadata_dict():
    """Known paragraph headers are mapped onto the structured metadata fields."""
    meta = {
        "name": "Test",
        "isOfficialStatistics": True,
        "paragraphs": [
            {"header": "Beskrivelse", "content": "<p>Some description</p>"},
            {"header": "Oppdateringsfrekvens", "content": "Årlig"},
            {"header": "Nøkkelord", "content": "Helse,Data"},
            {"header": "Kildeinstitusjon", "content": "FHI"},
        ],
    }
    extracted = extract_metadata_fields(meta)
    assert extracted["is_official_statistics"] is True
    assert extracted["description"] == "Some description"
    assert extracted["update_frequency"] == "Årlig"
    assert extracted["keywords"] == ["Helse", "Data"]
    assert extracted["source_institution"] == "FHI"
def test_extract_metadata_strips_html():
    """HTML tags in paragraph content are stripped from the description."""
    paragraph = {
        "header": "Beskrivelse",
        "content": "<p>Text with <a href='#'>link</a></p>",
    }
    extracted = extract_metadata_fields({"paragraphs": [paragraph]})
    assert extracted["description"] == "Text with link"
# --- parse_csv_to_rows ---
def test_parse_csv_basic():
    """Semicolon-separated, fully-quoted CSV parses into typed row dicts."""
    payload = '"Col A";"Col B"\n"Oslo";"123"\n"Bergen";"456"\n'
    parsed = parse_csv_to_rows(payload)
    assert parsed["total_rows"] == 2
    assert parsed["truncated"] is False
    first = parsed["rows"][0]
    assert first["Col A"] == "Oslo"
    assert first["Col B"] == 123
def test_parse_csv_truncation():
    """Rows beyond max_rows are dropped and the result is marked truncated."""
    payload = '"X"\n"a"\n"b"\n"c"\n'
    parsed = parse_csv_to_rows(payload, max_rows=2)
    assert parsed["total_rows"] == 3
    assert parsed["truncated"] is True
    assert len(parsed["rows"]) == 2
def test_parse_csv_numeric_conversion():
    """Cells are coerced: ints and floats become numbers, ".." becomes None,
    anything else stays a string."""
    payload = '"int";"float";"missing";"text"\n"42";"3.14";"..";"hello"\n'
    first = parse_csv_to_rows(payload)["rows"][0]
    assert first["int"] == 42
    assert first["float"] == 3.14
    assert first["missing"] is None
    assert first["text"] == "hello"
def test_parse_csv_comma_decimal():
    """Norwegian comma decimals ("1,5") are converted to floats."""
    parsed = parse_csv_to_rows('"val"\n"1,5"\n')
    assert parsed["rows"][0]["val"] == 1.5
# --- is_year_dimension ---
def test_is_year_by_code():
    """Codes AAR and YEAR are recognized as year dimensions; GEO is not."""
    for code, expected in (("AAR", True), ("YEAR", True), ("GEO", False)):
        assert is_year_dimension(code, []) is expected
def test_is_year_by_value_format():
    """A YYYY_YYYY-shaped value marks even an unknown code as a year dimension."""
    categories = [{"value": "2020_2020", "label": "2020", "parent_value": None}]
    assert is_year_dimension("CUSTOM", categories) is True
# --- complete_query_dimensions ---
# Dimension-metadata fixture for the complete_query_dimensions tests below:
# GEO is hierarchical (reuses NESTED_TREE), AAR holds two YYYY_YYYY periods,
# KJONN and ALDER are single-category, MEASURE_TYPE offers two measures.
SAMPLE_DIMS = [
    {"code": "GEO", "label": "Geografi", "categories": NESTED_TREE},
    {"code": "AAR", "label": "År", "categories": [
        {"value": "2023_2023", "label": "2023", "children": []},
        {"value": "2024_2024", "label": "2024", "children": []},
    ]},
    # Single-category dimensions — tests expect these to be auto-included.
    {"code": "KJONN", "label": "Kjønn", "categories": [
        {"value": "0", "label": "kjønn samlet", "children": []},
    ]},
    {"code": "ALDER", "label": "Alder", "categories": [
        {"value": "0_120", "label": "alle aldre", "children": []},
    ]},
    {"code": "MEASURE_TYPE", "label": "Måltall", "categories": [
        {"value": "TELLER", "label": "antall", "children": []},
        {"value": "RATE", "label": "prosent", "children": []},
    ]},
]
def test_complete_dims_fixed_auto_included():
    """Single-category dimensions are filled in automatically with their only value."""
    requested = [
        {"code": "GEO", "filter": "item", "values": ["0301"]},
        {"code": "AAR", "filter": "bottom", "values": ["1"]},
    ]
    completed = complete_query_dimensions(SAMPLE_DIMS, requested)
    by_code = {dim["code"]: dim for dim in completed}
    assert "KJONN" in by_code
    assert "ALDER" in by_code
    assert by_code["KJONN"]["values"] == ["0"]
def test_complete_dims_measure_type_defaults_to_all():
    """An omitted MEASURE_TYPE dimension defaults to filter "all", values ["*"]."""
    requested = [
        {"code": "GEO", "filter": "item", "values": ["0"]},
        {"code": "AAR", "filter": "item", "values": ["2024"]},
    ]
    completed = complete_query_dimensions(SAMPLE_DIMS, requested)
    measure = next(dim for dim in completed if dim["code"] == "MEASURE_TYPE")
    assert measure["filter"] == "all"
    assert measure["values"] == ["*"]
def test_complete_dims_year_normalization():
    """Bare year values ("2024") are expanded to the YYYY_YYYY form."""
    requested = [
        {"code": "GEO", "filter": "item", "values": ["0"]},
        {"code": "AAR", "filter": "item", "values": ["2024"]},
    ]
    completed = complete_query_dimensions(SAMPLE_DIMS, requested)
    year_dim = next(dim for dim in completed if dim["code"] == "AAR")
    assert year_dim["values"] == ["2024_2024"]
def test_complete_dims_missing_required_raises():
    """Leaving out a required multi-value dimension (GEO) raises ValueError."""
    requested = [{"code": "AAR", "filter": "item", "values": ["2024"]}]
    with pytest.raises(ValueError, match="Missing required dimensions.*GEO"):
        complete_query_dimensions(SAMPLE_DIMS, requested)
def test_complete_dims_missing_code_key_raises():
    """A dimension spec lacking a 'code' key is rejected with ValueError."""
    bad_spec = {"filter": "item", "values": ["0"]}
    with pytest.raises(ValueError, match="missing 'code' key"):
        complete_query_dimensions(SAMPLE_DIMS, [bad_spec])
def test_complete_dims_case_insensitive():
    """Lower-case dimension codes are matched and echoed back in canonical case."""
    requested = [
        {"code": "geo", "filter": "item", "values": ["0"]},
        {"code": "aar", "filter": "item", "values": ["2024"]},
    ]
    completed = complete_query_dimensions(SAMPLE_DIMS, requested)
    returned_codes = {dim["code"] for dim in completed}
    assert "GEO" in returned_codes
    assert "AAR" in returned_codes
def test_complete_dims_no_year_normalization_for_top_filter():
    """Count arguments for the "top" filter are left untouched (no "3" -> "3_3")."""
    requested = [
        {"code": "GEO", "filter": "item", "values": ["0"]},
        {"code": "AAR", "filter": "top", "values": ["3"]},
    ]
    completed = complete_query_dimensions(SAMPLE_DIMS, requested)
    year_dim = next(dim for dim in completed if dim["code"] == "AAR")
    assert year_dim["values"] == ["3"]  # a count, not a normalized year range