Skip to content

Commit 722c9d5

Browse files
committed
Use pdbufr to read DWD radar data in bufr format into DataFrame
1 parent 739c49b commit 722c9d5

File tree

6 files changed

+129
-1
lines changed

6 files changed

+129
-1
lines changed

.github/workflows/install.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ if [ "${flavor}" = "testing" ]; then
2424
--extras=radar \
2525
--extras=radarplus \
2626
--extras=restapi \
27-
--extras=sql
27+
--extras=sql \
28+
--extras=bufr
2829

2930
elif [ "${flavor}" = "docs" ]; then
3031
poetry install --verbose --no-interaction --with=docs --extras=interpolation

.github/workflows/tests.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ jobs:
8383
brew install eccodes
8484
export WD_ECCODES_DIR=$(brew --prefix eccodes)
8585
86+
- name: Install eccodes (Mac only)
  run: |
    if [ "$RUNNER_OS" == "macOS" ]; then
      # A plain `export` only lives for this step's shell. Appending to the
      # $GITHUB_ENV file is what makes WD_ECCODES_DIR visible to later steps.
      brew install eccodes && echo "WD_ECCODES_DIR=$(brew --prefix eccodes)" >> "$GITHUB_ENV"
    fi
91+
8692
- name: Install project
8793
run: .github/workflows/install.sh testing
8894

tests/provider/dwd/radar/test_api_historic.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import pytest
99
from dirty_equals import IsDatetime, IsDict, IsInt, IsList, IsNumeric, IsStr
1010

11+
from wetterdienst.eccodes import ensure_eccodes
1112
from wetterdienst.provider.dwd.radar import (
1213
DwdRadarDataFormat,
1314
DwdRadarDataSubset,
@@ -516,6 +517,26 @@ def test_radar_request_site_historic_pe_bufr(default_settings):
516517
decoder = pybufrkit.decoder.Decoder()
517518
decoder.process(payload, info_only=True)
518519

520+
if ensure_eccodes():
521+
df = results[0].df
522+
523+
assert not df.empty
524+
525+
print(df.dropna().query("value != 0"))
526+
527+
assert df.columns.tolist() == [
528+
"station_id",
529+
"latitude",
530+
"longitude",
531+
"height",
532+
"projectionType",
533+
"pictureType",
534+
"date",
535+
"echotops",
536+
]
537+
538+
assert not df.dropna().empty
539+
519540

520541
@pytest.mark.xfail(reason="month_year not matching start_date")
521542
@pytest.mark.remote
@@ -569,6 +590,13 @@ def test_radar_request_site_historic_pe_timerange(fmt, default_settings):
569590
)
570591
assert re.match(bytes(header, encoding="ascii"), payload[:115])
571592

593+
first = results[0]
594+
595+
if fmt == DwdRadarDataFormat.BUFR:
596+
assert not first.df.dropna().empty
597+
598+
assert first.df.columns == [""]
599+
572600

573601
@pytest.mark.remote
574602
def test_radar_request_site_historic_px250_bufr_yesterday(default_settings):
@@ -637,6 +665,10 @@ def test_radar_request_site_historic_px250_bufr_timerange(default_settings):
637665

638666
assert len(results) == 12
639667

668+
first = results[0]
669+
670+
assert not first.df.dropna().empty
671+
640672

641673
@pytest.mark.remote
642674
def test_radar_request_site_historic_sweep_vol_v_hdf5_yesterday(default_settings):

tests/test_settings.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def test_default_settings(caplog):
3232
default_settings = Settings.default()
3333
assert not default_settings.cache_disable
3434
assert re.match(WD_CACHE_DIR_PATTERN, default_settings.cache_dir)
35+
assert default_settings.eccodes_dir is None
3536
assert default_settings.fsspec_client_kwargs == {}
3637
assert default_settings.ts_humanize
3738
assert default_settings.ts_shape == "long"
@@ -44,6 +45,7 @@ def test_default_settings(caplog):
4445
"precipitation_height": 20.0,
4546
}
4647
assert default_settings.ts_interpolation_use_nearby_station_distance == 1
48+
assert not default_settings.read_bufr
4749
log_message = caplog.messages[0]
4850
assert re.match(WD_CACHE_ENABLED_PATTERN, log_message)
4951

wetterdienst/eccodes.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# -*- coding: utf-8 -*-
2+
# Copyright (c) 2018-2022, earthobservations developers.
3+
# Distributed under the MIT License. See LICENSE for more info.
4+
def ensure_eccodes() -> bool:
    """Report whether the ``eccodes`` Python package can be imported and queried.

    The package is imported lazily inside the function and then asked for its
    API version via ``eccodes.eccodes.codes_get_api_version()``.

    :return: ``True`` if both the import and the version call succeed,
        ``False`` if the import fails or the call raises ``RuntimeError``.
    """
    try:
        import eccodes

        eccodes.eccodes.codes_get_api_version()
    # Catch ImportError rather than only its subclass ModuleNotFoundError, so a
    # package that is present but fails while loading is reported as
    # "not available" instead of propagating to the caller.
    except (ImportError, RuntimeError):
        return False

    return True

wetterdienst/provider/dwd/radar/api.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@
5555

5656
log = logging.getLogger(__name__)
5757

58+
# Maps a radar parameter to the name fragments used to pick its value columns
# out of the decoded BUFR table: the BUFR branch of `query` keeps every column
# whose name contains one of the listed strings.
BUFR_PARAMETER_MAPPING = {
59+
DwdRadarParameter.PE_ECHO_TOP: ["echoTops"],
60+
DwdRadarParameter.PG_REFLECTIVITY: ["horizontalReflectivity"],
61+
DwdRadarParameter.LMAX_VOLUME_SCAN: ["horizontalReflectivity"],
62+
DwdRadarParameter.PX250_REFLECTIVITY: ["horizontalReflectivity"],
63+
}
64+
65+
# Evaluated once when this module is imported; BUFR decoding in `query` only
# runs when this flag and `settings.read_bufr` are both truthy.
ECCODES_FOUND = ensure_eccodes()
66+
5867

5968
@dataclass
6069
class RadarResult:
@@ -64,6 +73,8 @@ class RadarResult:
6473
"""
6574

6675
data: BytesIO
76+
# placeholder for bufr files, which are read into pandas.DataFrame if eccodes available
77+
df: pl.DataFrame = field(default_factory=pl.DataFrame)
6778
timestamp: dt.datetime = None
6879
url: str = None
6980
filename: str = None
@@ -415,6 +426,69 @@ def query(self) -> Iterator[RadarResult]:
415426
verify_hdf5(result.data)
416427
except Exception as e: # pragma: no cover
417428
log.exception(f"Unable to read HDF5 file. {e}")
429+
430+
if self.format == DwdRadarDataFormat.BUFR:
431+
if ECCODES_FOUND and self.settings.read_bufr:
432+
buffer = result.data
433+
434+
# TODO: pdbufr currently doesn't seem to allow reading directly from BytesIO
435+
tf = tempfile.NamedTemporaryFile("w+b")
436+
tf.write(buffer.read())
437+
tf.seek(0)
438+
439+
df = pdbufr.read_bufr(
440+
tf.name,
441+
columns="data",
442+
flat=True
443+
)
444+
445+
value_vars = []
446+
parameters = BUFR_PARAMETER_MAPPING[self.parameter]
447+
for par in parameters:
448+
value_vars.extend([col for col in df.columns if par in col])
449+
value_vars = set(value_vars)
450+
id_vars = df.columns.difference(value_vars)
451+
id_vars = [iv for iv in id_vars if iv.startswith("#1#")]
452+
453+
df = df.melt(id_vars=id_vars,var_name="parameter",value_vars=value_vars, value_name="value")
454+
df.columns = [col[3:] if col.startswith("#1#") else col for col in df.columns]
455+
456+
df = df.rename(
457+
columns={
458+
"stationNumber": Columns.STATION_ID.value,
459+
"latitude": Columns.LATITUDE.value,
460+
"longitude": Columns.LONGITUDE.value,
461+
"heightOfStation": Columns.HEIGHT.value,
462+
}
463+
)
464+
465+
466+
# df[Columns.STATION_ID.value] = df[Columns.STATION_ID.value].astype(int).astype(str)
467+
468+
date_columns = ["year", "month", "day", "hour", "minute"]
469+
dates = df.loc[:, date_columns].apply(
470+
lambda x: datetime(
471+
year=x.year, month=x.month, day=x.day, hour=x.hour, minute=x.minute
472+
),
473+
axis=1,
474+
)
475+
df.insert(len(df.columns) - 1, Columns.DATE.value, dates)
476+
df = df.drop(columns=date_columns)
477+
478+
def split_index_parameter(text: str):
    """Split a flat column label of the form ``#<index>#<name>`` into its parts.

    :param text: column label, e.g. ``#1#echoTops``
    :return: ``(name, index)`` with ``index`` converted to ``float``; if there
        is no ``#`` after the first character, ``(text, None)`` is returned.
    :raises ValueError: if the text between the two markers is not a number.
    """
    # str.find reports "not found" as -1. str.index raises ValueError instead,
    # which made the fallback branch below unreachable.
    split_index = text.find("#", 1)
    if split_index == -1:
        return text, None
    index = text[1:split_index]
    parameter = text[split_index + 1 :]
    return parameter, float(index)
485+
486+
df[["parameter", "index"]] = df.pop("parameter").map(split_index_parameter).tolist()
487+
488+
df = df.sort_values(["parameter", "index"])
489+
490+
result.df = df
491+
418492
yield result
419493

420494
@staticmethod

0 commit comments

Comments
 (0)