Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
* [743](https://github.com/dbekaert/RAiDER/pull/743) - Switched from HTTPS to DAP4 for retrieving MERRA2 data, and suppressed a warning for using DAP4 for GMAO data where doing so is not possible.

### Fixed
* [794](https://github.com/dbekaert/RAiDER/pull/794) - Worked around the ongoing data migration in the UNR archive, which has disrupted access to data from much of the eastern hemisphere after 2023.
* [787](https://github.com/dbekaert/RAiDER/pull/787) - Updated weather model uncertainty estimation and included error thresholds to discard unreliable observations.
* [782](https://github.com/dbekaert/RAiDER/pull/782) - Fixed bug with handling corrupted or non-existent UNR hosted GNSS ZIP files.
* [781](https://github.com/dbekaert/RAiDER/pull/781) - In the combine workflow, accurately pass and write out matching midnight datetimes.
Expand Down
36 changes: 28 additions & 8 deletions test/test_downloadGNSS.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import pytest
import requests
from unittest import mock
Expand Down Expand Up @@ -92,18 +93,37 @@ def test_download_UNR(tmp_path):
assert outDict["path"] == expected_path


def test_download_UNR_2():
def test_download_UNR_2(caplog):
statID = "MORZ"
year = 2000
with pytest.raises(ValueError):
download_UNR(statID, year, download=True)


def test_download_UNR_3():

# Capture logs at the WARNING level and above
with caplog.at_level(logging.WARNING):
result = download_UNR(statID, year, download=True)

# 1. Assert the correct warning was logged
expected_warning = f"Skipping {statID}: Not found in either archive for {year}."
assert expected_warning in caplog.text

# 2. Assert the function returns the expected dictionary with a falsy path
assert result["ID"] == statID
assert result["year"] == year
assert not result["path"] # Asserts path is None, False, or empty string


def test_download_UNR_3(caplog):
statID = "DUMY"
year = 2020
with pytest.raises(ValueError):
download_UNR(statID, year, download=True)

with caplog.at_level(logging.WARNING):
result = download_UNR(statID, year, download=True)

expected_warning = f"Skipping {statID}: Not found in either archive for {year}."
assert expected_warning in caplog.text

assert result["ID"] == statID
assert result["year"] == year
assert not result["path"]


def test_download_UNR_4():
Expand Down
105 changes: 84 additions & 21 deletions tools/RAiDER/gnss/downloadGNSSDelays.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,45 @@ def get_station_list(

def get_stats_by_llh(llhBox=None, baseURL=_UNR_URL):
"""
Function to pull lat, lon, height, beginning date, end date, and number of solutions for stations inside the bounding box llhBox.
llhBox should be a tuple in SNWE format.
Pull lat, lon, height for stations inside the bounding box from
both legacy and IGS20 UNR holdings. Prioritizes IGS20 coordinates.
"""
if llhBox is None:
llhBox = [-90, 90, -180, 180]
S, N, W, E = llhBox

stationHoldings = f'{baseURL}NGLStationPages/llh.out'
# it's a file like object and works just like a file
url_legacy = f'{baseURL}NGLStationPages/llh.out'
url_igs20 = f'{baseURL}gps_timeseries/IGS20/llh/llh.out'
col_names = ['ID', 'Lat', 'Lon', 'Hgt_m']

stations = pd.read_csv(stationHoldings, sep=r'\s+', names=['ID', 'Lat', 'Lon', 'Hgt_m'])
# 1. Fetch IGS20 list
try:
stat_igs = pd.read_csv(url_igs20, sep=r'\s+', names=col_names)
stat_igs['archive'] = 'igs20'
except Exception as e:
logger.warning("Failed to fetch IGS20 llh.out: %s", e)
stat_igs = pd.DataFrame(columns=col_names + ['archive'])

# convert lons from [-360, 0] to [-180, 180]
stations['Lon'] = ((stations['Lon'].values + 180) % 360) - 180
# 2. Fetch Legacy list
try:
stat_leg = pd.read_csv(url_legacy, sep=r'\s+', names=col_names)
stat_leg['archive'] = 'legacy'
except Exception as e:
logger.warning("Failed to fetch legacy llh.out: %s", e)
stat_leg = pd.DataFrame(columns=col_names + ['archive'])

# 3. Merge and prioritize IGS20
stats_combined = pd.concat([stat_igs, stat_leg], ignore_index=True)
stats_combined = (
stats_combined.drop_duplicates(subset=['ID'], keep='first')
.reset_index(drop=True)
)

stations = filterToBBox(stations, llhBox)
# Convert lons from [-360, 0] to [-180, 180]
stats_combined['Lon'] = ((stats_combined['Lon'].values + 180) % 360) - 180

return stations
stats_combined = filterToBBox(stats_combined, llhBox)

return stats_combined


def download_tropo_delays(
Expand Down Expand Up @@ -142,7 +163,7 @@ def download_tropo_delays(
def download_UNR(statID, year, writeDir=".", download=False, baseURL=_UNR_URL):
"""
Download a zip file containing tropospheric delays for a given
station and year.
station and year, with a fallback to the legacy archive.

The URL format is:
http://geodesy.unr.edu/gps_timeseries/IGS20/trop/<ssss>/
Expand Down Expand Up @@ -173,23 +194,65 @@ def download_UNR(statID, year, writeDir=".", download=False, baseURL=_UNR_URL):
f"Data repository {baseURL} has not yet been implemented"
)

URL = (
stat_upper = statID.upper()

# First attempt: IGS20 framework
url_igs20 = (
f"{baseURL}gps_timeseries/IGS20/trop/"
f"{statID.upper()}/{statID.upper()}.{year}.trop.zip"
f"{stat_upper}/{stat_upper}.{year}.trop.zip"
)

# Fallback: Legacy operational framework
url_legacy = (
f"{baseURL}gps_timeseries/trop/"
f"{stat_upper}/{stat_upper}.{year}.trop.zip"
)

logger.debug("Currently checking station %s in %s", statID, year)
logger.debug("Checking station %s in %s", statID, year)

if download:
saveLoc = os.path.abspath(
os.path.join(writeDir, f"{statID.upper()}.{year}.trop.zip")
)
filepath = download_url(URL, saveLoc)
if filepath == "":
raise ValueError("Year or station ID does not exist")
filename = f"{stat_upper}.{year}.trop.zip"
save_loc = os.path.abspath(os.path.join(writeDir, filename))

# Try IGS20 first
filepath = download_url(url_igs20, save_loc)

# If IGS20 is missing, try legacy

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this work if a subset of stations is available but others are not?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! Previously, if a user requested a list of 10 stations and the 3rd one was missing from both archives, the script would crash immediately. It would fail to process the remaining 7 stations, even if they were perfectly valid and existed in the archives. This was the existing behavior.

Since we're iterating over stations/years, we should not raise an exception that halts the entire script. Instead, I have now changed it to log a warning that the specific station/year failed and return a falsy path, so that the caller skips it and moves on to the next iteration.

if not filepath:
logger.debug(
"IGS20 not found for %s in %s. Trying legacy.",
statID,
year,
)
filepath = download_url(url_legacy, save_loc)

# If BOTH fail, just log a warning. Do not raise ValueError.
if not filepath:
logger.warning(
"Skipping %s: Not found in either archive for %s.",
statID,
year,
)

else:
filepath = check_url(URL)
filepath = check_url(url_igs20)
if not filepath:
logger.debug(
"IGS20 not found for %s in %s. Checking legacy.",
statID,
year,
)
filepath = check_url(url_legacy)

if not filepath:
logger.warning(
"Skipping %s: Not found in either archive for %s.",
statID,
year,
)

# If filepath is None/False, the caller's list comprehension will safely
# ignore this dictionary because of `if fileurl['path']`
return {"ID": statID, "year": year, "path": filepath}


Expand Down
Loading