-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlpisSlovakia.R
More file actions
105 lines (86 loc) · 3.46 KB
/
lpisSlovakia.R
File metadata and controls
105 lines (86 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# ----
# title : build occurrence database - _INSERT
# description : this script integrates data of '_INSERT' (LINK)
# license : https://creativecommons.org/licenses/by-sa/4.0/
# authors : Peter Pothmann, Steffen Ehrmann
# date : 2024-MM-DD
# version : 0.0.0
# status : find data, update, inventarize, validate, normalize, done
# comment : file.edit(paste0(dir_docs, "/documentation/04_build_occurrence_database.md"))
# ----
# doi/url : _INSERT
# license : _INSERT
# geography : _INSERT
# period : _INSERT
# variables :
# - cover : _INSERT
# - use : _INSERT
# sampling : _INSERT
# purpose : _INSERT
# data type : _INSERT
# features : _INSERT
# ----
# Name of the dataset handled by this script. It is reused further down for the
# log banner, the dataset directory, the registered dataseries and the ontology
# source.
thisDataset <- "lpisSlovakia"
message("\n---- ", thisDataset, " ----")
# Dataset directory: <dir_occurr_data><thisDataset>/.
# `dir_occurr_data` is not defined in this script and has to exist in the
# calling environment (presumably set by a project boot script - confirm).
# paste0() inserts no separator, so it is expected to end in a path separator.
thisDir <- paste0(dir_occurr_data, thisDataset, "/")
message(" --> handling metadata")
# Register the metadata of this dataseries under the dataset name.
# Every `_INSERT` token below is a template placeholder: the script cannot be
# parsed or run until each one has been replaced by a real value.
# regDataseries() and read.bib() are not defined in this file; the reference is
# read from a .bib file inside the dataset directory.
regDataseries(name = thisDataset,
description = _INSERT,
homepage = _INSERT,
version = _INSERT,
licence_link = _INSERT,
reference = read.bib(paste0(thisDir, "_INSERT.bib")))
# Presumably registers the dataset as a new source in the ontology found at
# `path_onto_occurr` (defined outside this script) - confirm against
# new_source(). The date placeholder is passed through ymd(), so it is expected
# in year-month-day order (assuming lubridate::ymd - TODO confirm).
new_source(name = thisDataset, date = ymd(_INSERT), ontology = path_onto_occurr)
message(" --> handling data")
# Optional decompression step: fill in the archive name and enable whichever of
# unzip()/untar() matches the archive; both extract into the dataset directory.
# data_path_cmpr <- paste0(thisDir, "")
# unzip(exdir = thisDir, zipfile = data_path_cmpr)
# untar(exdir = thisDir, tarfile = data_path_cmpr)
# Path of the raw data file inside the dataset directory (placeholder = file name).
data_path <- paste0(thisDir, _INSERT)
# Template menu of readers: keep ONLY the one call that matches the format of
# `data_path` and delete the others. As written, every call assigns to `data`,
# so each result overwrites the previous one and only the last (st_read) would
# survive; a reader applied to a file of another format may also fail.
data <- read_csv(file = data_path)
data <- read_tsv(file = data_path)
data <- read_excel(path = data_path)
data <- read_parquet(file = data_path)
data <- read_rds(file = data_path)
# Spatial formats: read with sf and convert the result to a tibble.
data <- st_read(dsn = data_path) |> as_tibble()
message(" --> normalizing data")
# Add a running observation ID as the first column, then turn the table into an
# sf object built from the two coordinate columns (placeholders) in the given
# CRS (placeholder). The trailing `#|>` and the commented st_transform() line
# are a disabled reprojection to EPSG:4326; re-enable both together if the
# source CRS differs.
data <- data |>
mutate(obsID = row_number(), .before = 1) |>
st_as_sf(coords = c("_INSERT", "_INSERT"), crs = _INSERT) #|>
# st_transform(crs = 4326)
# Keep the geometry separately, keyed by obsID; it is joined back onto the
# harmonized table further down.
geom <- data |>
select(obsID, geometry)
# Continue with the attribute table only.
data <- data |>
st_drop_geometry()
# Columns set aside in `other`, keyed by obsID; this object is saved to
# output_other.rds at the end of the script. The placeholder is the column
# selection to keep.
other <- data |>
select(obsID, _INSERT)
# Schema describing how `data` is mapped onto the target variables. The object
# name itself carries a placeholder suffix (`schema_INSERT`) and is used again
# in the reorganise() call below, so rename both together.
# setFormat()/setIDVar()/setObsVar()/reorganise() are not defined in this file
# (presumably from the tabshiftr package - confirm). Variables declared with
# `value =` receive one fixed value, those declared with `columns =` are taken
# from the input table (per that reading of the API - TODO confirm).
# `obsID` points at column 1 because it was inserted with `.before = 1` above.
schema_INSERT <-
setFormat(header = _INSERT, decimal = _INSERT, thousand = _INSERT,
na_values = _INSERT) |>
setIDVar(name = "datasetID", value = thisDataset) |>
setIDVar(name = "obsID", type = "i", columns = 1) |>
setIDVar(name = "externalID", columns = _INSERT) |>
setIDVar(name = "disclosed", type = "l", value = _INSERT) |>
setIDVar(name = "date", columns = _INSERT) |>
setIDVar(name = "irrigated", type = "l", value = _INSERT) |>
setIDVar(name = "present", type = "l", value = _INSERT) |>
setIDVar(name = "sample_type", value = _INSERT) |>
setIDVar(name = "collector", value = _INSERT) |>
setIDVar(name = "purpose", value = _INSERT) |>
setObsVar(name = "concept", type = "c", columns = _INSERT)
# Apply the schema to the geometry-free attribute table.
temp <- reorganise(schema = schema_INSERT, input = data)
message(" --> harmonizing with ontology")
# Match the `concept` column of the reorganised table against the ontology at
# `path_onto_occurr`, for this dataseries. matchOntology() is not defined in
# this file; the meaning of `colsAsClass = FALSE` has to be checked there.
out <- matchOntology(table = temp,
columns = "concept",
colsAsClass = FALSE,
dataseries = thisDataset,
ontology = path_onto_occurr)
# Re-attach the geometry that was split off earlier, joining on obsID.
# The commented summarise() is an optional step: it would collapse rows sharing
# all listed ID variables into one row, pasting their non-NA `concept` and `id`
# values together with " | " (presumably needed when matching yields several
# rows per observation - confirm before enabling).
out <- out |>
# summarise(.by = c(datasetID, obsID, externalID, disclosed, date, irrigated, present, sample_type, collector, purpose, external, match),
# concept = paste0(na.omit(concept), collapse = " | "),
# id = paste0(na.omit(id), collapse = " | ")) |>
left_join(geom, by = "obsID")
message(" --> writing output")
# Harmonized table including geometry -> output.gpkg in the dataset directory.
# NOTE(review): st_write() is called without `append`/`delete_dsn`, so a re-run
# with an existing output.gpkg may fail or behave differently - confirm the
# intended behaviour.
st_write(obj = out, dsn = paste0(thisDir, "output.gpkg"))
# Set-aside columns (keyed by obsID) -> output_other.rds next to it.
saveRDS(object = other, file = paste0(thisDir, "output_other.rds"))
# Audible signal that the script has finished (beep() presumably from beepr).
beep(sound = 10)
message("\n ... done")