-
Notifications
You must be signed in to change notification settings - Fork 43
Expand file tree
/
Copy pathMakefile
More file actions
113 lines (92 loc) · 4.38 KB
/
Makefile
File metadata and controls
113 lines (92 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# ---------------------------------------------------------------------------
# User-tunable settings. Everything assigned with `?=` can be overridden from
# the environment or the command line (e.g. `make test TEST_PATH=fme/core`).
# ---------------------------------------------------------------------------

# Container image naming: images are tagged $(IMAGE):$(VERSION), where VERSION
# defaults to the abbreviated hash of the current git HEAD.
IMAGE ?= fme
VERSION ?= $(shell git rev-parse --short HEAD)
REGISTRY ?= registry.nersc.gov/m4492/ai2cm

# Beaker settings. USERNAME is looked up lazily through the beaker CLI, so the
# lookup only happens for targets that actually expand it.
USERNAME ?= $(shell beaker account whoami --format=json | jq -r '.[0].name')
BEAKER_WORKSPACE = ai2/ace

# Name of the conda environment managed by `create_environment`.
ENVIRONMENT_NAME ?= fme

# Repository name handed to `twine upload --repository`.
DEPLOY_TARGET ?= pypi

# Test settings: the path given to pytest, plus the process count and the
# FME_* environment values forwarded by `test_parallel`.
TEST_PATH ?= .
NPROC ?= 2
FME_FORCE_CPU ?= 0
FME_DISTRIBUTED_BACKEND ?= torch
FME_DISTRIBUTED_H ?= 1
FME_DISTRIBUTED_W ?= 1

# Packages installed by `conda create`: pip everywhere, and additionally
# gxx_linux-64 when `uname` reports Linux.
CONDA_PACKAGES = pip
ifeq ($(shell uname),Linux)
CONDA_PACKAGES = gxx_linux-64 pip
endif
# Build the `production` stage of docker/Dockerfile for linux/amd64 with
# BuildKit enabled, tagging the result $(IMAGE):$(VERSION).
# Declared phony because this names a command, not a file the rule creates;
# without it, a file called `build_docker_image` would make the target a no-op.
.PHONY: build_docker_image
build_docker_image:
	DOCKER_BUILDKIT=1 docker build --platform=linux/amd64 -f docker/Dockerfile -t $(IMAGE):$(VERSION) --target production .
# Build the image, re-tag it under $(REGISTRY), and push it there.
# Declared phony: the target is a command name, not an output file.
.PHONY: push_shifter_image
push_shifter_image: build_docker_image
	docker tag $(IMAGE):$(VERSION) $(REGISTRY)/$(IMAGE):$(VERSION)
	docker push $(REGISTRY)/$(IMAGE):$(VERSION)
# Build the image locally, then register it with Beaker under the name
# $(IMAGE)-$(VERSION). No --workspace is passed here, so the beaker CLI's
# own default applies.
# Declared phony: the target is a command name, not an output file.
.PHONY: build_beaker_image
build_beaker_image: build_docker_image
	beaker image create --name $(IMAGE)-$(VERSION) $(IMAGE):$(VERSION)
# Build docker/Dockerfile with podman-hpc, tagged $(IMAGE):$(VERSION).
# Unlike build_docker_image, no --target or --platform is passed.
# Declared phony: the target is a command name, not an output file.
.PHONY: build_podman_image
build_podman_image:
	podman-hpc build -f docker/Dockerfile -t $(IMAGE):$(VERSION) .
# Build the podman-hpc image, then run `podman-hpc migrate` on it.
# Declared phony: the target is a command name, not an output file.
.PHONY: migrate_podman_image
migrate_podman_image: build_podman_image
	podman-hpc migrate $(IMAGE):$(VERSION)
# Build the image and open an interactive bash shell in a throwaway
# container (--rm removes the container on exit).
# Declared phony: the target is a command name, not an output file.
.PHONY: enter_docker_image
enter_docker_image: build_docker_image
	docker run -it --rm $(IMAGE):$(VERSION) bash
# Run ./launch-beaker-session.sh with the image reference
# $(USERNAME)/$(IMAGE)-$(VERSION) as its only argument.
# NOTE(review): this target has no prerequisites, so the Beaker image is
# presumably expected to exist already (e.g. via build_beaker_image) — confirm.
# Declared phony: the target is a command name, not an output file.
.PHONY: launch_beaker_session
launch_beaker_session:
	./launch-beaker-session.sh $(USERNAME)/$(IMAGE)-$(VERSION)
# Build the `deps-only` stage of docker/Dockerfile for linux/amd64 and
# register it with Beaker as $(IMAGE)-deps-only-$(VERSION) in the
# ai2/ace-ci-tests workspace.
# Declared phony: the target is a command name, not an output file.
.PHONY: build_deps_only_image
build_deps_only_image:
	DOCKER_BUILDKIT=1 docker build --platform=linux/amd64 -f docker/Dockerfile -t $(IMAGE)-deps-only:$(VERSION) --target deps-only .
	beaker image create $(IMAGE)-deps-only:$(VERSION) --name $(IMAGE)-deps-only-$(VERSION) --workspace ai2/ace-ci-tests
# Build the `nsight` stage of docker/Dockerfile for linux/amd64 and register
# it with Beaker as $(IMAGE)-nsight-$(VERSION).
# The workspace comes from BEAKER_WORKSPACE (defined at the top of this file
# as ai2/ace) instead of repeating the literal, so the default is unchanged
# and `make build_nsight_image BEAKER_WORKSPACE=...` can redirect it.
# Declared phony: the target is a command name, not an output file.
.PHONY: build_nsight_image
build_nsight_image:
	DOCKER_BUILDKIT=1 docker build --platform=linux/amd64 -f docker/Dockerfile -t $(IMAGE)-nsight:$(VERSION) --target nsight .
	beaker image create $(IMAGE)-nsight:$(VERSION) --name $(IMAGE)-nsight-$(VERSION) --workspace $(BEAKER_WORKSPACE)
# Create the conda environment $(ENVIRONMENT_NAME) (Python 3.11) and install
# the project into it in editable mode with uv.
# It is recommended to deactivate the current conda environment before
# running this.
# Declared phony: the target is a command name, not an output file.
.PHONY: create_environment
create_environment:
	conda create -n $(ENVIRONMENT_NAME) python=3.11 $(CONDA_PACKAGES)
	conda run --no-capture-output -n $(ENVIRONMENT_NAME) python -m pip install uv
	# The extras spec is quoted: unquoted, `.[dev,docs,graphcast]` is a shell
	# glob pattern and could expand to a matching dot-file in this directory.
	conda run --no-capture-output -n $(ENVIRONMENT_NAME) uv pip install -c constraints.txt -e ".[dev,docs,graphcast]"
	conda run --no-capture-output -n $(ENVIRONMENT_NAME) uv pip install --no-build-isolation -c constraints.txt -r requirements-healpix.txt
	conda run --no-capture-output -n $(ENVIRONMENT_NAME) uv pip install -r analysis-deps.txt
# Run pytest on $(TEST_PATH) with 4 workers (-n 4), reporting the 40 slowest
# durations.
# Declared phony: without it, any file or directory named `test` in this
# directory would make `make test` report "up to date" and run nothing.
.PHONY: test
test:
	pytest -n 4 --durations 40 $(TEST_PATH)
# Run the tests marked `parallel` under torchrun with $(NPROC) processes per
# node. The FME_* make variables are forwarded to the test processes as
# environment variables of the same names.
# The first `-m` belongs to torchrun (run the pytest module); the second is
# pytest's marker selection.
# Declared phony: the target is a command name, not an output file.
.PHONY: test_parallel
test_parallel:
	FME_FORCE_CPU=$(FME_FORCE_CPU) \
	FME_DISTRIBUTED_BACKEND=$(FME_DISTRIBUTED_BACKEND) \
	FME_DISTRIBUTED_H=$(FME_DISTRIBUTED_H) \
	FME_DISTRIBUTED_W=$(FME_DISTRIBUTED_W) \
	torchrun --nproc-per-node $(NPROC) -m pytest -m parallel $(TEST_PATH)
# matrix for parallel tests
# | backend | WS | H | W |
# | ------- | -- | - | - |
# | torch   | 2  | 1 | 1 |
# | torch   | 3  | 1 | 1 |
# | model   | 3  | 1 | 1 |
# | model   | 2  | 2 | 1 |
# | model   | 2  | 1 | 2 |
# | model   | 3  | 3 | 1 |
# | model   | 3  | 1 | 3 |
# | model   | 4  | 2 | 2 |
# | model   | 4  | 2 | 1 |
# | model   | 8  | 2 | 2 |
#
# NOTE(review): the rows (model, 3, 3, 1) and (model, 3, 1, 3) are listed
# above but are not run by the recipe below — confirm whether that is
# intentional.
#
# Run test_parallel on CPU once per configuration. WS in the table is passed
# as NPROC. Each line sets the configuration in the environment of a sub-make,
# where it is picked up by the `?=` defaults at the top of this file.
# $(MAKE) is used rather than a literal `make` so the sub-make is the same
# make binary and inherits flags such as -n and the -j jobserver.
# Declared phony: the target is a command name, not an output file.
.PHONY: cpu_test_all_parallel
cpu_test_all_parallel:
	FME_FORCE_CPU=1 NPROC=2 FME_DISTRIBUTED_BACKEND=model FME_DISTRIBUTED_H=2 FME_DISTRIBUTED_W=1 $(MAKE) test_parallel
	FME_FORCE_CPU=1 NPROC=2 FME_DISTRIBUTED_BACKEND=torch FME_DISTRIBUTED_H=1 FME_DISTRIBUTED_W=1 $(MAKE) test_parallel
	FME_FORCE_CPU=1 NPROC=3 FME_DISTRIBUTED_BACKEND=torch FME_DISTRIBUTED_H=1 FME_DISTRIBUTED_W=1 $(MAKE) test_parallel
	FME_FORCE_CPU=1 NPROC=3 FME_DISTRIBUTED_BACKEND=model FME_DISTRIBUTED_H=1 FME_DISTRIBUTED_W=1 $(MAKE) test_parallel
	FME_FORCE_CPU=1 NPROC=2 FME_DISTRIBUTED_BACKEND=model FME_DISTRIBUTED_H=1 FME_DISTRIBUTED_W=2 $(MAKE) test_parallel
	FME_FORCE_CPU=1 NPROC=4 FME_DISTRIBUTED_BACKEND=model FME_DISTRIBUTED_H=2 FME_DISTRIBUTED_W=2 $(MAKE) test_parallel
	FME_FORCE_CPU=1 NPROC=4 FME_DISTRIBUTED_BACKEND=model FME_DISTRIBUTED_H=2 FME_DISTRIBUTED_W=1 $(MAKE) test_parallel
	FME_FORCE_CPU=1 NPROC=8 FME_DISTRIBUTED_BACKEND=model FME_DISTRIBUTED_H=2 FME_DISTRIBUTED_W=2 $(MAKE) test_parallel
# Run pytest on $(TEST_PATH) with coverage, using the coverage configuration
# in pyproject.toml and a terminal report that lists missing lines and skips
# fully covered files.
# --cov must come after pytest args to use the sources defined by config
# Declared phony: the target is a command name, not an output file.
.PHONY: test_cov
test_cov:
	pytest -n 4 --durations 40 --cov --cov-report=term-missing:skip-covered --cov-config=pyproject.toml $(TEST_PATH)
# Run pytest on $(TEST_PATH) with 4 workers and the `--fast` option.
# NOTE(review): `--fast` is not a built-in pytest flag; presumably it is
# registered by the project's conftest — confirm.
# Declared phony: the target is a command name, not an output file.
.PHONY: test_fast
test_fast:
	pytest -n 4 --durations 40 --fast $(TEST_PATH)
# Run pytest on $(TEST_PATH) with the `--very-fast` option. Unlike the other
# test targets, no `-n` worker count is passed.
# NOTE(review): `--very-fast` is presumably a project-defined pytest option —
# confirm.
# Declared phony: the target is a command name, not an output file.
.PHONY: test_very_fast
test_very_fast:
	pytest --durations 40 --very-fast $(TEST_PATH)
# For maintainer use only
# requires fme[deploy] to be installed
#
# Remove any previous dist/ directory and rebuild the distribution with
# `python -m build`, so dist/ only holds artifacts from this run.
# Declared phony: the target is a command name, not an output file.
.PHONY: build_pypi
build_pypi:
	rm -rf dist
	python -m build
# Rebuild the distribution, then upload everything in dist/ with twine to the
# repository named by $(DEPLOY_TARGET).
# Declared phony: the target is a command name, not an output file.
.PHONY: deploy_pypi
deploy_pypi: build_pypi
	twine upload --repository $(DEPLOY_TARGET) dist/*
# Same as deploy_pypi, but uploads to the `testpypi` repository. The
# target-specific assignment also applies to this target's prerequisites,
# which is how deploy_pypi sees the changed DEPLOY_TARGET.
# Declared phony: the target is a command name, not an output file.
.PHONY: deploy_test_pypi
deploy_test_pypi: DEPLOY_TARGET = testpypi
deploy_test_pypi: deploy_pypi