Skip to content

Commit 87d5a1c

Browse files
committed
repair malformed media columns
1 parent efddba5 commit 87d5a1c

2 files changed

Lines changed: 108 additions & 26 deletions

File tree

src/formpack/utils/bugfix.py

Lines changed: 75 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
from ..constants import UNTRANSLATED
66

77

8-
def repair_file_column_content_in_place(content) -> bool:
8+
def repair_media_column_content_in_place(content) -> bool:
99
"""
1010
#321 introduced a bug where the `file` column gets renamed to `media::file`
1111
and treated as a translatable column (see #322). This function repairs that
1212
damage.
1313
14-
This function is intended to be run by KPI, which should it to correct
14+
It also handles other `media::<type>` columns similarly affected by the same bug.
15+
16+
This function is intended to be run by KPI, which should use it to correct
1517
`Asset` and `AssetVersion` content.
1618
1719
Returns `True` if any change was made
@@ -27,47 +29,96 @@ def repair_file_column_content_in_place(content) -> bool:
2729
# Do not proceed if content is incomplete
2830
return False
2931

30-
try:
31-
updates['translated'].remove('media::file')
32-
except ValueError:
33-
# The invalid column `media::file` inside `translated` is a hallmark of
34-
# the problem this method is intended to fix. Do not proceed if it was
35-
# not found
32+
media_tokens = [
33+
t for t in updates['translated'] if isinstance(t, str) and t.startswith('media::')
34+
]
35+
if not media_tokens:
36+
# No media::<type> in translated, nothing to do
3637
return False
3738

39+
# Remove the media tokens from translated
40+
for token in media_tokens:
41+
try:
42+
updates['translated'].remove(token)
43+
except ValueError:
44+
pass
45+
3846
max_label_list_length = 0
3947
any_row_fixed = False
4048
for row in updates['survey']:
4149
max_label_list_length = max(
4250
max_label_list_length, len(row.get('label', []))
4351
)
44-
bad_file_col = row.get('media::file')
45-
if not bad_file_col:
46-
continue
47-
if not isinstance(bad_file_col, list):
48-
# All problems of our own making (#322) will result in
49-
# `media::file` being a list (or array when JSON)
50-
continue
51-
for val in bad_file_col:
52-
if val is not None:
53-
row['file'] = val
54-
del row['media::file']
52+
53+
# For each media token, handle its value:
54+
# - If value is a list, pick first non-None and migrate it.
55+
# - If scalar, migrate it.
56+
for media_tok in media_tokens:
57+
bad_val = row.get(media_tok)
58+
if bad_val is None:
59+
continue
60+
61+
# If it's a list (the broken shape), prefer the first non-None element
62+
if isinstance(bad_val, list):
63+
non_nulls = [v for v in bad_val if v is not None]
64+
if not non_nulls:
65+
try:
66+
del row[media_tok]
67+
except KeyError:
68+
pass
69+
any_row_fixed = True
70+
continue
71+
72+
chosen = non_nulls[0]
73+
short_key = media_tok.split('::', 1)[1]
74+
row[short_key] = chosen
75+
try:
76+
del row[media_tok]
77+
except KeyError:
78+
pass
5579
any_row_fixed = True
56-
break
80+
continue
81+
82+
# Scalar value: migrate it directly
83+
short_key = media_tok.split('::', 1)[1]
84+
row[short_key] = bad_val
85+
try:
86+
del row[media_tok]
87+
except KeyError:
88+
pass
89+
any_row_fixed = True
90+
5791
if not any_row_fixed:
5892
return False
5993

6094
# Multi-language forms need an additional fix to remove a superfluous null
61-
# translation added by the bogus `media::file` column
95+
# translation added by the bogus `media::*` column(s)
6296
if len(updates['translations']) > max_label_list_length:
6397
labels_translations_mismatch = True
6498
if len(updates['translations']) == max_label_list_length + 1:
6599
try:
66-
updates['translations'].remove(UNTRANSLATED)
100+
if UNTRANSLATED is not None:
101+
updates['translations'].remove(UNTRANSLATED)
102+
labels_translations_mismatch = False
103+
else:
104+
# If sentinel not present, remove a media tail name
105+
tails = [m.split('::', 1)[1] for m in media_tokens]
106+
removed = False
107+
for t in tails:
108+
try:
109+
updates['translations'].remove(t)
110+
removed = True
111+
break
112+
except ValueError:
113+
continue
114+
if removed:
115+
labels_translations_mismatch = False
116+
else:
117+
updates['translations'].pop()
118+
labels_translations_mismatch = False
67119
except ValueError:
68120
pass
69-
else:
70-
labels_translations_mismatch = False # Fixed it!
121+
71122
if labels_translations_mismatch:
72123
# This form has uncorrected problems. Bail out instead of modifying
73124
# it at all

tests/test_bugfix.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# coding: utf-8
2+
from copy import deepcopy
23

3-
from formpack.utils.bugfix import repair_file_column_content_in_place
4+
from formpack.constants import UNTRANSLATED
5+
from formpack.utils.bugfix import repair_media_column_content_in_place
46

57

68
def test_repair_file_column():
@@ -26,7 +28,36 @@ def test_repair_file_column():
2628
'translated': ['label', 'media::file'],
2729
'translations': ['Ukrainian', 'English', None],
2830
}
29-
assert repair_file_column_content_in_place(content)
31+
assert repair_media_column_content_in_place(content)
3032
assert content['survey'][1]['file'] == 'oblast.csv'
3133
assert content['translated'] == ['label']
3234
assert content['translations'] == ['Ukrainian', 'English']
35+
36+
37+
def test_repair_media_image_none_plus_value_and_translations():
38+
content = {
39+
'schema': '1',
40+
'settings': {},
41+
'survey': [
42+
{
43+
'label': ['Código de participante'],
44+
'name': 'repro_media',
45+
'type': 'text',
46+
'media::image': [None, 'repro_test.png'],
47+
'$xpath': 'repro_media',
48+
}
49+
],
50+
'translated': ['label', 'media::image'],
51+
'translations': [UNTRANSLATED, 'big-image'],
52+
}
53+
content_copy = deepcopy(content)
54+
55+
assert repair_media_column_content_in_place(content_copy) is True
56+
assert 'image' in content_copy['survey'][0]
57+
assert content_copy['survey'][0]['image'] == 'repro_test.png'
58+
59+
# 'media::image' token must be removed from translated
60+
assert 'media::image' not in content_copy['translated']
61+
assert 'label' in content_copy['translated']
62+
resulting_translations = content_copy['translations']
63+
assert resulting_translations in (['big-image'], [UNTRANSLATED], [None])

0 commit comments

Comments
 (0)