diff --git a/CHANGELOG.md b/CHANGELOG.md
index db9c0bcabd..51a0abad85 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- fix: match Transformers `tojson` in chat template rendering by @CISC in #1486
 - fix: use env var configured multimodal library override paths when loading shared libraries by @navratil-matej in #1782
 - feat: add Jinja2 loop controls to chat templates by @handshape in #2018
 - fix: avoid cleanup errors for partially initialized `LlamaModel` objects by @usernames122 in #2173
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
index ed51b728cf..0b68d3a279 100644
--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -219,7 +219,7 @@ def __init__(
             set(stop_token_ids) if stop_token_ids is not None else None
         )
 
-        self._environment = ImmutableSandboxedEnvironment(
+        environment = ImmutableSandboxedEnvironment(
             loader=jinja2.BaseLoader(),
             trim_blocks=True,
             lstrip_blocks=True,
@@ -229,12 +229,32 @@ def __init__(
                 Jinja2ChatFormatter.IgnoreGenerationTags,
                 jinja2.ext.loopcontrols,
             ],
-        ).from_string(self.template)
+        )
+        # Match Transformers' chat-template JSON rendering behavior.
+        # https://github.com/huggingface/transformers/blob/39603d0e5cdb6f00e8d473d7fcbb01032d709181/src/transformers/utils/chat_template_utils.py#L481-L484
+        environment.filters["tojson"] = self.tojson
+        self._environment = environment.from_string(self.template)
 
     @staticmethod
     def strftime_now(f: str) -> str:
         return datetime.now().strftime(f)
 
+    @staticmethod
+    def tojson(
+        x: Any,
+        ensure_ascii: bool = False,
+        indent: Optional[int] = None,
+        separators: Optional[Tuple[str, str]] = None,
+        sort_keys: bool = False,
+    ) -> str:
+        return json.dumps(
+            x,
+            ensure_ascii=ensure_ascii,
+            indent=indent,
+            separators=separators,
+            sort_keys=sort_keys,
+        )
+
     def __call__(
         self,
         *,