Bring in two tricks from the CogView paper for reducing the chance of overflow in attention and layernorm

This commit is contained in:
Phil Wang
2022-07-05 14:27:04 -07:00
parent e1fe3089df
commit b9a908ff75
2 changed files with 18 additions and 5 deletions

View File

@@ -1 +1 @@
__version__ = '0.16.0'
__version__ = '0.16.2'