/* Generated by Cython 0.29.19 */

#define PY_SSIZE_T_CLEAN
#include "Python.h"
#ifndef Py_PYTHON_H
    #error Python headers needed to compile C extensions, please install development version of Python.
#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
    #error Cython requires Python 2.6+ or Python 3.3+.
#else
#define CYTHON_ABI "0_29_19"
#define CYTHON_HEX_VERSION 0x001D13F0
#define CYTHON_FUTURE_DIVISION 0
#include <stddef.h>
#ifndef offsetof
  #define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
#endif
#if !defined(WIN32) && !defined(MS_WINDOWS)
  #ifndef __stdcall
    #define __stdcall
  #endif
  #ifndef __cdecl
    #define __cdecl
  #endif
  #ifndef __fastcall
    #define __fastcall
  #endif
#endif
#ifndef DL_IMPORT
  #define DL_IMPORT(t) t
#endif
#ifndef DL_EXPORT
  #define DL_EXPORT(t) t
#endif
#define __PYX_COMMA ,
#ifndef HAVE_LONG_LONG
  #if PY_VERSION_HEX >= 0x02070000
    #define HAVE_LONG_LONG
  #endif
#endif
#ifndef PY_LONG_LONG
  #define PY_LONG_LONG LONG_LONG
#endif
#ifndef Py_HUGE_VAL
  #define Py_HUGE_VAL HUGE_VAL
#endif
#ifdef PYPY_VERSION
  #define CYTHON_COMPILING_IN_PYPY 1
  #define CYTHON_COMPILING_IN_PYSTON 0
  #define CYTHON_COMPILING_IN_CPYTHON 0
  #undef CYTHON_USE_TYPE_SLOTS
  #define CYTHON_USE_TYPE_SLOTS 0
  #undef CYTHON_USE_PYTYPE_LOOKUP
  #define CYTHON_USE_PYTYPE_LOOKUP 0
  #if PY_VERSION_HEX < 0x03050000
    #undef CYTHON_USE_ASYNC_SLOTS
    #define CYTHON_USE_ASYNC_SLOTS 0
  #elif !defined(CYTHON_USE_ASYNC_SLOTS)
    #define CYTHON_USE_ASYNC_SLOTS 1
  #endif
  #undef CYTHON_USE_PYLIST_INTERNALS
  #define CYTHON_USE_PYLIST_INTERNALS 0
  #undef CYTHON_USE_UNICODE_INTERNALS
  #define CYTHON_USE_UNICODE_INTERNALS 0
  #undef CYTHON_USE_UNICODE_WRITER
  #define CYTHON_USE_UNICODE_WRITER 0
  #undef CYTHON_USE_PYLONG_INTERNALS
  #define CYTHON_USE_PYLONG_INTERNALS 0
  #undef CYTHON_AVOID_BORROWED_REFS
  #define CYTHON_AVOID_BORROWED_REFS 1
  #undef CYTHON_ASSUME_SAFE_MACROS
  #define CYTHON_ASSUME_SAFE_MACROS 0
  #undef CYTHON_UNPACK_METHODS
  #define CYTHON_UNPACK_METHODS 0
  #undef CYTHON_FAST_THREAD_STATE
  #define CYTHON_FAST_THREAD_STATE 0
  #undef CYTHON_FAST_PYCALL
  #define CYTHON_FAST_PYCALL 0
  #undef CYTHON_PEP489_MULTI_PHASE_INIT
  #define CYTHON_PEP489_MULTI_PHASE_INIT 0
  #undef CYTHON_USE_TP_FINALIZE
  #define CYTHON_USE_TP_FINALIZE 0
  #undef CYTHON_USE_DICT_VERSIONS
  #define CYTHON_USE_DICT_VERSIONS 0
  #undef CYTHON_USE_EXC_INFO_STACK
  #define CYTHON_USE_EXC_INFO_STACK 0
#elif defined(PYSTON_VERSION)
  #define CYTHON_COMPILING_IN_PYPY 0
  #define CYTHON_COMPILING_IN_PYSTON 1
  #define CYTHON_COMPILING_IN_CPYTHON 0
  #ifndef CYTHON_USE_TYPE_SLOTS
    #define CYTHON_USE_TYPE_SLOTS 1
  #endif
  #undef CYTHON_USE_PYTYPE_LOOKUP
  #define CYTHON_USE_PYTYPE_LOOKUP 0
  #undef CYTHON_USE_ASYNC_SLOTS
  #define CYTHON_USE_ASYNC_SLOTS 0
  #undef CYTHON_USE_PYLIST_INTERNALS
  #define CYTHON_USE_PYLIST_INTERNALS 0
  #ifndef CYTHON_USE_UNICODE_INTERNALS
    #define CYTHON_USE_UNICODE_INTERNALS 1
  #endif
  #undef CYTHON_USE_UNICODE_WRITER
  #define CYTHON_USE_UNICODE_WRITER 0
  #undef CYTHON_USE_PYLONG_INTERNALS
  #define CYTHON_USE_PYLONG_INTERNALS 0
  #ifndef CYTHON_AVOID_BORROWED_REFS
    #define CYTHON_AVOID_BORROWED_REFS 0
  #endif
  #ifndef CYTHON_ASSUME_SAFE_MACROS
    #define CYTHON_ASSUME_SAFE_MACROS 1
  #endif
  #ifndef CYTHON_UNPACK_METHODS
    #define CYTHON_UNPACK_METHODS 1
  #endif
  #undef CYTHON_FAST_THREAD_STATE
  #define CYTHON_FAST_THREAD_STATE 0
  #undef CYTHON_FAST_PYCALL
  #define CYTHON_FAST_PYCALL 0
  #undef CYTHON_PEP489_MULTI_PHASE_INIT
  #define CYTHON_PEP489_MULTI_PHASE_INIT 0
  #undef CYTHON_USE_TP_FINALIZE
  #define CYTHON_USE_TP_FINALIZE 0
  #undef CYTHON_USE_DICT_VERSIONS
  #define CYTHON_USE_DICT_VERSIONS 0
  #undef CYTHON_USE_EXC_INFO_STACK
  #define CYTHON_USE_EXC_INFO_STACK 0
#else
  #define CYTHON_COMPILING_IN_PYPY 0
  #define CYTHON_COMPILING_IN_PYSTON 0
  #define CYTHON_COMPILING_IN_CPYTHON 1
  #ifndef CYTHON_USE_TYPE_SLOTS
    #define CYTHON_USE_TYPE_SLOTS 1
  #endif
  #if PY_VERSION_HEX < 0x02070000
    #undef CYTHON_USE_PYTYPE_LOOKUP
    #define CYTHON_USE_PYTYPE_LOOKUP 0
  #elif !defined(CYTHON_USE_PYTYPE_LOOKUP)
    #define CYTHON_USE_PYTYPE_LOOKUP 1
  #endif
  #if PY_MAJOR_VERSION < 3
    #undef CYTHON_USE_ASYNC_SLOTS
    #define CYTHON_USE_ASYNC_SLOTS 0
  #elif !defined(CYTHON_USE_ASYNC_SLOTS)
    #define CYTHON_USE_ASYNC_SLOTS 1
  #endif
  #if PY_VERSION_HEX < 0x02070000
    #undef CYTHON_USE_PYLONG_INTERNALS
    #define CYTHON_USE_PYLONG_INTERNALS 0
  #elif !defined(CYTHON_USE_PYLONG_INTERNALS)
    #define CYTHON_USE_PYLONG_INTERNALS 1
  #endif
  #ifndef CYTHON_USE_PYLIST_INTERNALS
    #define CYTHON_USE_PYLIST_INTERNALS 1
  #endif
  #ifndef CYTHON_USE_UNICODE_INTERNALS
    #define CYTHON_USE_UNICODE_INTERNALS 1
  #endif
  #if PY_VERSION_HEX < 0x030300F0
    #undef CYTHON_USE_UNICODE_WRITER
    #define CYTHON_USE_UNICODE_WRITER 0
  #elif !defined(CYTHON_USE_UNICODE_WRITER)
    #define CYTHON_USE_UNICODE_WRITER 1
  #endif
  #ifndef CYTHON_AVOID_BORROWED_REFS
    #define CYTHON_AVOID_BORROWED_REFS 0
  #endif
  #ifndef CYTHON_ASSUME_SAFE_MACROS
    #define CYTHON_ASSUME_SAFE_MACROS 1
  #endif
  #ifndef CYTHON_UNPACK_METHODS
    #define CYTHON_UNPACK_METHODS 1
  #endif
  #ifndef CYTHON_FAST_THREAD_STATE
    #define CYTHON_FAST_THREAD_STATE 1
  #endif
  #ifndef CYTHON_FAST_PYCALL
    #define CYTHON_FAST_PYCALL 1
  #endif
  #ifndef CYTHON_PEP489_MULTI_PHASE_INIT
    #define CYTHON_PEP489_MULTI_PHASE_INIT (PY_VERSION_HEX >= 0x03050000)
  #endif
  #ifndef CYTHON_USE_TP_FINALIZE
    #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1)
  #endif
  #ifndef CYTHON_USE_DICT_VERSIONS
    #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX >= 0x030600B1)
  #endif
  #ifndef CYTHON_USE_EXC_INFO_STACK
    #define CYTHON_USE_EXC_INFO_STACK (PY_VERSION_HEX >= 0x030700A3)
  #endif
#endif
#if !defined(CYTHON_FAST_PYCCALL)
#define CYTHON_FAST_PYCCALL  (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1)
#endif
#if CYTHON_USE_PYLONG_INTERNALS
  #include "longintrepr.h"
  #undef SHIFT
  #undef BASE
  #undef MASK
  #ifdef SIZEOF_VOID_P
    enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
  #endif
#endif
#ifndef __has_attribute
  #define __has_attribute(x) 0
#endif
#ifndef __has_cpp_attribute
  #define __has_cpp_attribute(x) 0
#endif
#ifndef CYTHON_RESTRICT
  #if defined(__GNUC__)
    #define CYTHON_RESTRICT __restrict__
  #elif defined(_MSC_VER) && _MSC_VER >= 1400
    #define CYTHON_RESTRICT __restrict
  #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
    #define CYTHON_RESTRICT restrict
  #else
    #define CYTHON_RESTRICT
  #endif
#endif
#ifndef CYTHON_UNUSED
# if defined(__GNUC__)
#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#     define CYTHON_UNUSED __attribute__ ((__unused__))
#   else
#     define CYTHON_UNUSED
#   endif
# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#   define CYTHON_UNUSED __attribute__ ((__unused__))
# else
#   define CYTHON_UNUSED
# endif
#endif
#ifndef CYTHON_MAYBE_UNUSED_VAR
#  if defined(__cplusplus)
     template<class T> void CYTHON_MAYBE_UNUSED_VAR( const T& ) { }
#  else
#    define CYTHON_MAYBE_UNUSED_VAR(x) (void)(x)
#  endif
#endif
#ifndef CYTHON_NCP_UNUSED
# if CYTHON_COMPILING_IN_CPYTHON
#  define CYTHON_NCP_UNUSED
# else
#  define CYTHON_NCP_UNUSED CYTHON_UNUSED
# endif
#endif
#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None)
#ifdef _MSC_VER
    #ifndef _MSC_STDINT_H_
        #if _MSC_VER < 1300
           typedef unsigned char     uint8_t;
           typedef unsigned int      uint32_t;
        #else
           typedef unsigned __int8   uint8_t;
           typedef unsigned __int32  uint32_t;
        #endif
    #endif
#else
   #include <stdint.h>
#endif
#ifndef CYTHON_FALLTHROUGH
  #if defined(__cplusplus) && __cplusplus >= 201103L
    #if __has_cpp_attribute(fallthrough)
      #define CYTHON_FALLTHROUGH [[fallthrough]]
    #elif __has_cpp_attribute(clang::fallthrough)
      #define CYTHON_FALLTHROUGH [[clang::fallthrough]]
    #elif __has_cpp_attribute(gnu::fallthrough)
      #define CYTHON_FALLTHROUGH [[gnu::fallthrough]]
    #endif
  #endif
  #ifndef CYTHON_FALLTHROUGH
    #if __has_attribute(fallthrough)
      #define CYTHON_FALLTHROUGH __attribute__((fallthrough))
    #else
      #define CYTHON_FALLTHROUGH
    #endif
  #endif
  #if defined(__clang__ ) && defined(__apple_build_version__)
    #if __apple_build_version__ < 7000000
      #undef  CYTHON_FALLTHROUGH
      #define CYTHON_FALLTHROUGH
    #endif
  #endif
#endif

#ifndef __cplusplus
  #error "Cython files generated with the C++ option must be compiled with a C++ compiler."
#endif
#ifndef CYTHON_INLINE
  #if defined(__clang__)
    #define CYTHON_INLINE __inline__ __attribute__ ((__unused__))
  #else
    #define CYTHON_INLINE inline
  #endif
#endif
template<typename T>
void __Pyx_call_destructor(T& x) {
    x.~T();
}
template<typename T>
class __Pyx_FakeReference {
  public:
    __Pyx_FakeReference() : ptr(NULL) { }
    __Pyx_FakeReference(const T& ref) : ptr(const_cast<T*>(&ref)) { }
    T *operator->() { return ptr; }
    T *operator&() { return ptr; }
    operator T&() { return *ptr; }
    template<typename U> bool operator ==(U other) { return *ptr == other; }
    template<typename U> bool operator !=(U other) { return *ptr != other; }
  private:
    T *ptr;
};

#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
  #define Py_OptimizeFlag 0
#endif
#define __PYX_BUILD_PY_SSIZE_T "n"
#define CYTHON_FORMAT_SSIZE_T "z"
#if PY_MAJOR_VERSION < 3
  #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
  #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
          PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
  #define __Pyx_DefaultClassType PyClass_Type
#else
  #define __Pyx_BUILTIN_MODULE_NAME "builtins"
#if PY_VERSION_HEX >= 0x030800A4 && PY_VERSION_HEX < 0x030800B2
  #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
          PyCode_New(a, 0, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
#else
  #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
          PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
#endif
  #define __Pyx_DefaultClassType PyType_Type
#endif
#ifndef Py_TPFLAGS_CHECKTYPES
  #define Py_TPFLAGS_CHECKTYPES 0
#endif
#ifndef Py_TPFLAGS_HAVE_INDEX
  #define Py_TPFLAGS_HAVE_INDEX 0
#endif
#ifndef Py_TPFLAGS_HAVE_NEWBUFFER
  #define Py_TPFLAGS_HAVE_NEWBUFFER 0
#endif
#ifndef Py_TPFLAGS_HAVE_FINALIZE
  #define Py_TPFLAGS_HAVE_FINALIZE 0
#endif
#ifndef METH_STACKLESS
  #define METH_STACKLESS 0
#endif
#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL)
  #ifndef METH_FASTCALL
     #define METH_FASTCALL 0x80
  #endif
  typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs);
  typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args,
                                                          Py_ssize_t nargs, PyObject *kwnames);
#else
  #define __Pyx_PyCFunctionFast _PyCFunctionFast
  #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords
#endif
#if CYTHON_FAST_PYCCALL
#define __Pyx_PyFastCFunction_Check(func)\
    ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS)))))
#else
#define __Pyx_PyFastCFunction_Check(func) 0
#endif
#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc)
  #define PyObject_Malloc(s)   PyMem_Malloc(s)
  #define PyObject_Free(p)     PyMem_Free(p)
  #define PyObject_Realloc(p)  PyMem_Realloc(p)
#endif
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030400A1
  #define PyMem_RawMalloc(n)           PyMem_Malloc(n)
  #define PyMem_RawRealloc(p, n)       PyMem_Realloc(p, n)
  #define PyMem_RawFree(p)             PyMem_Free(p)
#endif
#if CYTHON_COMPILING_IN_PYSTON
  #define __Pyx_PyCode_HasFreeVars(co)  PyCode_HasFreeVars(co)
  #define __Pyx_PyFrame_SetLineNumber(frame, lineno) PyFrame_SetLineNumber(frame, lineno)
#else
  #define __Pyx_PyCode_HasFreeVars(co)  (PyCode_GetNumFree(co) > 0)
  #define __Pyx_PyFrame_SetLineNumber(frame, lineno)  (frame)->f_lineno = (lineno)
#endif
#if !CYTHON_FAST_THREAD_STATE || PY_VERSION_HEX < 0x02070000
  #define __Pyx_PyThreadState_Current PyThreadState_GET()
#elif PY_VERSION_HEX >= 0x03060000
  #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet()
#elif PY_VERSION_HEX >= 0x03000000
  #define __Pyx_PyThreadState_Current PyThreadState_GET()
#else
  #define __Pyx_PyThreadState_Current _PyThreadState_Current
#endif
#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT)
#include "pythread.h"
#define Py_tss_NEEDS_INIT 0
typedef int Py_tss_t;
static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) {
  *key = PyThread_create_key();
  return 0;
}
static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) {
  Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t));
  *key = Py_tss_NEEDS_INIT;
  return key;
}
static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) {
  PyObject_Free(key);
}
static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) {
  return *key != Py_tss_NEEDS_INIT;
}
static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) {
  PyThread_delete_key(*key);
  *key = Py_tss_NEEDS_INIT;
}
static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) {
  return PyThread_set_key_value(*key, value);
}
static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
  return PyThread_get_key_value(*key);
}
#endif
#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized)
#define __Pyx_PyDict_NewPresized(n)  ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n))
#else
#define __Pyx_PyDict_NewPresized(n)  PyDict_New()
#endif
#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION
  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_TrueDivide(x,y)
  #define __Pyx_PyNumber_InPlaceDivide(x,y)  PyNumber_InPlaceTrueDivide(x,y)
#else
  #define __Pyx_PyNumber_Divide(x,y)         PyNumber_Divide(x,y)
  #define __Pyx_PyNumber_InPlaceDivide(x,y)  PyNumber_InPlaceDivide(x,y)
#endif
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && CYTHON_USE_UNICODE_INTERNALS
#define __Pyx_PyDict_GetItemStr(dict, name)  _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash)
#else
#define __Pyx_PyDict_GetItemStr(dict, name)  PyDict_GetItem(dict, name)
#endif
#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
  #define CYTHON_PEP393_ENABLED 1
  #define __Pyx_PyUnicode_READY(op)       (likely(PyUnicode_IS_READY(op)) ?\
                                              0 : _PyUnicode_Ready((PyObject *)(op)))
  #define __Pyx_PyUnicode_GET_LENGTH(u)   PyUnicode_GET_LENGTH(u)
  #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
  #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u)   PyUnicode_MAX_CHAR_VALUE(u)
  #define __Pyx_PyUnicode_KIND(u)         PyUnicode_KIND(u)
  #define __Pyx_PyUnicode_DATA(u)         PyUnicode_DATA(u)
  #define __Pyx_PyUnicode_READ(k, d, i)   PyUnicode_READ(k, d, i)
  #define __Pyx_PyUnicode_WRITE(k, d, i, ch)  PyUnicode_WRITE(k, d, i, ch)
  #define __Pyx_PyUnicode_IS_TRUE(u)      (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
#else
  #define CYTHON_PEP393_ENABLED 0
  #define PyUnicode_1BYTE_KIND  1
  #define PyUnicode_2BYTE_KIND  2
  #define PyUnicode_4BYTE_KIND  4
  #define __Pyx_PyUnicode_READY(op)       (0)
  #define __Pyx_PyUnicode_GET_LENGTH(u)   PyUnicode_GET_SIZE(u)
  #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
  #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u)   ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111)
  #define __Pyx_PyUnicode_KIND(u)         (sizeof(Py_UNICODE))
  #define __Pyx_PyUnicode_DATA(u)         ((void*)PyUnicode_AS_UNICODE(u))
  #define __Pyx_PyUnicode_READ(k, d, i)   ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
  #define __Pyx_PyUnicode_WRITE(k, d, i, ch)  (((void)(k)), ((Py_UNICODE*)d)[i] = ch)
  #define __Pyx_PyUnicode_IS_TRUE(u)      (0 != PyUnicode_GET_SIZE(u))
#endif
#if CYTHON_COMPILING_IN_PYPY
  #define __Pyx_PyUnicode_Concat(a, b)      PyNumber_Add(a, b)
  #define __Pyx_PyUnicode_ConcatSafe(a, b)  PyNumber_Add(a, b)
#else
  #define __Pyx_PyUnicode_Concat(a, b)      PyUnicode_Concat(a, b)
  #define __Pyx_PyUnicode_ConcatSafe(a, b)  ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\
      PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
#endif
#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains)
  #define PyUnicode_Contains(u, s)  PySequence_Contains(u, s)
#endif
#if CYTHON_COMPILING_IN_PYPY && !defined(PyByteArray_Check)
  #define PyByteArray_Check(obj)  PyObject_TypeCheck(obj, &PyByteArray_Type)
#endif
#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format)
  #define PyObject_Format(obj, fmt)  PyObject_CallMethod(obj, "__format__", "O", fmt)
#endif
#define __Pyx_PyString_FormatSafe(a, b)   ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
#define __Pyx_PyUnicode_FormatSafe(a, b)  ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
#if PY_MAJOR_VERSION >= 3
  #define __Pyx_PyString_Format(a, b)  PyUnicode_Format(a, b)
#else
  #define __Pyx_PyString_Format(a, b)  PyString_Format(a, b)
#endif
#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
  #define PyObject_ASCII(o)            PyObject_Repr(o)
#endif
#if PY_MAJOR_VERSION >= 3
  #define PyBaseString_Type            PyUnicode_Type
  #define PyStringObject               PyUnicodeObject
  #define PyString_Type                PyUnicode_Type
  #define PyString_Check               PyUnicode_Check
  #define PyString_CheckExact          PyUnicode_CheckExact
#ifndef PyObject_Unicode
  #define PyObject_Unicode             PyObject_Str
#endif
#endif
#if PY_MAJOR_VERSION >= 3
  #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
  #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
#else
  #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
  #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
#endif
#ifndef PySet_CheckExact
  #define PySet_CheckExact(obj)        (Py_TYPE(obj) == &PySet_Type)
#endif
#if CYTHON_ASSUME_SAFE_MACROS
  #define __Pyx_PySequence_SIZE(seq)  Py_SIZE(seq)
#else
  #define __Pyx_PySequence_SIZE(seq)  PySequence_Size(seq)
#endif
#if PY_MAJOR_VERSION >= 3
  #define PyIntObject                  PyLongObject
  #define PyInt_Type                   PyLong_Type
  #define PyInt_Check(op)              PyLong_Check(op)
  #define PyInt_CheckExact(op)         PyLong_CheckExact(op)
  #define PyInt_FromString             PyLong_FromString
  #define PyInt_FromUnicode            PyLong_FromUnicode
  #define PyInt_FromLong               PyLong_FromLong
  #define PyInt_FromSize_t             PyLong_FromSize_t
  #define PyInt_FromSsize_t            PyLong_FromSsize_t
  #define PyInt_AsLong                 PyLong_AsLong
  #define PyInt_AS_LONG                PyLong_AS_LONG
  #define PyInt_AsSsize_t              PyLong_AsSsize_t
  #define PyInt_AsUnsignedLongMask     PyLong_AsUnsignedLongMask
  #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
  #define PyNumber_Int                 PyNumber_Long
#endif
#if PY_MAJOR_VERSION >= 3
  #define PyBoolObject                 PyLongObject
#endif
#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
  #ifndef PyUnicode_InternFromString
    #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
  #endif
#endif
#if PY_VERSION_HEX < 0x030200A4
  typedef long Py_hash_t;
  #define __Pyx_PyInt_FromHash_t PyInt_FromLong
  #define __Pyx_PyInt_AsHash_t   PyInt_AsLong
#else
  #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
  #define __Pyx_PyInt_AsHash_t   PyInt_AsSsize_t
#endif
#if PY_MAJOR_VERSION >= 3
  #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : (Py_INCREF(func), func))
#else
  #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
#endif
#if CYTHON_USE_ASYNC_SLOTS
  #if PY_VERSION_HEX >= 0x030500B1
    #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods
    #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async)
  #else
    #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved))
  #endif
#else
  #define __Pyx_PyType_AsAsync(obj) NULL
#endif
#ifndef __Pyx_PyAsyncMethodsStruct
    typedef struct {
        unaryfunc am_await;
        unaryfunc am_aiter;
        unaryfunc am_anext;
    } __Pyx_PyAsyncMethodsStruct;
#endif

#if defined(WIN32) || defined(MS_WINDOWS)
  #define _USE_MATH_DEFINES
#endif
#include <math.h>
#ifdef NAN
#define __PYX_NAN() ((float) NAN)
#else
static CYTHON_INLINE float __PYX_NAN() {
  float value;
  memset(&value, 0xFF, sizeof(value));
  return value;
}
#endif
#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL)
#define __Pyx_truncl trunc
#else
#define __Pyx_truncl truncl
#endif

#define __PYX_MARK_ERR_POS(f_index, lineno) \
    { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
#define __PYX_ERR(f_index, lineno, Ln_error) \
    { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }

#ifndef __PYX_EXTERN_C
  #ifdef __cplusplus
    #define __PYX_EXTERN_C extern "C"
  #else
    #define __PYX_EXTERN_C extern
  #endif
#endif

#define __PYX_HAVE__thinc__neural__optimizers
#define __PYX_HAVE_API__thinc__neural__optimizers
/* Early includes */
#include <string.h>
#include <math.h>
#include <stdlib.h>
#include <stdint.h>
#ifdef _OPENMP
#include <omp.h>
#endif /* _OPENMP */

#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS)
#define CYTHON_WITHOUT_ASSERTIONS
#endif

typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
                const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry;

#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0
#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)
#define __PYX_DEFAULT_STRING_ENCODING ""
#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
#define __Pyx_uchar_cast(c) ((unsigned char)c)
#define __Pyx_long_cast(x) ((long)x)
#define __Pyx_fits_Py_ssize_t(v, type, is_signed)  (\
    (sizeof(type) < sizeof(Py_ssize_t))  ||\
    (sizeof(type) > sizeof(Py_ssize_t) &&\
          likely(v < (type)PY_SSIZE_T_MAX ||\
                 v == (type)PY_SSIZE_T_MAX)  &&\
          (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\
                                v == (type)PY_SSIZE_T_MIN)))  ||\
    (sizeof(type) == sizeof(Py_ssize_t) &&\
          (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\
                               v == (type)PY_SSIZE_T_MAX)))  )
static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) {
    return (size_t) i < (size_t) limit;
}
#if defined (__cplusplus) && __cplusplus >= 201103L
    #include <cstdlib>
    #define __Pyx_sst_abs(value) std::abs(value)
#elif SIZEOF_INT >= SIZEOF_SIZE_T
    #define __Pyx_sst_abs(value) abs(value)
#elif SIZEOF_LONG >= SIZEOF_SIZE_T
    #define __Pyx_sst_abs(value) labs(value)
#elif defined (_MSC_VER)
    #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value))
#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
    #define __Pyx_sst_abs(value) llabs(value)
#elif defined (__GNUC__)
    #define __Pyx_sst_abs(value) __builtin_llabs(value)
#else
    #define __Pyx_sst_abs(value) ((value<0) ? -value : value)
#endif
static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*);
static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
#define __Pyx_PyBytes_FromString        PyBytes_FromString
#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
#if PY_MAJOR_VERSION < 3
    #define __Pyx_PyStr_FromString        __Pyx_PyBytes_FromString
    #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
#else
    #define __Pyx_PyStr_FromString        __Pyx_PyUnicode_FromString
    #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
#endif
#define __Pyx_PyBytes_AsWritableString(s)     ((char*) PyBytes_AS_STRING(s))
#define __Pyx_PyBytes_AsWritableSString(s)    ((signed char*) PyBytes_AS_STRING(s))
#define __Pyx_PyBytes_AsWritableUString(s)    ((unsigned char*) PyBytes_AS_STRING(s))
#define __Pyx_PyBytes_AsString(s)     ((const char*) PyBytes_AS_STRING(s))
#define __Pyx_PyBytes_AsSString(s)    ((const signed char*) PyBytes_AS_STRING(s))
#define __Pyx_PyBytes_AsUString(s)    ((const unsigned char*) PyBytes_AS_STRING(s))
#define __Pyx_PyObject_AsWritableString(s)    ((char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_AsWritableSString(s)    ((signed char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_AsWritableUString(s)    ((unsigned char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_AsSString(s)    ((const signed char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_AsUString(s)    ((const unsigned char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_FromCString(s)  __Pyx_PyObject_FromString((const char*)s)
#define __Pyx_PyBytes_FromCString(s)   __Pyx_PyBytes_FromString((const char*)s)
#define __Pyx_PyByteArray_FromCString(s)   __Pyx_PyByteArray_FromString((const char*)s)
#define __Pyx_PyStr_FromCString(s)     __Pyx_PyStr_FromString((const char*)s)
#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) {
    const Py_UNICODE *u_end = u;
    while (*u_end++) ;
    return (size_t)(u_end - u - 1);
}
#define __Pyx_PyUnicode_FromUnicode(u)       PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
#define __Pyx_PyUnicode_AsUnicode            PyUnicode_AsUnicode
#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj)
#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None)
static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b);
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*);
static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x);
#define __Pyx_PySequence_Tuple(obj)\
    (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj))
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
#if CYTHON_ASSUME_SAFE_MACROS
#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
#else
#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
#endif
#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x))
#else
#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x))
#endif
#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x))
#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
static int __Pyx_sys_getdefaultencoding_not_ascii;
static int __Pyx_init_sys_getdefaultencoding_params(void) {
    PyObject* sys;
    PyObject* default_encoding = NULL;
    PyObject* ascii_chars_u = NULL;
    PyObject* ascii_chars_b = NULL;
    const char* default_encoding_c;
    sys = PyImport_ImportModule("sys");
    if (!sys) goto bad;
    default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
    Py_DECREF(sys);
    if (!default_encoding) goto bad;
    default_encoding_c = PyBytes_AsString(default_encoding);
    if (!default_encoding_c) goto bad;
    if (strcmp(default_encoding_c, "ascii") == 0) {
        __Pyx_sys_getdefaultencoding_not_ascii = 0;
    } else {
        char ascii_chars[128];
        int c;
        for (c = 0; c < 128; c++) {
            ascii_chars[c] = c;
        }
        __Pyx_sys_getdefaultencoding_not_ascii = 1;
        ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
        if (!ascii_chars_u) goto bad;
        ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
        if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
            PyErr_Format(
                PyExc_ValueError,
                "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
                default_encoding_c);
            goto bad;
        }
        Py_DECREF(ascii_chars_u);
        Py_DECREF(ascii_chars_b);
    }
    Py_DECREF(default_encoding);
    return 0;
bad:
    Py_XDECREF(default_encoding);
    Py_XDECREF(ascii_chars_u);
    Py_XDECREF(ascii_chars_b);
    return -1;
}
#endif
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
#else
#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
static char* __PYX_DEFAULT_STRING_ENCODING;
static int __Pyx_init_sys_getdefaultencoding_params(void) {
    PyObject* sys;
    PyObject* default_encoding = NULL;
    char* default_encoding_c;
    sys = PyImport_ImportModule("sys");
    if (!sys) goto bad;
    default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
    Py_DECREF(sys);
    if (!default_encoding) goto bad;
    default_encoding_c = PyBytes_AsString(default_encoding);
    if (!default_encoding_c) goto bad;
    __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1);
    if (!__PYX_DEFAULT_STRING_ENCODING) goto bad;
    strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
    Py_DECREF(default_encoding);
    return 0;
bad:
    Py_XDECREF(default_encoding);
    return -1;
}
#endif
#endif


/* Test for GCC > 2.95 */
#if defined(__GNUC__)     && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
  #define likely(x)   __builtin_expect(!!(x), 1)
  #define unlikely(x) __builtin_expect(!!(x), 0)
#else /* !__GNUC__ or GCC < 2.95 */
  #define likely(x)   (x)
  #define unlikely(x) (x)
#endif /* __GNUC__ */
static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; }

static PyObject *__pyx_m = NULL;
static PyObject *__pyx_d;
static PyObject *__pyx_b;
static PyObject *__pyx_cython_runtime = NULL;
static PyObject *__pyx_empty_tuple;
static PyObject *__pyx_empty_bytes;
static PyObject *__pyx_empty_unicode;
static int __pyx_lineno;
static int __pyx_clineno = 0;
static const char * __pyx_cfilenm= __FILE__;
static const char *__pyx_filename;


static const char *__pyx_f[] = {
  "optimizers.pyx",
};

/* "typedefs.pxd":7
 * 
 * 
 * ctypedef float weight_t             # <<<<<<<<<<<<<<
 * ctypedef uint64_t atom_t
 * ctypedef uint64_t feat_t
 */
typedef float __pyx_t_5thinc_8typedefs_weight_t;

/* "typedefs.pxd":8
 * 
 * ctypedef float weight_t
 * ctypedef uint64_t atom_t             # <<<<<<<<<<<<<<
 * ctypedef uint64_t feat_t
 * ctypedef uint64_t hash_t
 */
typedef uint64_t __pyx_t_5thinc_8typedefs_atom_t;

/* "typedefs.pxd":9
 * ctypedef float weight_t
 * ctypedef uint64_t atom_t
 * ctypedef uint64_t feat_t             # <<<<<<<<<<<<<<
 * ctypedef uint64_t hash_t
 * ctypedef int32_t class_t
 */
typedef uint64_t __pyx_t_5thinc_8typedefs_feat_t;

/* "typedefs.pxd":10
 * ctypedef uint64_t atom_t
 * ctypedef uint64_t feat_t
 * ctypedef uint64_t hash_t             # <<<<<<<<<<<<<<
 * ctypedef int32_t class_t
 * ctypedef uint32_t count_t
 */
typedef uint64_t __pyx_t_5thinc_8typedefs_hash_t;

/* "typedefs.pxd":11
 * ctypedef uint64_t feat_t
 * ctypedef uint64_t hash_t
 * ctypedef int32_t class_t             # <<<<<<<<<<<<<<
 * ctypedef uint32_t count_t
 * ctypedef uint32_t time_t
 */
typedef int32_t __pyx_t_5thinc_8typedefs_class_t;

/* "typedefs.pxd":12
 * ctypedef uint64_t hash_t
 * ctypedef int32_t class_t
 * ctypedef uint32_t count_t             # <<<<<<<<<<<<<<
 * ctypedef uint32_t time_t
 * ctypedef int32_t len_t
 */
typedef uint32_t __pyx_t_5thinc_8typedefs_count_t;

/* "typedefs.pxd":13
 * ctypedef int32_t class_t
 * ctypedef uint32_t count_t
 * ctypedef uint32_t time_t             # <<<<<<<<<<<<<<
 * ctypedef int32_t len_t
 * ctypedef int32_t idx_t
 */
typedef uint32_t __pyx_t_5thinc_8typedefs_time_t;

/* "typedefs.pxd":14
 * ctypedef uint32_t count_t
 * ctypedef uint32_t time_t
 * ctypedef int32_t len_t             # <<<<<<<<<<<<<<
 * ctypedef int32_t idx_t
 * 
 */
typedef int32_t __pyx_t_5thinc_8typedefs_len_t;

/* "typedefs.pxd":15
 * ctypedef uint32_t time_t
 * ctypedef int32_t len_t
 * ctypedef int32_t idx_t             # <<<<<<<<<<<<<<
 * 
 * 
 */
typedef int32_t __pyx_t_5thinc_8typedefs_idx_t;

/*--- Type declarations ---*/

/* --- Runtime support code (head) --- */
/* Refnanny.proto */
#ifndef CYTHON_REFNANNY
  #define CYTHON_REFNANNY 0
#endif
#if CYTHON_REFNANNY
  typedef struct {
    void (*INCREF)(void*, PyObject*, int);
    void (*DECREF)(void*, PyObject*, int);
    void (*GOTREF)(void*, PyObject*, int);
    void (*GIVEREF)(void*, PyObject*, int);
    void* (*SetupContext)(const char*, int, const char*);
    void (*FinishContext)(void**);
  } __Pyx_RefNannyAPIStruct;
  static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
  static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname);
  #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
#ifdef WITH_THREAD
  #define __Pyx_RefNannySetupContext(name, acquire_gil)\
          if (acquire_gil) {\
              PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\
              __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
              PyGILState_Release(__pyx_gilstate_save);\
          } else {\
              __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
          }
#else
  #define __Pyx_RefNannySetupContext(name, acquire_gil)\
          __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
#endif
  #define __Pyx_RefNannyFinishContext()\
          __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
  #define __Pyx_INCREF(r)  __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
  #define __Pyx_DECREF(r)  __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
  #define __Pyx_GOTREF(r)  __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
  #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
  #define __Pyx_XINCREF(r)  do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
  #define __Pyx_XDECREF(r)  do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
  #define __Pyx_XGOTREF(r)  do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
  #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
#else
  #define __Pyx_RefNannyDeclarations
  #define __Pyx_RefNannySetupContext(name, acquire_gil)
  #define __Pyx_RefNannyFinishContext()
  #define __Pyx_INCREF(r) Py_INCREF(r)
  #define __Pyx_DECREF(r) Py_DECREF(r)
  #define __Pyx_GOTREF(r)
  #define __Pyx_GIVEREF(r)
  #define __Pyx_XINCREF(r) Py_XINCREF(r)
  #define __Pyx_XDECREF(r) Py_XDECREF(r)
  #define __Pyx_XGOTREF(r)
  #define __Pyx_XGIVEREF(r)
#endif
#define __Pyx_XDECREF_SET(r, v) do {\
        PyObject *tmp = (PyObject *) r;\
        r = v; __Pyx_XDECREF(tmp);\
    } while (0)
#define __Pyx_DECREF_SET(r, v) do {\
        PyObject *tmp = (PyObject *) r;\
        r = v; __Pyx_DECREF(tmp);\
    } while (0)
#define __Pyx_CLEAR(r)    do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
#define __Pyx_XCLEAR(r)   do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)

/* PyObjectGetAttrStr.proto */
#if CYTHON_USE_TYPE_SLOTS
static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name);
#else
#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
#endif

/* GetBuiltinName.proto */
static PyObject *__Pyx_GetBuiltinName(PyObject *name);

/* RaiseDoubleKeywords.proto */
static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name);

/* ParseKeywords.proto */
static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\
    PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\
    const char* function_name);

/* RaiseArgTupleInvalid.proto */
static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
    Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found);

/* PyThreadStateGet.proto */
#if CYTHON_FAST_THREAD_STATE
#define __Pyx_PyThreadState_declare  PyThreadState *__pyx_tstate;
#define __Pyx_PyThreadState_assign  __pyx_tstate = __Pyx_PyThreadState_Current;
#define __Pyx_PyErr_Occurred()  __pyx_tstate->curexc_type
#else
#define __Pyx_PyThreadState_declare
#define __Pyx_PyThreadState_assign
#define __Pyx_PyErr_Occurred()  PyErr_Occurred()
#endif

/* PyErrFetchRestore.proto */
#if CYTHON_FAST_THREAD_STATE
#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL)
#define __Pyx_ErrRestoreWithState(type, value, tb)  __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb)
#define __Pyx_ErrFetchWithState(type, value, tb)    __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb)
#define __Pyx_ErrRestore(type, value, tb)  __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb)
#define __Pyx_ErrFetch(type, value, tb)    __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb)
static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
#if CYTHON_COMPILING_IN_CPYTHON
#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL))
#else
#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
#endif
#else
#define __Pyx_PyErr_Clear() PyErr_Clear()
#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
#define __Pyx_ErrRestoreWithState(type, value, tb)  PyErr_Restore(type, value, tb)
#define __Pyx_ErrFetchWithState(type, value, tb)  PyErr_Fetch(type, value, tb)
#define __Pyx_ErrRestoreInState(tstate, type, value, tb)  PyErr_Restore(type, value, tb)
#define __Pyx_ErrFetchInState(tstate, type, value, tb)  PyErr_Fetch(type, value, tb)
#define __Pyx_ErrRestore(type, value, tb)  PyErr_Restore(type, value, tb)
#define __Pyx_ErrFetch(type, value, tb)  PyErr_Fetch(type, value, tb)
#endif

/* Profile.proto */
#ifndef CYTHON_PROFILE
#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_PYSTON
  #define CYTHON_PROFILE 0
#else
  #define CYTHON_PROFILE 1
#endif
#endif
#ifndef CYTHON_TRACE_NOGIL
  #define CYTHON_TRACE_NOGIL 0
#else
  #if CYTHON_TRACE_NOGIL && !defined(CYTHON_TRACE)
    #define CYTHON_TRACE 1
  #endif
#endif
#ifndef CYTHON_TRACE
  #define CYTHON_TRACE 0
#endif
#if CYTHON_TRACE
  #undef CYTHON_PROFILE_REUSE_FRAME
#endif
#ifndef CYTHON_PROFILE_REUSE_FRAME
  #define CYTHON_PROFILE_REUSE_FRAME 0
#endif
#if CYTHON_PROFILE || CYTHON_TRACE
  #include "compile.h"
  #include "frameobject.h"
  #include "traceback.h"
  #if CYTHON_PROFILE_REUSE_FRAME
    #define CYTHON_FRAME_MODIFIER static
    #define CYTHON_FRAME_DEL(frame)
  #else
    #define CYTHON_FRAME_MODIFIER
    #define CYTHON_FRAME_DEL(frame) Py_CLEAR(frame)
  #endif
  #define __Pyx_TraceDeclarations\
  static PyCodeObject *__pyx_frame_code = NULL;\
  CYTHON_FRAME_MODIFIER PyFrameObject *__pyx_frame = NULL;\
  int __Pyx_use_tracing = 0;
  #define __Pyx_TraceFrameInit(codeobj)\
  if (codeobj) __pyx_frame_code = (PyCodeObject*) codeobj;
  #ifdef WITH_THREAD
  #define __Pyx_TraceCall(funcname, srcfile, firstlineno, nogil, goto_error)\
  if (nogil) {\
      if (CYTHON_TRACE_NOGIL) {\
          PyThreadState *tstate;\
          PyGILState_STATE state = PyGILState_Ensure();\
          tstate = __Pyx_PyThreadState_Current;\
          if (unlikely(tstate->use_tracing) && !tstate->tracing &&\
                  (tstate->c_profilefunc || (CYTHON_TRACE && tstate->c_tracefunc))) {\
              __Pyx_use_tracing = __Pyx_TraceSetupAndCall(&__pyx_frame_code, &__pyx_frame, tstate, funcname, srcfile, firstlineno);\
          }\
          PyGILState_Release(state);\
          if (unlikely(__Pyx_use_tracing < 0)) goto_error;\
      }\
  } else {\
      PyThreadState* tstate = PyThreadState_GET();\
      if (unlikely(tstate->use_tracing) && !tstate->tracing &&\
              (tstate->c_profilefunc || (CYTHON_TRACE && tstate->c_tracefunc))) {\
          __Pyx_use_tracing = __Pyx_TraceSetupAndCall(&__pyx_frame_code, &__pyx_frame, tstate, funcname, srcfile, firstlineno);\
          if (unlikely(__Pyx_use_tracing < 0)) goto_error;\
      }\
  }
  #else
  #define __Pyx_TraceCall(funcname, srcfile, firstlineno, nogil, goto_error)\
  {   PyThreadState* tstate = PyThreadState_GET();\
      if (unlikely(tstate->use_tracing) && !tstate->tracing &&\
              (tstate->c_profilefunc || (CYTHON_TRACE && tstate->c_tracefunc))) {\
          __Pyx_use_tracing = __Pyx_TraceSetupAndCall(&__pyx_frame_code, &__pyx_frame, tstate, funcname, srcfile, firstlineno);\
          if (unlikely(__Pyx_use_tracing < 0)) goto_error;\
      }\
  }
  #endif
  #define __Pyx_TraceException()\
  if (likely(!__Pyx_use_tracing)); else {\
      PyThreadState* tstate = __Pyx_PyThreadState_Current;\
      if (tstate->use_tracing &&\
              (tstate->c_profilefunc || (CYTHON_TRACE && tstate->c_tracefunc))) {\
          tstate->tracing++;\
          tstate->use_tracing = 0;\
          PyObject *exc_info = __Pyx_GetExceptionTuple(tstate);\
          if (exc_info) {\
              if (CYTHON_TRACE && tstate->c_tracefunc)\
                  tstate->c_tracefunc(\
                      tstate->c_traceobj, __pyx_frame, PyTrace_EXCEPTION, exc_info);\
              tstate->c_profilefunc(\
                  tstate->c_profileobj, __pyx_frame, PyTrace_EXCEPTION, exc_info);\
              Py_DECREF(exc_info);\
          }\
          tstate->use_tracing = 1;\
          tstate->tracing--;\
      }\
  }
  static void __Pyx_call_return_trace_func(PyThreadState *tstate, PyFrameObject *frame, PyObject *result) {
      PyObject *type, *value, *traceback;
      __Pyx_ErrFetchInState(tstate, &type, &value, &traceback);
      tstate->tracing++;
      tstate->use_tracing = 0;
      if (CYTHON_TRACE && tstate->c_tracefunc)
          tstate->c_tracefunc(tstate->c_traceobj, frame, PyTrace_RETURN, result);
      if (tstate->c_profilefunc)
          tstate->c_profilefunc(tstate->c_profileobj, frame, PyTrace_RETURN, result);
      CYTHON_FRAME_DEL(frame);
      tstate->use_tracing = 1;
      tstate->tracing--;
      __Pyx_ErrRestoreInState(tstate, type, value, traceback);
  }
  #ifdef WITH_THREAD
  #define __Pyx_TraceReturn(result, nogil)\
  if (likely(!__Pyx_use_tracing)); else {\
      if (nogil) {\
          if (CYTHON_TRACE_NOGIL) {\
              PyThreadState *tstate;\
              PyGILState_STATE state = PyGILState_Ensure();\
              tstate = __Pyx_PyThreadState_Current;\
              if (tstate->use_tracing) {\
                  __Pyx_call_return_trace_func(tstate, __pyx_frame, (PyObject*)result);\
              }\
              PyGILState_Release(state);\
          }\
      } else {\
          PyThreadState* tstate = __Pyx_PyThreadState_Current;\
          if (tstate->use_tracing) {\
              __Pyx_call_return_trace_func(tstate, __pyx_frame, (PyObject*)result);\
          }\
      }\
  }
  #else
  #define __Pyx_TraceReturn(result, nogil)\
  if (likely(!__Pyx_use_tracing)); else {\
      PyThreadState* tstate = __Pyx_PyThreadState_Current;\
      if (tstate->use_tracing) {\
          __Pyx_call_return_trace_func(tstate, __pyx_frame, (PyObject*)result);\
      }\
  }
  #endif
  static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const char *srcfile, int firstlineno);
  static int __Pyx_TraceSetupAndCall(PyCodeObject** code, PyFrameObject** frame, PyThreadState* tstate, const char *funcname, const char *srcfile, int firstlineno);
#else
  #define __Pyx_TraceDeclarations
  #define __Pyx_TraceFrameInit(codeobj)
  #define __Pyx_TraceCall(funcname, srcfile, firstlineno, nogil, goto_error)   if ((1)); else goto_error;
  #define __Pyx_TraceException()
  #define __Pyx_TraceReturn(result, nogil)
#endif
#if CYTHON_TRACE
  static int __Pyx_call_line_trace_func(PyThreadState *tstate, PyFrameObject *frame, int lineno) {
      int ret;
      PyObject *type, *value, *traceback;
      __Pyx_ErrFetchInState(tstate, &type, &value, &traceback);
      __Pyx_PyFrame_SetLineNumber(frame, lineno);
      tstate->tracing++;
      tstate->use_tracing = 0;
      ret = tstate->c_tracefunc(tstate->c_traceobj, frame, PyTrace_LINE, NULL);
      tstate->use_tracing = 1;
      tstate->tracing--;
      if (likely(!ret)) {
          __Pyx_ErrRestoreInState(tstate, type, value, traceback);
      } else {
          Py_XDECREF(type);
          Py_XDECREF(value);
          Py_XDECREF(traceback);
      }
      return ret;
  }
  #ifdef WITH_THREAD
  #define __Pyx_TraceLine(lineno, nogil, goto_error)\
  if (likely(!__Pyx_use_tracing)); else {\
      if (nogil) {\
          if (CYTHON_TRACE_NOGIL) {\
              int ret = 0;\
              PyThreadState *tstate;\
              PyGILState_STATE state = PyGILState_Ensure();\
              tstate = __Pyx_PyThreadState_Current;\
              if (unlikely(tstate->use_tracing && tstate->c_tracefunc && __pyx_frame->f_trace)) {\
                  ret = __Pyx_call_line_trace_func(tstate, __pyx_frame, lineno);\
              }\
              PyGILState_Release(state);\
              if (unlikely(ret)) goto_error;\
          }\
      } else {\
          PyThreadState* tstate = __Pyx_PyThreadState_Current;\
          if (unlikely(tstate->use_tracing && tstate->c_tracefunc && __pyx_frame->f_trace)) {\
              int ret = __Pyx_call_line_trace_func(tstate, __pyx_frame, lineno);\
              if (unlikely(ret)) goto_error;\
          }\
      }\
  }
  #else
  #define __Pyx_TraceLine(lineno, nogil, goto_error)\
  if (likely(!__Pyx_use_tracing)); else {\
      PyThreadState* tstate = __Pyx_PyThreadState_Current;\
      if (unlikely(tstate->use_tracing && tstate->c_tracefunc && __pyx_frame->f_trace)) {\
          int ret = __Pyx_call_line_trace_func(tstate, __pyx_frame, lineno);\
          if (unlikely(ret)) goto_error;\
      }\
  }
  #endif
#else
  #define __Pyx_TraceLine(lineno, nogil, goto_error)   if ((1)); else goto_error;
#endif

/* PyDictVersioning.proto */
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS
#define __PYX_DICT_VERSION_INIT  ((PY_UINT64_T) -1)
#define __PYX_GET_DICT_VERSION(dict)  (((PyDictObject*)(dict))->ma_version_tag)
#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\
    (version_var) = __PYX_GET_DICT_VERSION(dict);\
    (cache_var) = (value);
#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\
    static PY_UINT64_T __pyx_dict_version = 0;\
    static PyObject *__pyx_dict_cached_value = NULL;\
    if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\
        (VAR) = __pyx_dict_cached_value;\
    } else {\
        (VAR) = __pyx_dict_cached_value = (LOOKUP);\
        __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\
    }\
}
static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj);
static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj);
static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version);
#else
#define __PYX_GET_DICT_VERSION(dict)  (0)
#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)
#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP)  (VAR) = (LOOKUP);
#endif

/* GetModuleGlobalName.proto */
#if CYTHON_USE_DICT_VERSIONS
#define __Pyx_GetModuleGlobalName(var, name)  {\
    static PY_UINT64_T __pyx_dict_version = 0;\
    static PyObject *__pyx_dict_cached_value = NULL;\
    (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\
        (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\
        __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
}
#define __Pyx_GetModuleGlobalNameUncached(var, name)  {\
    PY_UINT64_T __pyx_dict_version;\
    PyObject *__pyx_dict_cached_value;\
    (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
}
static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value);
#else
#define __Pyx_GetModuleGlobalName(var, name)  (var) = __Pyx__GetModuleGlobalName(name)
#define __Pyx_GetModuleGlobalNameUncached(var, name)  (var) = __Pyx__GetModuleGlobalName(name)
static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name);
#endif

/* PyCFunctionFastCall.proto */
#if CYTHON_FAST_PYCCALL
static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObject **args, Py_ssize_t nargs);
#else
#define __Pyx_PyCFunction_FastCall(func, args, nargs)  (assert(0), NULL)
#endif

/* PyFunctionFastCall.proto */
#if CYTHON_FAST_PYCALL
#define __Pyx_PyFunction_FastCall(func, args, nargs)\
    __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL)
#if 1 || PY_VERSION_HEX < 0x030600B1
static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs);
#else
#define __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs) _PyFunction_FastCallDict(func, args, nargs, kwargs)
#endif
#define __Pyx_BUILD_ASSERT_EXPR(cond)\
    (sizeof(char [1 - 2*!(cond)]) - 1)
#ifndef Py_MEMBER_SIZE
#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member)
#endif
  static size_t __pyx_pyframe_localsplus_offset = 0;
  #include "frameobject.h"
  #define __Pxy_PyFrame_Initialize_Offsets()\
    ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\
     (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus)))
  #define __Pyx_PyFrame_GetLocalsplus(frame)\
    (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset))
#endif

/* PyObjectCall.proto */
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw);
#else
#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
#endif

/* PyObjectCall2Args.proto */
static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2);

/* PyObjectCallMethO.proto */
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg);
#endif

/* PyObjectCallOneArg.proto */
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg);

/* PyObjectSetAttrStr.proto */
#if CYTHON_USE_TYPE_SLOTS
#define __Pyx_PyObject_DelAttrStr(o,n) __Pyx_PyObject_SetAttrStr(o, n, NULL)
static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value);
#else
#define __Pyx_PyObject_DelAttrStr(o,n)   PyObject_DelAttr(o,n)
#define __Pyx_PyObject_SetAttrStr(o,n,v) PyObject_SetAttr(o,n,v)
#endif

/* PyObjectCallNoArg.proto */
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func);
#else
#define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL)
#endif

/* RaiseTooManyValuesToUnpack.proto */
static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);

/* RaiseNeedMoreValuesToUnpack.proto */
static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);

/* IterFinish.proto */
static CYTHON_INLINE int __Pyx_IterFinish(void);

/* UnpackItemEndCheck.proto */
static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected);

/* GetAttr.proto */
static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *);

/* HasAttr.proto */
static CYTHON_INLINE int __Pyx_HasAttr(PyObject *, PyObject *);

/* IterNext.proto */
#define __Pyx_PyIter_Next(obj) __Pyx_PyIter_Next2(obj, NULL)
static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject *, PyObject *);

/* PyFloatBinop.proto */
#if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyFloat_EqObjC(PyObject *op1, PyObject *op2, double floatval, int inplace, int zerodivision_check);
#else
#define __Pyx_PyFloat_EqObjC(op1, op2, floatval, inplace, zerodivision_check)\
    (PyObject_RichCompare(op1, op2, Py_EQ))
    #endif

/* PyFloatBinop.proto */
#if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyFloat_SubtractCObj(PyObject *op1, PyObject *op2, double floatval, int inplace, int zerodivision_check);
#else
#define __Pyx_PyFloat_SubtractCObj(op1, op2, floatval, inplace, zerodivision_check)\
    (inplace ? PyNumber_InPlaceSubtract(op1, op2) : PyNumber_Subtract(op1, op2))
#endif

/* PyIntBinop.proto */
#if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check);
#else
#define __Pyx_PyInt_AddObjC(op1, op2, intval, inplace, zerodivision_check)\
    (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2))
#endif

/* GetItemInt.proto */
#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
    (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
    __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\
    (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\
               __Pyx_GetItemInt_Generic(o, to_py_func(i))))
#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
    (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
    __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\
    (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL))
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
                                                              int wraparound, int boundscheck);
#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
    (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
    __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\
    (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL))
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
                                                              int wraparound, int boundscheck);
static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j);
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
                                                     int is_list, int wraparound, int boundscheck);

/* ObjectGetItem.proto */
#if CYTHON_USE_TYPE_SLOTS
static CYTHON_INLINE PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject* key);
#else
#define __Pyx_PyObject_GetItem(obj, key)  PyObject_GetItem(obj, key)
#endif

/* PyIntCompare.proto */
static CYTHON_INLINE PyObject* __Pyx_PyInt_NeObjC(PyObject *op1, PyObject *op2, long intval, long inplace);

/* RaiseException.proto */
static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause);

/* PyIntCompare.proto */
static CYTHON_INLINE PyObject* __Pyx_PyInt_EqObjC(PyObject *op1, PyObject *op2, long intval, long inplace);

/* PySequenceContains.proto */
static CYTHON_INLINE int __Pyx_PySequence_ContainsTF(PyObject* item, PyObject* seq, int eq) {
    int result = PySequence_Contains(seq, item);
    return unlikely(result < 0) ? result : (result == (eq == Py_EQ));
}

/* SliceObject.proto */
#define __Pyx_PyObject_DelSlice(obj, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)\
    __Pyx_PyObject_SetSlice(obj, (PyObject*)NULL, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)
static CYTHON_INLINE int __Pyx_PyObject_SetSlice(
        PyObject* obj, PyObject* value, Py_ssize_t cstart, Py_ssize_t cstop,
        PyObject** py_start, PyObject** py_stop, PyObject** py_slice,
        int has_cstart, int has_cstop, int wraparound);

/* PyIntBinop.proto */
#if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyInt_SubtractObjC(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check);
#else
#define __Pyx_PyInt_SubtractObjC(op1, op2, intval, inplace, zerodivision_check)\
    (inplace ? PyNumber_InPlaceSubtract(op1, op2) : PyNumber_Subtract(op1, op2))
#endif

/* PyIntBinop.proto */
#if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyInt_SubtractCObj(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check);
#else
#define __Pyx_PyInt_SubtractCObj(op1, op2, intval, inplace, zerodivision_check)\
    (inplace ? PyNumber_InPlaceSubtract(op1, op2) : PyNumber_Subtract(op1, op2))
#endif

/* PyIntBinop.proto */
#if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyInt_AddCObj(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check);
#else
#define __Pyx_PyInt_AddCObj(op1, op2, intval, inplace, zerodivision_check)\
    (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2))
#endif

/* IncludeStringH.proto */
#include <string.h>

/* BytesEquals.proto */
static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals);

/* UnicodeEquals.proto */
static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals);

/* StrEquals.proto */
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals
#else
#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals
#endif

/* Import.proto */
static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level);

/* ImportFrom.proto */
static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name);

/* KeywordStringCheck.proto */
static int __Pyx_CheckKeywordStrings(PyObject *kwdict, const char* function_name, int kw_allowed);

/* dict_setdefault.proto */
static CYTHON_INLINE PyObject *__Pyx_PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *default_value, int is_safe_type);

/* UnpackUnboundCMethod.proto */
typedef struct {
    PyObject *type;
    PyObject **method_name;
    PyCFunction func;
    PyObject *method;
    int flag;
} __Pyx_CachedCFunction;

/* CallUnboundCMethod2.proto */
static PyObject* __Pyx__CallUnboundCMethod2(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg1, PyObject* arg2);
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030600B1
static CYTHON_INLINE PyObject *__Pyx_CallUnboundCMethod2(__Pyx_CachedCFunction *cfunc, PyObject *self, PyObject *arg1, PyObject *arg2);
#else
#define __Pyx_CallUnboundCMethod2(cfunc, self, arg1, arg2)  __Pyx__CallUnboundCMethod2(cfunc, self, arg1, arg2)
#endif

/* PyFloatBinop.proto */
#if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyFloat_AddCObj(PyObject *op1, PyObject *op2, double floatval, int inplace, int zerodivision_check);
#else
#define __Pyx_PyFloat_AddCObj(op1, op2, floatval, inplace, zerodivision_check)\
    (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2))
#endif

/* DictGetItem.proto */
#if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY
static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key);
#define __Pyx_PyObject_Dict_GetItem(obj, name)\
    (likely(PyDict_CheckExact(obj)) ?\
     __Pyx_PyDict_GetItem(obj, name) : PyObject_GetItem(obj, name))
#else
#define __Pyx_PyDict_GetItem(d, key) PyObject_GetItem(d, key)
#define __Pyx_PyObject_Dict_GetItem(obj, name)  PyObject_GetItem(obj, name)
#endif

/* CalculateMetaclass.proto */
static PyObject *__Pyx_CalculateMetaclass(PyTypeObject *metaclass, PyObject *bases);

/* ClassMethod.proto */
#include "descrobject.h"
static CYTHON_UNUSED PyObject* __Pyx_Method_ClassMethod(PyObject *method);

/* FetchCommonType.proto */
static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type);

/* CythonFunctionShared.proto */
#define __Pyx_CyFunction_USED 1
#define __Pyx_CYFUNCTION_STATICMETHOD  0x01
#define __Pyx_CYFUNCTION_CLASSMETHOD   0x02
#define __Pyx_CYFUNCTION_CCLASS        0x04
#define __Pyx_CyFunction_GetClosure(f)\
    (((__pyx_CyFunctionObject *) (f))->func_closure)
#define __Pyx_CyFunction_GetClassObj(f)\
    (((__pyx_CyFunctionObject *) (f))->func_classobj)
#define __Pyx_CyFunction_Defaults(type, f)\
    ((type *)(((__pyx_CyFunctionObject *) (f))->defaults))
#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\
    ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g)
typedef struct {
    PyCFunctionObject func;
#if PY_VERSION_HEX < 0x030500A0
    PyObject *func_weakreflist;
#endif
    PyObject *func_dict;
    PyObject *func_name;
    PyObject *func_qualname;
    PyObject *func_doc;
    PyObject *func_globals;
    PyObject *func_code;
    PyObject *func_closure;
    PyObject *func_classobj;
    void *defaults;
    int defaults_pyobjects;
    size_t defaults_size;  // used by FusedFunction for copying defaults
    int flags;
    PyObject *defaults_tuple;
    PyObject *defaults_kwdict;
    PyObject *(*defaults_getter)(PyObject *);
    PyObject *func_annotations;
} __pyx_CyFunctionObject;
static PyTypeObject *__pyx_CyFunctionType = 0;
#define __Pyx_CyFunction_Check(obj)  (__Pyx_TypeCheck(obj, __pyx_CyFunctionType))
static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml,
                                      int flags, PyObject* qualname,
                                      PyObject *self,
                                      PyObject *module, PyObject *globals,
                                      PyObject* code);
static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m,
                                                         size_t size,
                                                         int pyobjects);
static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m,
                                                            PyObject *tuple);
static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m,
                                                             PyObject *dict);
static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m,
                                                              PyObject *dict);
static int __pyx_CyFunction_init(void);

/* CythonFunction.proto */
static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml,
                                      int flags, PyObject* qualname,
                                      PyObject *closure,
                                      PyObject *module, PyObject *globals,
                                      PyObject* code);

/* SetNameInClass.proto */
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1
#define __Pyx_SetNameInClass(ns, name, value)\
    (likely(PyDict_CheckExact(ns)) ? _PyDict_SetItem_KnownHash(ns, name, value, ((PyASCIIObject *) name)->hash) : PyObject_SetItem(ns, name, value))
#elif CYTHON_COMPILING_IN_CPYTHON
#define __Pyx_SetNameInClass(ns, name, value)\
    (likely(PyDict_CheckExact(ns)) ? PyDict_SetItem(ns, name, value) : PyObject_SetItem(ns, name, value))
#else
#define __Pyx_SetNameInClass(ns, name, value)  PyObject_SetItem(ns, name, value)
#endif

/* Py3ClassCreate.proto */
static PyObject *__Pyx_Py3MetaclassPrepare(PyObject *metaclass, PyObject *bases, PyObject *name, PyObject *qualname,
                                           PyObject *mkw, PyObject *modname, PyObject *doc);
static PyObject *__Pyx_Py3ClassCreate(PyObject *metaclass, PyObject *name, PyObject *bases, PyObject *dict,
                                      PyObject *mkw, int calculate_metaclass, int allow_py2_metaclass);

/* CLineInTraceback.proto */
#ifdef CYTHON_CLINE_IN_TRACEBACK
#define __Pyx_CLineForTraceback(tstate, c_line)  (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0)
#else
static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line);
#endif

/* CodeObjectCache.proto */
typedef struct {
    PyCodeObject* code_object;
    int code_line;
} __Pyx_CodeObjectCacheEntry;
struct __Pyx_CodeObjectCache {
    int count;
    int max_count;
    __Pyx_CodeObjectCacheEntry* entries;
};
static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
static PyCodeObject *__pyx_find_code_object(int code_line);
static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);

/* AddTraceback.proto */
static void __Pyx_AddTraceback(const char *funcname, int c_line,
                               int py_line, const char *filename);

/* CIntToPy.proto */
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);

/* CIntFromPy.proto */
static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);

/* CIntFromPy.proto */
static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);

/* FastTypeChecks.proto */
#if CYTHON_COMPILING_IN_CPYTHON
#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type)
static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b);
static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type);
static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2);
#else
#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type)
#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2))
#endif
#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception)

/* CheckBinaryVersion.proto */
static int __Pyx_check_binary_version(void);

/* InitStrings.proto */
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t);


/* Module declarations from 'cython' */

/* Module declarations from 'libc.string' */

/* Module declarations from 'libc.math' */

/* Module declarations from 'libc.stdlib' */

/* Module declarations from 'libc.stdint' */

/* Module declarations from 'thinc.typedefs' */

/* Module declarations from 'thinc.neural.optimizers' */
#define __Pyx_MODULE_NAME "thinc.neural.optimizers"
extern int __pyx_module_is_main_thinc__neural__optimizers;
int __pyx_module_is_main_thinc__neural__optimizers = 0;

/* Implementation of 'thinc.neural.optimizers' */
static PyObject *__pyx_builtin_object;
static PyObject *__pyx_builtin_property;
static PyObject *__pyx_builtin_NotImplementedError;
static const char __pyx_k_f[] = "f";
static const char __pyx_k_L2[] = "L2";
static const char __pyx_k_b1[] = "b1";
static const char __pyx_k_b2[] = "b2";
static const char __pyx_k_lr[] = "lr";
static const char __pyx_k_xp[] = "xp";
static const char __pyx_k_SGD[] = "SGD";
static const char __pyx_k__47[] = "_";
static const char __pyx_k_cls[] = "cls";
static const char __pyx_k_doc[] = "__doc__";
static const char __pyx_k_eps[] = "eps";
static const char __pyx_k_get[] = "get";
static const char __pyx_k_key[] = "key";
static const char __pyx_k_ops[] = "ops";
static const char __pyx_k_r_t[] = "r_t";
static const char __pyx_k_Adam[] = "Adam";
static const char __pyx_k_adam[] = "_adam";
static const char __pyx_k_args[] = "args";
static const char __pyx_k_call[] = "__call__";
static const char __pyx_k_fill[] = "fill";
static const char __pyx_k_fix1[] = "fix1";
static const char __pyx_k_fix2[] = "fix2";
static const char __pyx_k_init[] = "__init__";
static const char __pyx_k_main[] = "__main__";
static const char __pyx_k_math[] = "math";
static const char __pyx_k_mom1[] = "mom1";
static const char __pyx_k_mom2[] = "mom2";
static const char __pyx_k_name[] = "__name__";
static const char __pyx_k_norm[] = "norm";
static const char __pyx_k_rate[] = "rate";
static const char __pyx_k_self[] = "self";
static const char __pyx_k_size[] = "size";
static const char __pyx_k_slow[] = "slow";
static const char __pyx_k_sqrt[] = "sqrt";
static const char __pyx_k_test[] = "__test__";
static const char __pyx_k_util[] = "util";
static const char __pyx_k_Model[] = "Model";
static const char __pyx_k_alpha[] = "alpha";
static const char __pyx_k_beta1[] = "beta1";
static const char __pyx_k_beta2[] = "beta2";
static const char __pyx_k_decay[] = "decay";
static const char __pyx_k_dtype[] = "dtype";
static const char __pyx_k_items[] = "items";
static const char __pyx_k_numpy[] = "numpy";
static const char __pyx_k_phi_p[] = "phi_p";
static const char __pyx_k_radam[] = "_radam";
static const char __pyx_k_shape[] = "shape";
static const char __pyx_k_sma_t[] = "sma_t";
static const char __pyx_k_value[] = "value";
static const char __pyx_k_SGD_v1[] = "SGD.v1";
static const char __pyx_k_adam_2[] = "adam";
static const char __pyx_k_anneal[] = "anneal";
static const char __pyx_k_config[] = "config";
static const char __pyx_k_import[] = "__import__";
static const char __pyx_k_kwargs[] = "kwargs";
static const char __pyx_k_linalg[] = "linalg";
static const char __pyx_k_module[] = "__module__";
static const char __pyx_k_nr_upd[] = "nr_upd";
static const char __pyx_k_object[] = "object";
static const char __pyx_k_params[] = "params";
static const char __pyx_k_setter[] = "setter";
static const char __pyx_k_to_cpu[] = "to_cpu";
static const char __pyx_k_to_gpu[] = "to_gpu";
static const char __pyx_k_u_norm[] = "u_norm";
static const char __pyx_k_update[] = "update";
static const char __pyx_k_w_norm[] = "w_norm";
static const char __pyx_k_Adam_v1[] = "Adam.v1";
static const char __pyx_k_CupyOps[] = "CupyOps";
static const char __pyx_k_asarray[] = "asarray";
static const char __pyx_k_exp_avg[] = "exp_avg";
static const char __pyx_k_float32[] = "float32";
static const char __pyx_k_prepare[] = "__prepare__";
static const char __pyx_k_sma_inf[] = "sma_inf";
static const char __pyx_k_weights[] = "weights";
static const char __pyx_k_NumpyOps[] = "NumpyOps";
static const char __pyx_k_RAdam_v1[] = "RAdam.v1";
static const char __pyx_k_allocate[] = "allocate";
static const char __pyx_k_averages[] = "averages";
static const char __pyx_k_b1_decay[] = "b1_decay";
static const char __pyx_k_b2_decay[] = "b2_decay";
static const char __pyx_k_gradient[] = "gradient";
static const char __pyx_k_lars_max[] = "lars_max";
static const char __pyx_k_lars_min[] = "lars_min";
static const char __pyx_k_local_lr[] = "local_lr";
static const char __pyx_k_lr_scale[] = "lr_scale";
static const char __pyx_k_make_ops[] = "_make_ops";
static const char __pyx_k_momentum[] = "momentum";
static const char __pyx_k_nesterov[] = "nesterov";
static const char __pyx_k_property[] = "property";
static const char __pyx_k_qualname[] = "__qualname__";
static const char __pyx_k_register[] = "register";
static const char __pyx_k_registry[] = "registry";
static const char __pyx_k_schedule[] = "schedule";
static const char __pyx_k_use_lars[] = "use_lars";
static const char __pyx_k_Optimizer[] = "Optimizer";
static const char __pyx_k_last_seen[] = "last_seen";
static const char __pyx_k_metaclass[] = "__metaclass__";
static const char __pyx_k_nr_update[] = "nr_update";
static const char __pyx_k_schedules[] = "schedules";
static const char __pyx_k_use_radam[] = "use_radam";
static const char __pyx_k_create_SGD[] = "create_SGD";
static const char __pyx_k_exp_avg_sq[] = "exp_avg_sq";
static const char __pyx_k_learn_rate[] = "learn_rate";
static const char __pyx_k_nesterov_2[] = "_nesterov";
static const char __pyx_k_optimizers[] = "optimizers";
static const char __pyx_k_registry_2[] = "_registry";
static const char __pyx_k_setdefault[] = "setdefault";
static const char __pyx_k_collections[] = "collections";
static const char __pyx_k_create_Adam[] = "create_Adam";
static const char __pyx_k_decay_steps[] = "decay_steps";
static const char __pyx_k_defaultdict[] = "defaultdict";
static const char __pyx_k_from_config[] = "from_config";
static const char __pyx_k_lookahead_k[] = "lookahead_k";
static const char __pyx_k_Optimizer_lr[] = "Optimizer.lr";
static const char __pyx_k_SGD_DEFAULTS[] = "SGD_DEFAULTS";
static const char __pyx_k_create_RAdam[] = "create_RAdam";
static const char __pyx_k_linear_decay[] = "linear_decay";
static const char __pyx_k_slow_weights[] = "slow_weights";
static const char __pyx_k_use_averages[] = "use_averages";
static const char __pyx_k_ADAM_DEFAULTS[] = "ADAM_DEFAULTS";
static const char __pyx_k_classes_model[] = "_classes.model";
static const char __pyx_k_clip_gradient[] = "clip_gradient";
static const char __pyx_k_max_grad_norm[] = "max_grad_norm";
static const char __pyx_k_gradient_noise[] = "gradient_noise";
static const char __pyx_k_optimizers_pyx[] = "optimizers.pyx";
static const char __pyx_k_step_schedules[] = "step_schedules";
static const char __pyx_k_Optimizer__adam[] = "Optimizer._adam";
static const char __pyx_k_lookahead_alpha[] = "lookahead_alpha";
static const char __pyx_k_update_averages[] = "update_averages";
static const char __pyx_k_Optimizer___call[] = "Optimizer.__call__";
static const char __pyx_k_Optimizer___init[] = "Optimizer.__init__";
static const char __pyx_k_Optimizer__radam[] = "Optimizer._radam";
static const char __pyx_k_Optimizer_to_cpu[] = "Optimizer.to_cpu";
static const char __pyx_k_Optimizer_to_gpu[] = "Optimizer.to_gpu";
static const char __pyx_k_bias_correction1[] = "bias_correction1";
static const char __pyx_k_bias_correction2[] = "bias_correction2";
static const char __pyx_k_get_array_module[] = "get_array_module";
static const char __pyx_k_make_from_config[] = "make_from_config";
static const char __pyx_k_L2_is_weight_decay[] = "L2_is_weight_decay";
static const char __pyx_k_add_gradient_noise[] = "add_gradient_noise";
static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback";
static const char __pyx_k_NotImplementedError[] = "NotImplementedError";
static const char __pyx_k_Optimizer__nesterov[] = "Optimizer._nesterov";
static const char __pyx_k_Optimizer_learn_rate[] = "Optimizer.learn_rate";
static const char __pyx_k_Optimizer_from_config[] = "Optimizer.from_config";
static const char __pyx_k_thinc_neural_optimizers[] = "thinc.neural.optimizers";
static const char __pyx_k_Optimizer_step_schedules[] = "Optimizer.step_schedules";
static const char __pyx_k_Do_various_flavours_of_stochasti[] = "Do various flavours of stochastic gradient descent, with first and\n    second order momentum.\n    \n    Examples\n    \n    * beta1=0., beta2=0.: \"vanilla\" SGD\n    * beta1=0.9, beta2=0.: \"Classic momentum\"\n    * beta1=0.0, beta2=0.2: RMS prop\n    * b1=0.999, b2=0.9: Adam\n    ";
static PyObject *__pyx_n_s_ADAM_DEFAULTS;
static PyObject *__pyx_n_s_Adam;
static PyObject *__pyx_kp_s_Adam_v1;
static PyObject *__pyx_n_s_CupyOps;
static PyObject *__pyx_kp_s_Do_various_flavours_of_stochasti;
static PyObject *__pyx_n_s_L2;
static PyObject *__pyx_n_s_L2_is_weight_decay;
static PyObject *__pyx_n_s_Model;
static PyObject *__pyx_n_s_NotImplementedError;
static PyObject *__pyx_n_s_NumpyOps;
static PyObject *__pyx_n_s_Optimizer;
static PyObject *__pyx_n_s_Optimizer___call;
static PyObject *__pyx_n_s_Optimizer___init;
static PyObject *__pyx_n_s_Optimizer__adam;
static PyObject *__pyx_n_s_Optimizer__nesterov;
static PyObject *__pyx_n_s_Optimizer__radam;
static PyObject *__pyx_n_s_Optimizer_from_config;
static PyObject *__pyx_n_s_Optimizer_learn_rate;
static PyObject *__pyx_n_s_Optimizer_lr;
static PyObject *__pyx_n_s_Optimizer_step_schedules;
static PyObject *__pyx_n_s_Optimizer_to_cpu;
static PyObject *__pyx_n_s_Optimizer_to_gpu;
static PyObject *__pyx_kp_s_RAdam_v1;
static PyObject *__pyx_n_s_SGD;
static PyObject *__pyx_n_s_SGD_DEFAULTS;
static PyObject *__pyx_kp_s_SGD_v1;
static PyObject *__pyx_n_s__47;
static PyObject *__pyx_n_s_adam;
static PyObject *__pyx_n_s_adam_2;
static PyObject *__pyx_n_s_add_gradient_noise;
static PyObject *__pyx_n_s_allocate;
static PyObject *__pyx_n_s_alpha;
static PyObject *__pyx_n_s_anneal;
static PyObject *__pyx_n_s_args;
static PyObject *__pyx_n_s_asarray;
static PyObject *__pyx_n_s_averages;
static PyObject *__pyx_n_s_b1;
static PyObject *__pyx_n_s_b1_decay;
static PyObject *__pyx_n_s_b2;
static PyObject *__pyx_n_s_b2_decay;
static PyObject *__pyx_n_s_beta1;
static PyObject *__pyx_n_s_beta2;
static PyObject *__pyx_n_s_bias_correction1;
static PyObject *__pyx_n_s_bias_correction2;
static PyObject *__pyx_n_s_call;
static PyObject *__pyx_n_s_classes_model;
static PyObject *__pyx_n_s_cline_in_traceback;
static PyObject *__pyx_n_s_clip_gradient;
static PyObject *__pyx_n_s_cls;
static PyObject *__pyx_n_s_collections;
static PyObject *__pyx_n_s_config;
static PyObject *__pyx_n_s_create_Adam;
static PyObject *__pyx_n_s_create_RAdam;
static PyObject *__pyx_n_s_create_SGD;
static PyObject *__pyx_n_s_decay;
static PyObject *__pyx_n_s_decay_steps;
static PyObject *__pyx_n_s_defaultdict;
static PyObject *__pyx_n_s_doc;
static PyObject *__pyx_n_s_dtype;
static PyObject *__pyx_n_s_eps;
static PyObject *__pyx_n_s_exp_avg;
static PyObject *__pyx_n_s_exp_avg_sq;
static PyObject *__pyx_n_s_f;
static PyObject *__pyx_n_s_fill;
static PyObject *__pyx_n_s_fix1;
static PyObject *__pyx_n_s_fix2;
static PyObject *__pyx_n_s_float32;
static PyObject *__pyx_n_s_from_config;
static PyObject *__pyx_n_s_get;
static PyObject *__pyx_n_s_get_array_module;
static PyObject *__pyx_n_s_gradient;
static PyObject *__pyx_n_s_gradient_noise;
static PyObject *__pyx_n_s_import;
static PyObject *__pyx_n_s_init;
static PyObject *__pyx_n_s_items;
static PyObject *__pyx_n_s_key;
static PyObject *__pyx_n_s_kwargs;
static PyObject *__pyx_n_s_lars_max;
static PyObject *__pyx_n_s_lars_min;
static PyObject *__pyx_n_s_last_seen;
static PyObject *__pyx_n_s_learn_rate;
static PyObject *__pyx_n_s_linalg;
static PyObject *__pyx_n_s_linear_decay;
static PyObject *__pyx_n_s_local_lr;
static PyObject *__pyx_n_s_lookahead_alpha;
static PyObject *__pyx_n_s_lookahead_k;
static PyObject *__pyx_n_s_lr;
static PyObject *__pyx_n_s_lr_scale;
static PyObject *__pyx_n_s_main;
static PyObject *__pyx_n_s_make_from_config;
static PyObject *__pyx_n_s_make_ops;
static PyObject *__pyx_n_s_math;
static PyObject *__pyx_n_s_max_grad_norm;
static PyObject *__pyx_n_s_metaclass;
static PyObject *__pyx_n_s_module;
static PyObject *__pyx_n_s_mom1;
static PyObject *__pyx_n_s_mom2;
static PyObject *__pyx_n_s_momentum;
static PyObject *__pyx_n_s_name;
static PyObject *__pyx_n_s_nesterov;
static PyObject *__pyx_n_s_nesterov_2;
static PyObject *__pyx_n_s_norm;
static PyObject *__pyx_n_s_nr_upd;
static PyObject *__pyx_n_s_nr_update;
static PyObject *__pyx_n_s_numpy;
static PyObject *__pyx_n_s_object;
static PyObject *__pyx_n_s_ops;
static PyObject *__pyx_n_s_optimizers;
static PyObject *__pyx_kp_s_optimizers_pyx;
static PyObject *__pyx_n_s_params;
static PyObject *__pyx_n_s_phi_p;
static PyObject *__pyx_n_s_prepare;
static PyObject *__pyx_n_s_property;
static PyObject *__pyx_n_s_qualname;
static PyObject *__pyx_n_s_r_t;
static PyObject *__pyx_n_s_radam;
static PyObject *__pyx_n_s_rate;
static PyObject *__pyx_n_s_register;
static PyObject *__pyx_n_s_registry;
static PyObject *__pyx_n_s_registry_2;
static PyObject *__pyx_n_s_schedule;
static PyObject *__pyx_n_s_schedules;
static PyObject *__pyx_n_s_self;
static PyObject *__pyx_n_s_setdefault;
static PyObject *__pyx_n_s_setter;
static PyObject *__pyx_n_s_shape;
static PyObject *__pyx_n_s_size;
static PyObject *__pyx_n_s_slow;
static PyObject *__pyx_n_s_slow_weights;
static PyObject *__pyx_n_s_sma_inf;
static PyObject *__pyx_n_s_sma_t;
static PyObject *__pyx_n_s_sqrt;
static PyObject *__pyx_n_s_step_schedules;
static PyObject *__pyx_n_s_test;
static PyObject *__pyx_n_s_thinc_neural_optimizers;
static PyObject *__pyx_n_s_to_cpu;
static PyObject *__pyx_n_s_to_gpu;
static PyObject *__pyx_n_s_u_norm;
static PyObject *__pyx_n_s_update;
static PyObject *__pyx_n_s_update_averages;
static PyObject *__pyx_n_s_use_averages;
static PyObject *__pyx_n_s_use_lars;
static PyObject *__pyx_n_s_use_radam;
static PyObject *__pyx_n_s_util;
static PyObject *__pyx_n_s_value;
static PyObject *__pyx_n_s_w_norm;
static PyObject *__pyx_n_s_weights;
static PyObject *__pyx_n_s_xp;
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_create_RAdam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_learn_rate, PyObject *__pyx_v_L2, PyObject *__pyx_v_beta1, PyObject *__pyx_v_beta2, PyObject *__pyx_v_eps, PyObject *__pyx_v_max_grad_norm, PyObject *__pyx_v_L2_is_weight_decay, CYTHON_UNUSED PyObject *__pyx_v_use_averages, PyObject *__pyx_v_schedules, PyObject *__pyx_v_ops); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_2create_Adam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_learn_rate, PyObject *__pyx_v_L2, PyObject *__pyx_v_beta1, PyObject *__pyx_v_beta2, PyObject *__pyx_v_eps, PyObject *__pyx_v_max_grad_norm, PyObject *__pyx_v_L2_is_weight_decay, CYTHON_UNUSED PyObject *__pyx_v_use_averages, PyObject *__pyx_v_ops, PyObject *__pyx_v_schedules); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_4create_SGD(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_learn_rate, PyObject *__pyx_v_ops, PyObject *__pyx_v_L2, PyObject *__pyx_v_max_grad_norm, PyObject *__pyx_v_L2_is_weight_decay, CYTHON_UNUSED PyObject *__pyx_v_use_averages, PyObject *__pyx_v_schedules); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_from_config(CYTHON_UNUSED PyObject *__pyx_self, CYTHON_UNUSED PyObject *__pyx_v_cls, PyObject *__pyx_v_config); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_2__init__(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, PyObject *__pyx_v_ops, PyObject *__pyx_v_lr, PyObject *__pyx_v_L2, PyObject *__pyx_v_beta1, PyObject *__pyx_v_beta2, PyObject *__pyx_v_eps, PyObject *__pyx_v_max_grad_norm, PyObject *__pyx_v_gradient_noise, PyObject *__pyx_v_nesterov, PyObject *__pyx_v_L2_is_weight_decay, PyObject *__pyx_v_lookahead_k, PyObject *__pyx_v_lookahead_alpha, PyObject *__pyx_v_use_averages, PyObject *__pyx_v_use_radam, PyObject *__pyx_v_use_lars, PyObject *__pyx_v_schedule, CYTHON_UNUSED PyObject *__pyx_v__); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_4to_gpu(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_6to_cpu(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_8step_schedules(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_10learn_rate(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_12learn_rate(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, PyObject *__pyx_v_learn_rate); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_14lr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, PyObject *__pyx_v_nr_upd); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_16__call__(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, PyObject *__pyx_v_weights, PyObject *__pyx_v_gradient, PyObject *__pyx_v_lr_scale, PyObject *__pyx_v_key); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_18_radam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v_xp, PyObject *__pyx_v_weights, PyObject *__pyx_v_gradient, PyObject *__pyx_v_lr_scale, PyObject *__pyx_v_key, PyObject *__pyx_v_nr_upd); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_20_nesterov(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v_xp, PyObject *__pyx_v_weights, PyObject *__pyx_v_gradient, PyObject *__pyx_v_lr_scale, PyObject *__pyx_v_key); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_22_adam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v_xp, PyObject *__pyx_v_weights, PyObject *__pyx_v_gradient, PyObject *__pyx_v_lr_scale, PyObject *__pyx_v_key, PyObject *__pyx_v_nr_upd); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_6_make_ops(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_ops); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_8Adam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_args, PyObject *__pyx_v_kwargs); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_10SGD(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_args, PyObject *__pyx_v_kwargs); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_12linear_decay(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_rate, PyObject *__pyx_v_decay, PyObject *__pyx_v_nr_upd); /* proto */
static PyObject *__pyx_pf_5thinc_6neural_10optimizers_14anneal(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_rate, PyObject *__pyx_v_decay, PyObject *__pyx_v_decay_steps, PyObject *__pyx_v_nr_upd); /* proto */
static __Pyx_CachedCFunction __pyx_umethod_PyDict_Type_setdefault = {0, &__pyx_n_s_setdefault, 0, 0, 0};
static PyObject *__pyx_float_0_;
static PyObject *__pyx_float_1_;
static PyObject *__pyx_float_0_0;
static PyObject *__pyx_float_0_5;
static PyObject *__pyx_float_0_9;
static PyObject *__pyx_float_10_;
static PyObject *__pyx_float_0_90;
static PyObject *__pyx_float_1eneg_4;
static PyObject *__pyx_float_0_001;
static PyObject *__pyx_float_0_999;
static PyObject *__pyx_float_1eneg_08;
static PyObject *__pyx_int_0;
static PyObject *__pyx_int_1;
static PyObject *__pyx_int_2;
static PyObject *__pyx_int_4;
static PyObject *__pyx_int_10;
static PyObject *__pyx_k_;
static PyObject *__pyx_k__2;
static PyObject *__pyx_k__3;
static PyObject *__pyx_k__4;
static PyObject *__pyx_k__5;
static PyObject *__pyx_k__6;
static PyObject *__pyx_k__7;
static PyObject *__pyx_k__9;
static PyObject *__pyx_k__10;
static PyObject *__pyx_k__11;
static PyObject *__pyx_k__12;
static PyObject *__pyx_k__13;
static PyObject *__pyx_k__14;
static PyObject *__pyx_k__15;
static PyObject *__pyx_k__17;
static PyObject *__pyx_k__18;
static PyObject *__pyx_k__19;
static PyObject *__pyx_slice__30;
static PyObject *__pyx_tuple__39;
static PyObject *__pyx_tuple__40;
static PyObject *__pyx_tuple__41;
static PyObject *__pyx_tuple__42;
static PyObject *__pyx_tuple__43;
static PyObject *__pyx_tuple__44;
static PyObject *__pyx_tuple__45;
static PyObject *__pyx_tuple__46;
static PyObject *__pyx_tuple__48;
static PyObject *__pyx_tuple__49;
static PyObject *__pyx_tuple__50;
static PyObject *__pyx_tuple__51;
static PyObject *__pyx_tuple__52;
static PyObject *__pyx_tuple__53;
static PyObject *__pyx_tuple__54;
static PyObject *__pyx_tuple__55;
static PyObject *__pyx_tuple__56;
static PyObject *__pyx_tuple__57;
static PyObject *__pyx_tuple__58;
static PyObject *__pyx_tuple__59;
static PyObject *__pyx_tuple__60;
static PyObject *__pyx_tuple__61;
static PyObject *__pyx_tuple__62;
static PyObject *__pyx_tuple__63;
static PyObject *__pyx_tuple__64;
static PyObject *__pyx_tuple__65;
static PyObject *__pyx_codeobj__8;
static PyObject *__pyx_codeobj__16;
static PyObject *__pyx_codeobj__20;
static PyObject *__pyx_codeobj__21;
static PyObject *__pyx_codeobj__22;
static PyObject *__pyx_codeobj__23;
static PyObject *__pyx_codeobj__24;
static PyObject *__pyx_codeobj__25;
static PyObject *__pyx_codeobj__26;
static PyObject *__pyx_codeobj__27;
static PyObject *__pyx_codeobj__28;
static PyObject *__pyx_codeobj__29;
static PyObject *__pyx_codeobj__31;
static PyObject *__pyx_codeobj__32;
static PyObject *__pyx_codeobj__33;
static PyObject *__pyx_codeobj__34;
static PyObject *__pyx_codeobj__35;
static PyObject *__pyx_codeobj__36;
static PyObject *__pyx_codeobj__37;
static PyObject *__pyx_codeobj__38;
/* Late includes */

/* "thinc/neural/optimizers.pyx":39
 * 
 * @registry.optimizers.register("RAdam.v1")
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_1create_RAdam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_1create_RAdam = {"create_RAdam", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_1create_RAdam, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_1create_RAdam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_learn_rate = 0;
  PyObject *__pyx_v_L2 = 0;
  PyObject *__pyx_v_beta1 = 0;
  PyObject *__pyx_v_beta2 = 0;
  PyObject *__pyx_v_eps = 0;
  PyObject *__pyx_v_max_grad_norm = 0;
  PyObject *__pyx_v_L2_is_weight_decay = 0;
  CYTHON_UNUSED PyObject *__pyx_v_use_averages = 0;
  PyObject *__pyx_v_schedules = 0;
  PyObject *__pyx_v_ops = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("create_RAdam (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_learn_rate,&__pyx_n_s_L2,&__pyx_n_s_beta1,&__pyx_n_s_beta2,&__pyx_n_s_eps,&__pyx_n_s_max_grad_norm,&__pyx_n_s_L2_is_weight_decay,&__pyx_n_s_use_averages,&__pyx_n_s_schedules,&__pyx_n_s_ops,0};
    PyObject* values[10] = {0,0,0,0,0,0,0,0,0,0};
    values[0] = __pyx_k_;
    values[1] = __pyx_k__2;
    values[2] = __pyx_k__3;
    values[3] = __pyx_k__4;
    values[4] = __pyx_k__5;
    values[5] = __pyx_k__6;
    values[6] = __pyx_k__7;

    /* "thinc/neural/optimizers.pyx":46
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],
 *         use_averages=True,             # <<<<<<<<<<<<<<
 *         schedules=None,
 *         ops=None,
 */
    values[7] = ((PyObject *)Py_True);

    /* "thinc/neural/optimizers.pyx":47
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],
 *         use_averages=True,
 *         schedules=None,             # <<<<<<<<<<<<<<
 *         ops=None,
 * ):
 */
    values[8] = ((PyObject *)Py_None);

    /* "thinc/neural/optimizers.pyx":48
 *         use_averages=True,
 *         schedules=None,
 *         ops=None,             # <<<<<<<<<<<<<<
 * ):
 *     ops = _make_ops(ops)
 */
    values[9] = ((PyObject *)Py_None);
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9);
        CYTHON_FALLTHROUGH;
        case  9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
        CYTHON_FALLTHROUGH;
        case  8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
        CYTHON_FALLTHROUGH;
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_learn_rate);
          if (value) { values[0] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  1:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_L2);
          if (value) { values[1] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_beta1);
          if (value) { values[2] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_beta2);
          if (value) { values[3] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_eps);
          if (value) { values[4] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_max_grad_norm);
          if (value) { values[5] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  6:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_L2_is_weight_decay);
          if (value) { values[6] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  7:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_use_averages);
          if (value) { values[7] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  8:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_schedules);
          if (value) { values[8] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  9:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_ops);
          if (value) { values[9] = value; kw_args--; }
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "create_RAdam") < 0)) __PYX_ERR(0, 39, __pyx_L3_error)
      }
    } else {
      switch (PyTuple_GET_SIZE(__pyx_args)) {
        case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9);
        CYTHON_FALLTHROUGH;
        case  9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
        CYTHON_FALLTHROUGH;
        case  8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
        CYTHON_FALLTHROUGH;
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
    }
    __pyx_v_learn_rate = values[0];
    __pyx_v_L2 = values[1];
    __pyx_v_beta1 = values[2];
    __pyx_v_beta2 = values[3];
    __pyx_v_eps = values[4];
    __pyx_v_max_grad_norm = values[5];
    __pyx_v_L2_is_weight_decay = values[6];
    __pyx_v_use_averages = values[7];
    __pyx_v_schedules = values[8];
    __pyx_v_ops = values[9];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("create_RAdam", 0, 0, 10, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 39, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.create_RAdam", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_create_RAdam(__pyx_self, __pyx_v_learn_rate, __pyx_v_L2, __pyx_v_beta1, __pyx_v_beta2, __pyx_v_eps, __pyx_v_max_grad_norm, __pyx_v_L2_is_weight_decay, __pyx_v_use_averages, __pyx_v_schedules, __pyx_v_ops);

  /* "thinc/neural/optimizers.pyx":39
 * 
 * @registry.optimizers.register("RAdam.v1")
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_create_RAdam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_learn_rate, PyObject *__pyx_v_L2, PyObject *__pyx_v_beta1, PyObject *__pyx_v_beta2, PyObject *__pyx_v_eps, PyObject *__pyx_v_max_grad_norm, PyObject *__pyx_v_L2_is_weight_decay, CYTHON_UNUSED PyObject *__pyx_v_use_averages, PyObject *__pyx_v_schedules, PyObject *__pyx_v_ops) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__8)
  __Pyx_RefNannySetupContext("create_RAdam", 0);
  __Pyx_TraceCall("create_RAdam", __pyx_f[0], 39, 0, __PYX_ERR(0, 39, __pyx_L1_error));
  __Pyx_INCREF(__pyx_v_ops);

  /* "thinc/neural/optimizers.pyx":50
 *         ops=None,
 * ):
 *     ops = _make_ops(ops)             # <<<<<<<<<<<<<<
 *     return Optimizer(
 *         ops,
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_make_ops); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 50, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
    __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_2);
    if (likely(__pyx_t_3)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
      __Pyx_INCREF(__pyx_t_3);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_2, function);
    }
  }
  __pyx_t_1 = (__pyx_t_3) ? __Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_3, __pyx_v_ops) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_v_ops);
  __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 50, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF_SET(__pyx_v_ops, __pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":51
 * ):
 *     ops = _make_ops(ops)
 *     return Optimizer(             # <<<<<<<<<<<<<<
 *         ops,
 *         learn_rate,
 */
  __Pyx_XDECREF(__pyx_r);
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_Optimizer); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 51, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);

  /* "thinc/neural/optimizers.pyx":53
 *     return Optimizer(
 *         ops,
 *         learn_rate,             # <<<<<<<<<<<<<<
 *         L2=L2,
 *         beta1=beta1,
 */
  __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 51, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_INCREF(__pyx_v_ops);
  __Pyx_GIVEREF(__pyx_v_ops);
  PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_ops);
  __Pyx_INCREF(__pyx_v_learn_rate);
  __Pyx_GIVEREF(__pyx_v_learn_rate);
  PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_v_learn_rate);

  /* "thinc/neural/optimizers.pyx":54
 *         ops,
 *         learn_rate,
 *         L2=L2,             # <<<<<<<<<<<<<<
 *         beta1=beta1,
 *         beta2=beta2,
 */
  __pyx_t_3 = __Pyx_PyDict_NewPresized(13); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 54, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_L2, __pyx_v_L2) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":55
 *         learn_rate,
 *         L2=L2,
 *         beta1=beta1,             # <<<<<<<<<<<<<<
 *         beta2=beta2,
 *         eps=eps,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_beta1, __pyx_v_beta1) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":56
 *         L2=L2,
 *         beta1=beta1,
 *         beta2=beta2,             # <<<<<<<<<<<<<<
 *         eps=eps,
 *         max_grad_norm=max_grad_norm,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_beta2, __pyx_v_beta2) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":57
 *         beta1=beta1,
 *         beta2=beta2,
 *         eps=eps,             # <<<<<<<<<<<<<<
 *         max_grad_norm=max_grad_norm,
 *         L2_is_weight_decay=L2_is_weight_decay,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_eps, __pyx_v_eps) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":58
 *         beta2=beta2,
 *         eps=eps,
 *         max_grad_norm=max_grad_norm,             # <<<<<<<<<<<<<<
 *         L2_is_weight_decay=L2_is_weight_decay,
 *         schedules=schedules,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_max_grad_norm, __pyx_v_max_grad_norm) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":59
 *         eps=eps,
 *         max_grad_norm=max_grad_norm,
 *         L2_is_weight_decay=L2_is_weight_decay,             # <<<<<<<<<<<<<<
 *         schedules=schedules,
 *         nesterov=None, lookahead_k=0, lookahead_alpha=0,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_L2_is_weight_decay, __pyx_v_L2_is_weight_decay) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":60
 *         max_grad_norm=max_grad_norm,
 *         L2_is_weight_decay=L2_is_weight_decay,
 *         schedules=schedules,             # <<<<<<<<<<<<<<
 *         nesterov=None, lookahead_k=0, lookahead_alpha=0,
 *         use_averages=True,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_schedules, __pyx_v_schedules) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":61
 *         L2_is_weight_decay=L2_is_weight_decay,
 *         schedules=schedules,
 *         nesterov=None, lookahead_k=0, lookahead_alpha=0,             # <<<<<<<<<<<<<<
 *         use_averages=True,
 *         use_radam=True, use_lars=False
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_nesterov, Py_None) < 0) __PYX_ERR(0, 54, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_lookahead_k, __pyx_int_0) < 0) __PYX_ERR(0, 54, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_lookahead_alpha, __pyx_int_0) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":62
 *         schedules=schedules,
 *         nesterov=None, lookahead_k=0, lookahead_alpha=0,
 *         use_averages=True,             # <<<<<<<<<<<<<<
 *         use_radam=True, use_lars=False
 *     )
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_use_averages, Py_True) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":63
 *         nesterov=None, lookahead_k=0, lookahead_alpha=0,
 *         use_averages=True,
 *         use_radam=True, use_lars=False             # <<<<<<<<<<<<<<
 *     )
 * 
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_use_radam, Py_True) < 0) __PYX_ERR(0, 54, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_use_lars, Py_False) < 0) __PYX_ERR(0, 54, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":51
 * ):
 *     ops = _make_ops(ops)
 *     return Optimizer(             # <<<<<<<<<<<<<<
 *         ops,
 *         learn_rate,
 */
  __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 51, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_r = __pyx_t_4;
  __pyx_t_4 = 0;
  goto __pyx_L0;

  /* "thinc/neural/optimizers.pyx":39
 * 
 * @registry.optimizers.register("RAdam.v1")
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_AddTraceback("thinc.neural.optimizers.create_RAdam", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_ops);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":68
 * 
 * @registry.optimizers.register("Adam.v1")
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_3create_Adam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_3create_Adam = {"create_Adam", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_3create_Adam, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_3create_Adam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_learn_rate = 0;
  PyObject *__pyx_v_L2 = 0;
  PyObject *__pyx_v_beta1 = 0;
  PyObject *__pyx_v_beta2 = 0;
  PyObject *__pyx_v_eps = 0;
  PyObject *__pyx_v_max_grad_norm = 0;
  PyObject *__pyx_v_L2_is_weight_decay = 0;
  CYTHON_UNUSED PyObject *__pyx_v_use_averages = 0;
  PyObject *__pyx_v_ops = 0;
  PyObject *__pyx_v_schedules = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("create_Adam (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_learn_rate,&__pyx_n_s_L2,&__pyx_n_s_beta1,&__pyx_n_s_beta2,&__pyx_n_s_eps,&__pyx_n_s_max_grad_norm,&__pyx_n_s_L2_is_weight_decay,&__pyx_n_s_use_averages,&__pyx_n_s_ops,&__pyx_n_s_schedules,0};
    PyObject* values[10] = {0,0,0,0,0,0,0,0,0,0};
    values[0] = __pyx_k__9;
    values[1] = __pyx_k__10;
    values[2] = __pyx_k__11;
    values[3] = __pyx_k__12;
    values[4] = __pyx_k__13;
    values[5] = __pyx_k__14;
    values[6] = __pyx_k__15;

    /* "thinc/neural/optimizers.pyx":75
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],
 *         use_averages=True,             # <<<<<<<<<<<<<<
 *         ops=None,
 *         schedules=None
 */
    values[7] = ((PyObject *)Py_True);

    /* "thinc/neural/optimizers.pyx":76
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],
 *         use_averages=True,
 *         ops=None,             # <<<<<<<<<<<<<<
 *         schedules=None
 * ):
 */
    values[8] = ((PyObject *)Py_None);

    /* "thinc/neural/optimizers.pyx":77
 *         use_averages=True,
 *         ops=None,
 *         schedules=None             # <<<<<<<<<<<<<<
 * ):
 *     ops = _make_ops(ops)
 */
    values[9] = ((PyObject *)Py_None);
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9);
        CYTHON_FALLTHROUGH;
        case  9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
        CYTHON_FALLTHROUGH;
        case  8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
        CYTHON_FALLTHROUGH;
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_learn_rate);
          if (value) { values[0] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  1:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_L2);
          if (value) { values[1] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_beta1);
          if (value) { values[2] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_beta2);
          if (value) { values[3] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_eps);
          if (value) { values[4] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_max_grad_norm);
          if (value) { values[5] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  6:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_L2_is_weight_decay);
          if (value) { values[6] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  7:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_use_averages);
          if (value) { values[7] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  8:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_ops);
          if (value) { values[8] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  9:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_schedules);
          if (value) { values[9] = value; kw_args--; }
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "create_Adam") < 0)) __PYX_ERR(0, 68, __pyx_L3_error)
      }
    } else {
      switch (PyTuple_GET_SIZE(__pyx_args)) {
        case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9);
        CYTHON_FALLTHROUGH;
        case  9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
        CYTHON_FALLTHROUGH;
        case  8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
        CYTHON_FALLTHROUGH;
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
    }
    __pyx_v_learn_rate = values[0];
    __pyx_v_L2 = values[1];
    __pyx_v_beta1 = values[2];
    __pyx_v_beta2 = values[3];
    __pyx_v_eps = values[4];
    __pyx_v_max_grad_norm = values[5];
    __pyx_v_L2_is_weight_decay = values[6];
    __pyx_v_use_averages = values[7];
    __pyx_v_ops = values[8];
    __pyx_v_schedules = values[9];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("create_Adam", 0, 0, 10, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 68, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.create_Adam", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_2create_Adam(__pyx_self, __pyx_v_learn_rate, __pyx_v_L2, __pyx_v_beta1, __pyx_v_beta2, __pyx_v_eps, __pyx_v_max_grad_norm, __pyx_v_L2_is_weight_decay, __pyx_v_use_averages, __pyx_v_ops, __pyx_v_schedules);

  /* "thinc/neural/optimizers.pyx":68
 * 
 * @registry.optimizers.register("Adam.v1")
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_2create_Adam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_learn_rate, PyObject *__pyx_v_L2, PyObject *__pyx_v_beta1, PyObject *__pyx_v_beta2, PyObject *__pyx_v_eps, PyObject *__pyx_v_max_grad_norm, PyObject *__pyx_v_L2_is_weight_decay, CYTHON_UNUSED PyObject *__pyx_v_use_averages, PyObject *__pyx_v_ops, PyObject *__pyx_v_schedules) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__16)
  __Pyx_RefNannySetupContext("create_Adam", 0);
  __Pyx_TraceCall("create_Adam", __pyx_f[0], 68, 0, __PYX_ERR(0, 68, __pyx_L1_error));
  __Pyx_INCREF(__pyx_v_ops);

  /* "thinc/neural/optimizers.pyx":79
 *         schedules=None
 * ):
 *     ops = _make_ops(ops)             # <<<<<<<<<<<<<<
 *     return Optimizer(
 *         ops,
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_make_ops); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 79, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
    __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_2);
    if (likely(__pyx_t_3)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
      __Pyx_INCREF(__pyx_t_3);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_2, function);
    }
  }
  __pyx_t_1 = (__pyx_t_3) ? __Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_3, __pyx_v_ops) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_v_ops);
  __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 79, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF_SET(__pyx_v_ops, __pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":80
 * ):
 *     ops = _make_ops(ops)
 *     return Optimizer(             # <<<<<<<<<<<<<<
 *         ops,
 *         learn_rate,
 */
  __Pyx_XDECREF(__pyx_r);
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_Optimizer); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 80, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);

  /* "thinc/neural/optimizers.pyx":82
 *     return Optimizer(
 *         ops,
 *         learn_rate,             # <<<<<<<<<<<<<<
 *         L2=L2,
 *         beta1=beta1,
 */
  __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 80, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_INCREF(__pyx_v_ops);
  __Pyx_GIVEREF(__pyx_v_ops);
  PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_ops);
  __Pyx_INCREF(__pyx_v_learn_rate);
  __Pyx_GIVEREF(__pyx_v_learn_rate);
  PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_v_learn_rate);

  /* "thinc/neural/optimizers.pyx":83
 *         ops,
 *         learn_rate,
 *         L2=L2,             # <<<<<<<<<<<<<<
 *         beta1=beta1,
 *         beta2=beta2,
 */
  __pyx_t_3 = __Pyx_PyDict_NewPresized(17); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 83, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_L2, __pyx_v_L2) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":84
 *         learn_rate,
 *         L2=L2,
 *         beta1=beta1,             # <<<<<<<<<<<<<<
 *         beta2=beta2,
 *         eps=eps,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_beta1, __pyx_v_beta1) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":85
 *         L2=L2,
 *         beta1=beta1,
 *         beta2=beta2,             # <<<<<<<<<<<<<<
 *         eps=eps,
 *         max_grad_norm=max_grad_norm,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_beta2, __pyx_v_beta2) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":86
 *         beta1=beta1,
 *         beta2=beta2,
 *         eps=eps,             # <<<<<<<<<<<<<<
 *         max_grad_norm=max_grad_norm,
 *         L2_is_weight_decay=L2_is_weight_decay,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_eps, __pyx_v_eps) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":87
 *         beta2=beta2,
 *         eps=eps,
 *         max_grad_norm=max_grad_norm,             # <<<<<<<<<<<<<<
 *         L2_is_weight_decay=L2_is_weight_decay,
 *         schedules=schedules,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_max_grad_norm, __pyx_v_max_grad_norm) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":88
 *         eps=eps,
 *         max_grad_norm=max_grad_norm,
 *         L2_is_weight_decay=L2_is_weight_decay,             # <<<<<<<<<<<<<<
 *         schedules=schedules,
 *         use_averages=True,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_L2_is_weight_decay, __pyx_v_L2_is_weight_decay) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":89
 *         max_grad_norm=max_grad_norm,
 *         L2_is_weight_decay=L2_is_weight_decay,
 *         schedules=schedules,             # <<<<<<<<<<<<<<
 *         use_averages=True,
 *         decay=0.0, decay_steps=0, b1_decay=0, b2_decay=0,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_schedules, __pyx_v_schedules) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":90
 *         L2_is_weight_decay=L2_is_weight_decay,
 *         schedules=schedules,
 *         use_averages=True,             # <<<<<<<<<<<<<<
 *         decay=0.0, decay_steps=0, b1_decay=0, b2_decay=0,
 *         nesterov=None, lookahead_k=0, lookahead_alpha=0,
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_use_averages, Py_True) < 0) __PYX_ERR(0, 83, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_decay, __pyx_float_0_0) < 0) __PYX_ERR(0, 83, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_decay_steps, __pyx_int_0) < 0) __PYX_ERR(0, 83, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_b1_decay, __pyx_int_0) < 0) __PYX_ERR(0, 83, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_b2_decay, __pyx_int_0) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":92
 *         use_averages=True,
 *         decay=0.0, decay_steps=0, b1_decay=0, b2_decay=0,
 *         nesterov=None, lookahead_k=0, lookahead_alpha=0,             # <<<<<<<<<<<<<<
 *         use_radam=False, use_lars=False
 *     )
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_nesterov, Py_None) < 0) __PYX_ERR(0, 83, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_lookahead_k, __pyx_int_0) < 0) __PYX_ERR(0, 83, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_lookahead_alpha, __pyx_int_0) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":93
 *         decay=0.0, decay_steps=0, b1_decay=0, b2_decay=0,
 *         nesterov=None, lookahead_k=0, lookahead_alpha=0,
 *         use_radam=False, use_lars=False             # <<<<<<<<<<<<<<
 *     )
 * 
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_use_radam, Py_False) < 0) __PYX_ERR(0, 83, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_use_lars, Py_False) < 0) __PYX_ERR(0, 83, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":80
 * ):
 *     ops = _make_ops(ops)
 *     return Optimizer(             # <<<<<<<<<<<<<<
 *         ops,
 *         learn_rate,
 */
  __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 80, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_r = __pyx_t_4;
  __pyx_t_4 = 0;
  goto __pyx_L0;

  /* "thinc/neural/optimizers.pyx":68
 * 
 * @registry.optimizers.register("Adam.v1")
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_AddTraceback("thinc.neural.optimizers.create_Adam", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_ops);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":98
 * 
 * @registry.optimizers.register("SGD.v1")
 * def create_SGD(learn_rate,             # <<<<<<<<<<<<<<
 *         ops=None,
 *         L2=SGD_DEFAULTS["L2"],
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_5create_SGD(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_5create_SGD = {"create_SGD", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_5create_SGD, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_5create_SGD(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_learn_rate = 0;
  PyObject *__pyx_v_ops = 0;
  PyObject *__pyx_v_L2 = 0;
  PyObject *__pyx_v_max_grad_norm = 0;
  PyObject *__pyx_v_L2_is_weight_decay = 0;
  CYTHON_UNUSED PyObject *__pyx_v_use_averages = 0;
  PyObject *__pyx_v_schedules = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("create_SGD (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_learn_rate,&__pyx_n_s_ops,&__pyx_n_s_L2,&__pyx_n_s_max_grad_norm,&__pyx_n_s_L2_is_weight_decay,&__pyx_n_s_use_averages,&__pyx_n_s_schedules,0};
    PyObject* values[7] = {0,0,0,0,0,0,0};

    /* "thinc/neural/optimizers.pyx":99
 * @registry.optimizers.register("SGD.v1")
 * def create_SGD(learn_rate,
 *         ops=None,             # <<<<<<<<<<<<<<
 *         L2=SGD_DEFAULTS["L2"],
 *         max_grad_norm=SGD_DEFAULTS["max_grad_norm"],
 */
    values[1] = ((PyObject *)Py_None);
    values[2] = __pyx_k__17;
    values[3] = __pyx_k__18;
    values[4] = __pyx_k__19;

    /* "thinc/neural/optimizers.pyx":103
 *         max_grad_norm=SGD_DEFAULTS["max_grad_norm"],
 *         L2_is_weight_decay=SGD_DEFAULTS["L2_is_weight_decay"],
 *         use_averages=True,             # <<<<<<<<<<<<<<
 *         schedules=None
 * ):
 */
    values[5] = ((PyObject *)Py_True);

    /* "thinc/neural/optimizers.pyx":104
 *         L2_is_weight_decay=SGD_DEFAULTS["L2_is_weight_decay"],
 *         use_averages=True,
 *         schedules=None             # <<<<<<<<<<<<<<
 * ):
 *     ops = _make_ops(ops)
 */
    values[6] = ((PyObject *)Py_None);
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_learn_rate)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_ops);
          if (value) { values[1] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_L2);
          if (value) { values[2] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_max_grad_norm);
          if (value) { values[3] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_L2_is_weight_decay);
          if (value) { values[4] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_use_averages);
          if (value) { values[5] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  6:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_schedules);
          if (value) { values[6] = value; kw_args--; }
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "create_SGD") < 0)) __PYX_ERR(0, 98, __pyx_L3_error)
      }
    } else {
      switch (PyTuple_GET_SIZE(__pyx_args)) {
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        break;
        default: goto __pyx_L5_argtuple_error;
      }
    }
    __pyx_v_learn_rate = values[0];
    __pyx_v_ops = values[1];
    __pyx_v_L2 = values[2];
    __pyx_v_max_grad_norm = values[3];
    __pyx_v_L2_is_weight_decay = values[4];
    __pyx_v_use_averages = values[5];
    __pyx_v_schedules = values[6];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("create_SGD", 0, 1, 7, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 98, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.create_SGD", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_4create_SGD(__pyx_self, __pyx_v_learn_rate, __pyx_v_ops, __pyx_v_L2, __pyx_v_max_grad_norm, __pyx_v_L2_is_weight_decay, __pyx_v_use_averages, __pyx_v_schedules);

  /* "thinc/neural/optimizers.pyx":98
 * 
 * @registry.optimizers.register("SGD.v1")
 * def create_SGD(learn_rate,             # <<<<<<<<<<<<<<
 *         ops=None,
 *         L2=SGD_DEFAULTS["L2"],
 */

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_4create_SGD(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_learn_rate, PyObject *__pyx_v_ops, PyObject *__pyx_v_L2, PyObject *__pyx_v_max_grad_norm, PyObject *__pyx_v_L2_is_weight_decay, CYTHON_UNUSED PyObject *__pyx_v_use_averages, PyObject *__pyx_v_schedules) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__20)
  __Pyx_RefNannySetupContext("create_SGD", 0);
  __Pyx_TraceCall("create_SGD", __pyx_f[0], 98, 0, __PYX_ERR(0, 98, __pyx_L1_error));
  __Pyx_INCREF(__pyx_v_ops);

  /* "thinc/neural/optimizers.pyx":106
 *         schedules=None
 * ):
 *     ops = _make_ops(ops)             # <<<<<<<<<<<<<<
 *     return Optimizer(ops, learn_rate,
 *         L2=L2, max_grad_norm=max_grad_norm, L2_is_weight_decay=L2_is_weight_decay,
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_make_ops); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 106, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
    __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_2);
    if (likely(__pyx_t_3)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
      __Pyx_INCREF(__pyx_t_3);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_2, function);
    }
  }
  __pyx_t_1 = (__pyx_t_3) ? __Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_3, __pyx_v_ops) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_v_ops);
  __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 106, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF_SET(__pyx_v_ops, __pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":107
 * ):
 *     ops = _make_ops(ops)
 *     return Optimizer(ops, learn_rate,             # <<<<<<<<<<<<<<
 *         L2=L2, max_grad_norm=max_grad_norm, L2_is_weight_decay=L2_is_weight_decay,
 *         schedules=schedules, beta1=0.0, beta2=0.0)
 */
  __Pyx_XDECREF(__pyx_r);
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_Optimizer); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 107, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = PyTuple_New(2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 107, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_INCREF(__pyx_v_ops);
  __Pyx_GIVEREF(__pyx_v_ops);
  PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_v_ops);
  __Pyx_INCREF(__pyx_v_learn_rate);
  __Pyx_GIVEREF(__pyx_v_learn_rate);
  PyTuple_SET_ITEM(__pyx_t_2, 1, __pyx_v_learn_rate);

  /* "thinc/neural/optimizers.pyx":108
 *     ops = _make_ops(ops)
 *     return Optimizer(ops, learn_rate,
 *         L2=L2, max_grad_norm=max_grad_norm, L2_is_weight_decay=L2_is_weight_decay,             # <<<<<<<<<<<<<<
 *         schedules=schedules, beta1=0.0, beta2=0.0)
 * 
 */
  __pyx_t_3 = __Pyx_PyDict_NewPresized(6); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 108, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_L2, __pyx_v_L2) < 0) __PYX_ERR(0, 108, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_max_grad_norm, __pyx_v_max_grad_norm) < 0) __PYX_ERR(0, 108, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_L2_is_weight_decay, __pyx_v_L2_is_weight_decay) < 0) __PYX_ERR(0, 108, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":109
 *     return Optimizer(ops, learn_rate,
 *         L2=L2, max_grad_norm=max_grad_norm, L2_is_weight_decay=L2_is_weight_decay,
 *         schedules=schedules, beta1=0.0, beta2=0.0)             # <<<<<<<<<<<<<<
 * 
 * 
 */
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_schedules, __pyx_v_schedules) < 0) __PYX_ERR(0, 108, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_beta1, __pyx_float_0_0) < 0) __PYX_ERR(0, 108, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_beta2, __pyx_float_0_0) < 0) __PYX_ERR(0, 108, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":107
 * ):
 *     ops = _make_ops(ops)
 *     return Optimizer(ops, learn_rate,             # <<<<<<<<<<<<<<
 *         L2=L2, max_grad_norm=max_grad_norm, L2_is_weight_decay=L2_is_weight_decay,
 *         schedules=schedules, beta1=0.0, beta2=0.0)
 */
  __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 107, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_r = __pyx_t_4;
  __pyx_t_4 = 0;
  goto __pyx_L0;

  /* "thinc/neural/optimizers.pyx":98
 * 
 * @registry.optimizers.register("SGD.v1")
 * def create_SGD(learn_rate,             # <<<<<<<<<<<<<<
 *         ops=None,
 *         L2=SGD_DEFAULTS["L2"],
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_AddTraceback("thinc.neural.optimizers.create_SGD", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_ops);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":124
 *     '''
 *     @classmethod
 *     def from_config(cls, config):             # <<<<<<<<<<<<<<
 *         return registry.make_from_config(config)
 * 
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_1from_config(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_1from_config = {"from_config", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_1from_config, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_1from_config(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  CYTHON_UNUSED PyObject *__pyx_v_cls = 0;
  PyObject *__pyx_v_config = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("from_config (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_cls,&__pyx_n_s_config,0};
    PyObject* values[2] = {0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_cls)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_config)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("from_config", 1, 2, 2, 1); __PYX_ERR(0, 124, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "from_config") < 0)) __PYX_ERR(0, 124, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
    }
    __pyx_v_cls = values[0];
    __pyx_v_config = values[1];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("from_config", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 124, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.from_config", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_from_config(__pyx_self, __pyx_v_cls, __pyx_v_config);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_from_config(CYTHON_UNUSED PyObject *__pyx_self, CYTHON_UNUSED PyObject *__pyx_v_cls, PyObject *__pyx_v_config) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__21)
  __Pyx_RefNannySetupContext("from_config", 0);
  __Pyx_TraceCall("from_config", __pyx_f[0], 124, 0, __PYX_ERR(0, 124, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":125
 *     @classmethod
 *     def from_config(cls, config):
 *         return registry.make_from_config(config)             # <<<<<<<<<<<<<<
 * 
 *     def __init__(self, ops, lr, L2=1e-4, beta1=0.90, beta2=0.999, eps=1e-08,
 */
  __Pyx_XDECREF(__pyx_r);
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_registry); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 125, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_make_from_config); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 125, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_2)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_2);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  __pyx_t_1 = (__pyx_t_2) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_2, __pyx_v_config) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_v_config);
  __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 125, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_r = __pyx_t_1;
  __pyx_t_1 = 0;
  goto __pyx_L0;

  /* "thinc/neural/optimizers.pyx":124
 *     '''
 *     @classmethod
 *     def from_config(cls, config):             # <<<<<<<<<<<<<<
 *         return registry.make_from_config(config)
 * 
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.from_config", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":127
 *         return registry.make_from_config(config)
 * 
 *     def __init__(self, ops, lr, L2=1e-4, beta1=0.90, beta2=0.999, eps=1e-08,             # <<<<<<<<<<<<<<
 *                  max_grad_norm=10., gradient_noise=0.0, nesterov=True,
 *                  L2_is_weight_decay=False, lookahead_k=0, lookahead_alpha=0.5,
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_3__init__(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_3__init__ = {"__init__", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_3__init__, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_3__init__(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_self = 0;
  PyObject *__pyx_v_ops = 0;
  PyObject *__pyx_v_lr = 0;
  PyObject *__pyx_v_L2 = 0;
  PyObject *__pyx_v_beta1 = 0;
  PyObject *__pyx_v_beta2 = 0;
  PyObject *__pyx_v_eps = 0;
  PyObject *__pyx_v_max_grad_norm = 0;
  PyObject *__pyx_v_gradient_noise = 0;
  PyObject *__pyx_v_nesterov = 0;
  PyObject *__pyx_v_L2_is_weight_decay = 0;
  PyObject *__pyx_v_lookahead_k = 0;
  PyObject *__pyx_v_lookahead_alpha = 0;
  PyObject *__pyx_v_use_averages = 0;
  PyObject *__pyx_v_use_radam = 0;
  PyObject *__pyx_v_use_lars = 0;
  PyObject *__pyx_v_schedule = 0;
  CYTHON_UNUSED PyObject *__pyx_v__ = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__init__ (wrapper)", 0);
  __pyx_v__ = PyDict_New(); if (unlikely(!__pyx_v__)) return NULL;
  __Pyx_GOTREF(__pyx_v__);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_self,&__pyx_n_s_ops,&__pyx_n_s_lr,&__pyx_n_s_L2,&__pyx_n_s_beta1,&__pyx_n_s_beta2,&__pyx_n_s_eps,&__pyx_n_s_max_grad_norm,&__pyx_n_s_gradient_noise,&__pyx_n_s_nesterov,&__pyx_n_s_L2_is_weight_decay,&__pyx_n_s_lookahead_k,&__pyx_n_s_lookahead_alpha,&__pyx_n_s_use_averages,&__pyx_n_s_use_radam,&__pyx_n_s_use_lars,&__pyx_n_s_schedule,0};
    PyObject* values[17] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
    values[3] = ((PyObject *)((PyObject*)__pyx_float_1eneg_4));
    values[4] = ((PyObject *)((PyObject*)__pyx_float_0_90));
    values[5] = ((PyObject *)((PyObject*)__pyx_float_0_999));
    values[6] = ((PyObject *)((PyObject*)__pyx_float_1eneg_08));
    values[7] = ((PyObject *)((PyObject*)__pyx_float_10_));
    values[8] = ((PyObject *)((PyObject*)__pyx_float_0_0));

    /* "thinc/neural/optimizers.pyx":128
 * 
 *     def __init__(self, ops, lr, L2=1e-4, beta1=0.90, beta2=0.999, eps=1e-08,
 *                  max_grad_norm=10., gradient_noise=0.0, nesterov=True,             # <<<<<<<<<<<<<<
 *                  L2_is_weight_decay=False, lookahead_k=0, lookahead_alpha=0.5,
 *                  use_averages=True, use_radam=False, use_lars=False, schedule=None, **_):
 */
    values[9] = ((PyObject *)((PyObject *)Py_True));

    /* "thinc/neural/optimizers.pyx":129
 *     def __init__(self, ops, lr, L2=1e-4, beta1=0.90, beta2=0.999, eps=1e-08,
 *                  max_grad_norm=10., gradient_noise=0.0, nesterov=True,
 *                  L2_is_weight_decay=False, lookahead_k=0, lookahead_alpha=0.5,             # <<<<<<<<<<<<<<
 *                  use_averages=True, use_radam=False, use_lars=False, schedule=None, **_):
 *         self.ops = ops
 */
    values[10] = ((PyObject *)((PyObject *)Py_False));
    values[11] = ((PyObject *)((PyObject *)__pyx_int_0));
    values[12] = ((PyObject *)((PyObject*)__pyx_float_0_5));

    /* "thinc/neural/optimizers.pyx":130
 *                  max_grad_norm=10., gradient_noise=0.0, nesterov=True,
 *                  L2_is_weight_decay=False, lookahead_k=0, lookahead_alpha=0.5,
 *                  use_averages=True, use_radam=False, use_lars=False, schedule=None, **_):             # <<<<<<<<<<<<<<
 *         self.ops = ops
 *         if schedule is None:
 */
    values[13] = ((PyObject *)((PyObject *)Py_True));
    values[14] = ((PyObject *)((PyObject *)Py_False));
    values[15] = ((PyObject *)((PyObject *)Py_False));
    values[16] = ((PyObject *)((PyObject *)Py_None));
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case 17: values[16] = PyTuple_GET_ITEM(__pyx_args, 16);
        CYTHON_FALLTHROUGH;
        case 16: values[15] = PyTuple_GET_ITEM(__pyx_args, 15);
        CYTHON_FALLTHROUGH;
        case 15: values[14] = PyTuple_GET_ITEM(__pyx_args, 14);
        CYTHON_FALLTHROUGH;
        case 14: values[13] = PyTuple_GET_ITEM(__pyx_args, 13);
        CYTHON_FALLTHROUGH;
        case 13: values[12] = PyTuple_GET_ITEM(__pyx_args, 12);
        CYTHON_FALLTHROUGH;
        case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11);
        CYTHON_FALLTHROUGH;
        case 11: values[10] = PyTuple_GET_ITEM(__pyx_args, 10);
        CYTHON_FALLTHROUGH;
        case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9);
        CYTHON_FALLTHROUGH;
        case  9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
        CYTHON_FALLTHROUGH;
        case  8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
        CYTHON_FALLTHROUGH;
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_self)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_ops)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("__init__", 0, 3, 17, 1); __PYX_ERR(0, 127, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lr)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("__init__", 0, 3, 17, 2); __PYX_ERR(0, 127, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_L2);
          if (value) { values[3] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_beta1);
          if (value) { values[4] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_beta2);
          if (value) { values[5] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  6:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_eps);
          if (value) { values[6] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  7:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_max_grad_norm);
          if (value) { values[7] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  8:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_gradient_noise);
          if (value) { values[8] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  9:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_nesterov);
          if (value) { values[9] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case 10:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_L2_is_weight_decay);
          if (value) { values[10] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case 11:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lookahead_k);
          if (value) { values[11] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case 12:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lookahead_alpha);
          if (value) { values[12] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case 13:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_use_averages);
          if (value) { values[13] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case 14:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_use_radam);
          if (value) { values[14] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case 15:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_use_lars);
          if (value) { values[15] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case 16:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_schedule);
          if (value) { values[16] = value; kw_args--; }
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, __pyx_v__, values, pos_args, "__init__") < 0)) __PYX_ERR(0, 127, __pyx_L3_error)
      }
    } else {
      switch (PyTuple_GET_SIZE(__pyx_args)) {
        case 17: values[16] = PyTuple_GET_ITEM(__pyx_args, 16);
        CYTHON_FALLTHROUGH;
        case 16: values[15] = PyTuple_GET_ITEM(__pyx_args, 15);
        CYTHON_FALLTHROUGH;
        case 15: values[14] = PyTuple_GET_ITEM(__pyx_args, 14);
        CYTHON_FALLTHROUGH;
        case 14: values[13] = PyTuple_GET_ITEM(__pyx_args, 13);
        CYTHON_FALLTHROUGH;
        case 13: values[12] = PyTuple_GET_ITEM(__pyx_args, 12);
        CYTHON_FALLTHROUGH;
        case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11);
        CYTHON_FALLTHROUGH;
        case 11: values[10] = PyTuple_GET_ITEM(__pyx_args, 10);
        CYTHON_FALLTHROUGH;
        case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9);
        CYTHON_FALLTHROUGH;
        case  9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8);
        CYTHON_FALLTHROUGH;
        case  8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7);
        CYTHON_FALLTHROUGH;
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        break;
        default: goto __pyx_L5_argtuple_error;
      }
    }
    __pyx_v_self = values[0];
    __pyx_v_ops = values[1];
    __pyx_v_lr = values[2];
    __pyx_v_L2 = values[3];
    __pyx_v_beta1 = values[4];
    __pyx_v_beta2 = values[5];
    __pyx_v_eps = values[6];
    __pyx_v_max_grad_norm = values[7];
    __pyx_v_gradient_noise = values[8];
    __pyx_v_nesterov = values[9];
    __pyx_v_L2_is_weight_decay = values[10];
    __pyx_v_lookahead_k = values[11];
    __pyx_v_lookahead_alpha = values[12];
    __pyx_v_use_averages = values[13];
    __pyx_v_use_radam = values[14];
    __pyx_v_use_lars = values[15];
    __pyx_v_schedule = values[16];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("__init__", 0, 3, 17, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 127, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_DECREF(__pyx_v__); __pyx_v__ = 0;
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_2__init__(__pyx_self, __pyx_v_self, __pyx_v_ops, __pyx_v_lr, __pyx_v_L2, __pyx_v_beta1, __pyx_v_beta2, __pyx_v_eps, __pyx_v_max_grad_norm, __pyx_v_gradient_noise, __pyx_v_nesterov, __pyx_v_L2_is_weight_decay, __pyx_v_lookahead_k, __pyx_v_lookahead_alpha, __pyx_v_use_averages, __pyx_v_use_radam, __pyx_v_use_lars, __pyx_v_schedule, __pyx_v__);

  /* "thinc/neural/optimizers.pyx":127
 *         return registry.make_from_config(config)
 * 
 *     def __init__(self, ops, lr, L2=1e-4, beta1=0.90, beta2=0.999, eps=1e-08,             # <<<<<<<<<<<<<<
 *                  max_grad_norm=10., gradient_noise=0.0, nesterov=True,
 *                  L2_is_weight_decay=False, lookahead_k=0, lookahead_alpha=0.5,
 */

  /* function exit code */
  __Pyx_XDECREF(__pyx_v__);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_2__init__(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, PyObject *__pyx_v_ops, PyObject *__pyx_v_lr, PyObject *__pyx_v_L2, PyObject *__pyx_v_beta1, PyObject *__pyx_v_beta2, PyObject *__pyx_v_eps, PyObject *__pyx_v_max_grad_norm, PyObject *__pyx_v_gradient_noise, PyObject *__pyx_v_nesterov, PyObject *__pyx_v_L2_is_weight_decay, PyObject *__pyx_v_lookahead_k, PyObject *__pyx_v_lookahead_alpha, PyObject *__pyx_v_use_averages, PyObject *__pyx_v_use_radam, PyObject *__pyx_v_use_lars, PyObject *__pyx_v_schedule, CYTHON_UNUSED PyObject *__pyx_v__) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  int __pyx_t_1;
  int __pyx_t_2;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  PyObject *__pyx_t_5 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__22)
  __Pyx_RefNannySetupContext("__init__", 0);
  __Pyx_TraceCall("__init__", __pyx_f[0], 127, 0, __PYX_ERR(0, 127, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":131
 *                  L2_is_weight_decay=False, lookahead_k=0, lookahead_alpha=0.5,
 *                  use_averages=True, use_radam=False, use_lars=False, schedule=None, **_):
 *         self.ops = ops             # <<<<<<<<<<<<<<
 *         if schedule is None:
 *             self.schedule = {}
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_ops, __pyx_v_ops) < 0) __PYX_ERR(0, 131, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":132
 *                  use_averages=True, use_radam=False, use_lars=False, schedule=None, **_):
 *         self.ops = ops
 *         if schedule is None:             # <<<<<<<<<<<<<<
 *             self.schedule = {}
 *         else:
 */
  __pyx_t_1 = (__pyx_v_schedule == Py_None);
  __pyx_t_2 = (__pyx_t_1 != 0);
  if (__pyx_t_2) {

    /* "thinc/neural/optimizers.pyx":133
 *         self.ops = ops
 *         if schedule is None:
 *             self.schedule = {}             # <<<<<<<<<<<<<<
 *         else:
 *             self.schedule = dict(schedule)
 */
    __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 133, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_schedule, __pyx_t_3) < 0) __PYX_ERR(0, 133, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":132
 *                  use_averages=True, use_radam=False, use_lars=False, schedule=None, **_):
 *         self.ops = ops
 *         if schedule is None:             # <<<<<<<<<<<<<<
 *             self.schedule = {}
 *         else:
 */
    goto __pyx_L3;
  }

  /* "thinc/neural/optimizers.pyx":135
 *             self.schedule = {}
 *         else:
 *             self.schedule = dict(schedule)             # <<<<<<<<<<<<<<
 *         self.mom1 = {}
 *         self.mom2 = {}
 */
  /*else*/ {
    __pyx_t_3 = __Pyx_PyObject_CallOneArg(((PyObject *)(&PyDict_Type)), __pyx_v_schedule); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 135, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_schedule, __pyx_t_3) < 0) __PYX_ERR(0, 135, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  }
  __pyx_L3:;

  /* "thinc/neural/optimizers.pyx":136
 *         else:
 *             self.schedule = dict(schedule)
 *         self.mom1 = {}             # <<<<<<<<<<<<<<
 *         self.mom2 = {}
 *         self.slow_weights = {} # For lookahead
 */
  __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 136, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_mom1, __pyx_t_3) < 0) __PYX_ERR(0, 136, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":137
 *             self.schedule = dict(schedule)
 *         self.mom1 = {}
 *         self.mom2 = {}             # <<<<<<<<<<<<<<
 *         self.slow_weights = {} # For lookahead
 *         if use_averages:
 */
  __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 137, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_mom2, __pyx_t_3) < 0) __PYX_ERR(0, 137, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":138
 *         self.mom1 = {}
 *         self.mom2 = {}
 *         self.slow_weights = {} # For lookahead             # <<<<<<<<<<<<<<
 *         if use_averages:
 *             self.averages = {}
 */
  __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 138, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_slow_weights, __pyx_t_3) < 0) __PYX_ERR(0, 138, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":139
 *         self.mom2 = {}
 *         self.slow_weights = {} # For lookahead
 *         if use_averages:             # <<<<<<<<<<<<<<
 *             self.averages = {}
 *         else:
 */
  __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_use_averages); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 139, __pyx_L1_error)
  if (__pyx_t_2) {

    /* "thinc/neural/optimizers.pyx":140
 *         self.slow_weights = {} # For lookahead
 *         if use_averages:
 *             self.averages = {}             # <<<<<<<<<<<<<<
 *         else:
 *             self.averages = None
 */
    __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 140, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_averages, __pyx_t_3) < 0) __PYX_ERR(0, 140, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":139
 *         self.mom2 = {}
 *         self.slow_weights = {} # For lookahead
 *         if use_averages:             # <<<<<<<<<<<<<<
 *             self.averages = {}
 *         else:
 */
    goto __pyx_L4;
  }

  /* "thinc/neural/optimizers.pyx":142
 *             self.averages = {}
 *         else:
 *             self.averages = None             # <<<<<<<<<<<<<<
 *         self.nr_update = defaultdict(int)
 *         self.last_seen = defaultdict(int)
 */
  /*else*/ {
    if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_averages, Py_None) < 0) __PYX_ERR(0, 142, __pyx_L1_error)
  }
  __pyx_L4:;

  /* "thinc/neural/optimizers.pyx":143
 *         else:
 *             self.averages = None
 *         self.nr_update = defaultdict(int)             # <<<<<<<<<<<<<<
 *         self.last_seen = defaultdict(int)
 *         self.max_grad_norm = max_grad_norm
 */
  __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_defaultdict); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 143, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) {
    __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
    if (likely(__pyx_t_5)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
      __Pyx_INCREF(__pyx_t_5);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_4, function);
    }
  }
  __pyx_t_3 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_5, ((PyObject *)(&PyInt_Type))) : __Pyx_PyObject_CallOneArg(__pyx_t_4, ((PyObject *)(&PyInt_Type)));
  __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
  if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 143, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_nr_update, __pyx_t_3) < 0) __PYX_ERR(0, 143, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":144
 *             self.averages = None
 *         self.nr_update = defaultdict(int)
 *         self.last_seen = defaultdict(int)             # <<<<<<<<<<<<<<
 *         self.max_grad_norm = max_grad_norm
 *         self.alpha = lr
 */
  __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_defaultdict); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 144, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) {
    __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
    if (likely(__pyx_t_5)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
      __Pyx_INCREF(__pyx_t_5);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_4, function);
    }
  }
  __pyx_t_3 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_5, ((PyObject *)(&PyInt_Type))) : __Pyx_PyObject_CallOneArg(__pyx_t_4, ((PyObject *)(&PyInt_Type)));
  __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
  if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 144, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_last_seen, __pyx_t_3) < 0) __PYX_ERR(0, 144, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":145
 *         self.nr_update = defaultdict(int)
 *         self.last_seen = defaultdict(int)
 *         self.max_grad_norm = max_grad_norm             # <<<<<<<<<<<<<<
 *         self.alpha = lr
 *         self.b1 = beta1
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_max_grad_norm, __pyx_v_max_grad_norm) < 0) __PYX_ERR(0, 145, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":146
 *         self.last_seen = defaultdict(int)
 *         self.max_grad_norm = max_grad_norm
 *         self.alpha = lr             # <<<<<<<<<<<<<<
 *         self.b1 = beta1
 *         self.b2 = beta2
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_alpha, __pyx_v_lr) < 0) __PYX_ERR(0, 146, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":147
 *         self.max_grad_norm = max_grad_norm
 *         self.alpha = lr
 *         self.b1 = beta1             # <<<<<<<<<<<<<<
 *         self.b2 = beta2
 *         self.gradient_noise = gradient_noise
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_b1, __pyx_v_beta1) < 0) __PYX_ERR(0, 147, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":148
 *         self.alpha = lr
 *         self.b1 = beta1
 *         self.b2 = beta2             # <<<<<<<<<<<<<<
 *         self.gradient_noise = gradient_noise
 *         self.eps = eps
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_b2, __pyx_v_beta2) < 0) __PYX_ERR(0, 148, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":149
 *         self.b1 = beta1
 *         self.b2 = beta2
 *         self.gradient_noise = gradient_noise             # <<<<<<<<<<<<<<
 *         self.eps = eps
 *         self.L2 = L2
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_gradient_noise, __pyx_v_gradient_noise) < 0) __PYX_ERR(0, 149, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":150
 *         self.b2 = beta2
 *         self.gradient_noise = gradient_noise
 *         self.eps = eps             # <<<<<<<<<<<<<<
 *         self.L2 = L2
 *         self.nesterov = nesterov
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_eps, __pyx_v_eps) < 0) __PYX_ERR(0, 150, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":151
 *         self.gradient_noise = gradient_noise
 *         self.eps = eps
 *         self.L2 = L2             # <<<<<<<<<<<<<<
 *         self.nesterov = nesterov
 *         self.L2_is_weight_decay = L2_is_weight_decay
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_L2, __pyx_v_L2) < 0) __PYX_ERR(0, 151, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":152
 *         self.eps = eps
 *         self.L2 = L2
 *         self.nesterov = nesterov             # <<<<<<<<<<<<<<
 *         self.L2_is_weight_decay = L2_is_weight_decay
 *         self.lookahead_k = lookahead_k
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_nesterov, __pyx_v_nesterov) < 0) __PYX_ERR(0, 152, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":153
 *         self.L2 = L2
 *         self.nesterov = nesterov
 *         self.L2_is_weight_decay = L2_is_weight_decay             # <<<<<<<<<<<<<<
 *         self.lookahead_k = lookahead_k
 *         self.lookahead_alpha = lookahead_alpha
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_L2_is_weight_decay, __pyx_v_L2_is_weight_decay) < 0) __PYX_ERR(0, 153, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":154
 *         self.nesterov = nesterov
 *         self.L2_is_weight_decay = L2_is_weight_decay
 *         self.lookahead_k = lookahead_k             # <<<<<<<<<<<<<<
 *         self.lookahead_alpha = lookahead_alpha
 *         self.use_radam = use_radam
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_lookahead_k, __pyx_v_lookahead_k) < 0) __PYX_ERR(0, 154, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":155
 *         self.L2_is_weight_decay = L2_is_weight_decay
 *         self.lookahead_k = lookahead_k
 *         self.lookahead_alpha = lookahead_alpha             # <<<<<<<<<<<<<<
 *         self.use_radam = use_radam
 *         # Deprecated
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_lookahead_alpha, __pyx_v_lookahead_alpha) < 0) __PYX_ERR(0, 155, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":156
 *         self.lookahead_k = lookahead_k
 *         self.lookahead_alpha = lookahead_alpha
 *         self.use_radam = use_radam             # <<<<<<<<<<<<<<
 *         # Deprecated
 *         self.use_lars = use_lars
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_use_radam, __pyx_v_use_radam) < 0) __PYX_ERR(0, 156, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":158
 *         self.use_radam = use_radam
 *         # Deprecated
 *         self.use_lars = use_lars             # <<<<<<<<<<<<<<
 *         self.decay = 0.0
 *         self.decay_steps = 0
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_use_lars, __pyx_v_use_lars) < 0) __PYX_ERR(0, 158, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":159
 *         # Deprecated
 *         self.use_lars = use_lars
 *         self.decay = 0.0             # <<<<<<<<<<<<<<
 *         self.decay_steps = 0
 *         self.lars_min = 0
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_decay, __pyx_float_0_0) < 0) __PYX_ERR(0, 159, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":160
 *         self.use_lars = use_lars
 *         self.decay = 0.0
 *         self.decay_steps = 0             # <<<<<<<<<<<<<<
 *         self.lars_min = 0
 *         self.lars_max = 10
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_decay_steps, __pyx_int_0) < 0) __PYX_ERR(0, 160, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":161
 *         self.decay = 0.0
 *         self.decay_steps = 0
 *         self.lars_min = 0             # <<<<<<<<<<<<<<
 *         self.lars_max = 10
 *         self.b1_decay = 0.0
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_lars_min, __pyx_int_0) < 0) __PYX_ERR(0, 161, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":162
 *         self.decay_steps = 0
 *         self.lars_min = 0
 *         self.lars_max = 10             # <<<<<<<<<<<<<<
 *         self.b1_decay = 0.0
 *         self.b2_decay = 0.0
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_lars_max, __pyx_int_10) < 0) __PYX_ERR(0, 162, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":163
 *         self.lars_min = 0
 *         self.lars_max = 10
 *         self.b1_decay = 0.0             # <<<<<<<<<<<<<<
 *         self.b2_decay = 0.0
 * 
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_b1_decay, __pyx_float_0_0) < 0) __PYX_ERR(0, 163, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":164
 *         self.lars_max = 10
 *         self.b1_decay = 0.0
 *         self.b2_decay = 0.0             # <<<<<<<<<<<<<<
 * 
 *     def to_gpu(self):
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_b2_decay, __pyx_float_0_0) < 0) __PYX_ERR(0, 164, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":127
 *         return registry.make_from_config(config)
 * 
 *     def __init__(self, ops, lr, L2=1e-4, beta1=0.90, beta2=0.999, eps=1e-08,             # <<<<<<<<<<<<<<
 *                  max_grad_norm=10., gradient_noise=0.0, nesterov=True,
 *                  L2_is_weight_decay=False, lookahead_k=0, lookahead_alpha=0.5,
 */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":166
 *         self.b2_decay = 0.0
 * 
 *     def to_gpu(self):             # <<<<<<<<<<<<<<
 *         self.ops = CupyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_5to_gpu(PyObject *__pyx_self, PyObject *__pyx_v_self); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_5to_gpu = {"to_gpu", (PyCFunction)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_5to_gpu, METH_O, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_5to_gpu(PyObject *__pyx_self, PyObject *__pyx_v_self) {
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("to_gpu (wrapper)", 0);
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_4to_gpu(__pyx_self, ((PyObject *)__pyx_v_self));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_4to_gpu(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self) {
  PyObject *__pyx_v_params = NULL;
  PyObject *__pyx_v_key = NULL;
  PyObject *__pyx_v_value = NULL;
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  Py_ssize_t __pyx_t_5;
  Py_ssize_t __pyx_t_6;
  PyObject *(*__pyx_t_7)(PyObject *);
  PyObject *__pyx_t_8 = NULL;
  PyObject *__pyx_t_9 = NULL;
  PyObject *(*__pyx_t_10)(PyObject *);
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__23)
  __Pyx_RefNannySetupContext("to_gpu", 0);
  __Pyx_TraceCall("to_gpu", __pyx_f[0], 166, 0, __PYX_ERR(0, 166, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":167
 * 
 *     def to_gpu(self):
 *         self.ops = CupyOps()             # <<<<<<<<<<<<<<
 *         for params in (self.mom1, self.mom2, self.averages):
 *             for key, value in params.items():
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_CupyOps); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 167, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
    __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_2);
    if (likely(__pyx_t_3)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
      __Pyx_INCREF(__pyx_t_3);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_2, function);
    }
  }
  __pyx_t_1 = (__pyx_t_3) ? __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3) : __Pyx_PyObject_CallNoArg(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 167, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_ops, __pyx_t_1) < 0) __PYX_ERR(0, 167, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":168
 *     def to_gpu(self):
 *         self.ops = CupyOps()
 *         for params in (self.mom1, self.mom2, self.averages):             # <<<<<<<<<<<<<<
 *             for key, value in params.items():
 *                 params[key] = self.ops.xp.asarray(value, dtype=value.dtype)
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 168, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 168, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_averages); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 168, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 168, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_GIVEREF(__pyx_t_1);
  PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_3);
  PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_t_3);
  __pyx_t_1 = 0;
  __pyx_t_2 = 0;
  __pyx_t_3 = 0;
  __pyx_t_3 = __pyx_t_4; __Pyx_INCREF(__pyx_t_3); __pyx_t_5 = 0;
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  for (;;) {
    if (__pyx_t_5 >= 3) break;
    #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
    __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_4); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 168, __pyx_L1_error)
    #else
    __pyx_t_4 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 168, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    #endif
    __Pyx_XDECREF_SET(__pyx_v_params, __pyx_t_4);
    __pyx_t_4 = 0;

    /* "thinc/neural/optimizers.pyx":169
 *         self.ops = CupyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 *             for key, value in params.items():             # <<<<<<<<<<<<<<
 *                 params[key] = self.ops.xp.asarray(value, dtype=value.dtype)
 * 
 */
    __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_params, __pyx_n_s_items); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __pyx_t_1 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_2))) {
      __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_2);
      if (likely(__pyx_t_1)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
        __Pyx_INCREF(__pyx_t_1);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_2, function);
      }
    }
    __pyx_t_4 = (__pyx_t_1) ? __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_1) : __Pyx_PyObject_CallNoArg(__pyx_t_2);
    __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
    if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 169, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    if (likely(PyList_CheckExact(__pyx_t_4)) || PyTuple_CheckExact(__pyx_t_4)) {
      __pyx_t_2 = __pyx_t_4; __Pyx_INCREF(__pyx_t_2); __pyx_t_6 = 0;
      __pyx_t_7 = NULL;
    } else {
      __pyx_t_6 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 169, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_2);
      __pyx_t_7 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 169, __pyx_L1_error)
    }
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    for (;;) {
      if (likely(!__pyx_t_7)) {
        if (likely(PyList_CheckExact(__pyx_t_2))) {
          if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_2)) break;
          #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
          __pyx_t_4 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_6); __Pyx_INCREF(__pyx_t_4); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 169, __pyx_L1_error)
          #else
          __pyx_t_4 = PySequence_ITEM(__pyx_t_2, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 169, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_4);
          #endif
        } else {
          if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
          #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
          __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_6); __Pyx_INCREF(__pyx_t_4); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 169, __pyx_L1_error)
          #else
          __pyx_t_4 = PySequence_ITEM(__pyx_t_2, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 169, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_4);
          #endif
        }
      } else {
        __pyx_t_4 = __pyx_t_7(__pyx_t_2);
        if (unlikely(!__pyx_t_4)) {
          PyObject* exc_type = PyErr_Occurred();
          if (exc_type) {
            if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
            else __PYX_ERR(0, 169, __pyx_L1_error)
          }
          break;
        }
        __Pyx_GOTREF(__pyx_t_4);
      }
      if ((likely(PyTuple_CheckExact(__pyx_t_4))) || (PyList_CheckExact(__pyx_t_4))) {
        PyObject* sequence = __pyx_t_4;
        Py_ssize_t size = __Pyx_PySequence_SIZE(sequence);
        if (unlikely(size != 2)) {
          if (size > 2) __Pyx_RaiseTooManyValuesError(2);
          else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);
          __PYX_ERR(0, 169, __pyx_L1_error)
        }
        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
        if (likely(PyTuple_CheckExact(sequence))) {
          __pyx_t_1 = PyTuple_GET_ITEM(sequence, 0); 
          __pyx_t_8 = PyTuple_GET_ITEM(sequence, 1); 
        } else {
          __pyx_t_1 = PyList_GET_ITEM(sequence, 0); 
          __pyx_t_8 = PyList_GET_ITEM(sequence, 1); 
        }
        __Pyx_INCREF(__pyx_t_1);
        __Pyx_INCREF(__pyx_t_8);
        #else
        __pyx_t_1 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 169, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_1);
        __pyx_t_8 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 169, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_8);
        #endif
        __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
      } else {
        Py_ssize_t index = -1;
        __pyx_t_9 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 169, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_9);
        __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
        __pyx_t_10 = Py_TYPE(__pyx_t_9)->tp_iternext;
        index = 0; __pyx_t_1 = __pyx_t_10(__pyx_t_9); if (unlikely(!__pyx_t_1)) goto __pyx_L7_unpacking_failed;
        __Pyx_GOTREF(__pyx_t_1);
        index = 1; __pyx_t_8 = __pyx_t_10(__pyx_t_9); if (unlikely(!__pyx_t_8)) goto __pyx_L7_unpacking_failed;
        __Pyx_GOTREF(__pyx_t_8);
        if (__Pyx_IternextUnpackEndCheck(__pyx_t_10(__pyx_t_9), 2) < 0) __PYX_ERR(0, 169, __pyx_L1_error)
        __pyx_t_10 = NULL;
        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
        goto __pyx_L8_unpacking_done;
        __pyx_L7_unpacking_failed:;
        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
        __pyx_t_10 = NULL;
        if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index);
        __PYX_ERR(0, 169, __pyx_L1_error)
        __pyx_L8_unpacking_done:;
      }
      __Pyx_XDECREF_SET(__pyx_v_key, __pyx_t_1);
      __pyx_t_1 = 0;
      __Pyx_XDECREF_SET(__pyx_v_value, __pyx_t_8);
      __pyx_t_8 = 0;

      /* "thinc/neural/optimizers.pyx":170
 *         for params in (self.mom1, self.mom2, self.averages):
 *             for key, value in params.items():
 *                 params[key] = self.ops.xp.asarray(value, dtype=value.dtype)             # <<<<<<<<<<<<<<
 * 
 *     def to_cpu(self):
 */
      __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 170, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_4);
      __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_xp); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 170, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_8);
      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
      __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_asarray); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 170, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_4);
      __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
      __pyx_t_8 = PyTuple_New(1); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 170, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_8);
      __Pyx_INCREF(__pyx_v_value);
      __Pyx_GIVEREF(__pyx_v_value);
      PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_v_value);
      __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 170, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_1);
      __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_value, __pyx_n_s_dtype); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 170, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_9);
      if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_9) < 0) __PYX_ERR(0, 170, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
      __pyx_t_9 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_8, __pyx_t_1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 170, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_9);
      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
      __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
      if (unlikely(PyObject_SetItem(__pyx_v_params, __pyx_v_key, __pyx_t_9) < 0)) __PYX_ERR(0, 170, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;

      /* "thinc/neural/optimizers.pyx":169
 *         self.ops = CupyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 *             for key, value in params.items():             # <<<<<<<<<<<<<<
 *                 params[key] = self.ops.xp.asarray(value, dtype=value.dtype)
 * 
 */
    }
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

    /* "thinc/neural/optimizers.pyx":168
 *     def to_gpu(self):
 *         self.ops = CupyOps()
 *         for params in (self.mom1, self.mom2, self.averages):             # <<<<<<<<<<<<<<
 *             for key, value in params.items():
 *                 params[key] = self.ops.xp.asarray(value, dtype=value.dtype)
 */
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":166
 *         self.b2_decay = 0.0
 * 
 *     def to_gpu(self):             # <<<<<<<<<<<<<<
 *         self.ops = CupyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_8);
  __Pyx_XDECREF(__pyx_t_9);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.to_gpu", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_params);
  __Pyx_XDECREF(__pyx_v_key);
  __Pyx_XDECREF(__pyx_v_value);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":172
 *                 params[key] = self.ops.xp.asarray(value, dtype=value.dtype)
 * 
 *     def to_cpu(self):             # <<<<<<<<<<<<<<
 *         self.ops = NumpyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_7to_cpu(PyObject *__pyx_self, PyObject *__pyx_v_self); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_7to_cpu = {"to_cpu", (PyCFunction)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_7to_cpu, METH_O, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_7to_cpu(PyObject *__pyx_self, PyObject *__pyx_v_self) {
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("to_cpu (wrapper)", 0);
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_6to_cpu(__pyx_self, ((PyObject *)__pyx_v_self));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_6to_cpu(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self) {
  PyObject *__pyx_v_params = NULL;
  PyObject *__pyx_v_key = NULL;
  PyObject *__pyx_v_value = NULL;
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  Py_ssize_t __pyx_t_5;
  Py_ssize_t __pyx_t_6;
  PyObject *(*__pyx_t_7)(PyObject *);
  PyObject *__pyx_t_8 = NULL;
  PyObject *__pyx_t_9 = NULL;
  PyObject *(*__pyx_t_10)(PyObject *);
  int __pyx_t_11;
  int __pyx_t_12;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__24)
  __Pyx_RefNannySetupContext("to_cpu", 0);
  __Pyx_TraceCall("to_cpu", __pyx_f[0], 172, 0, __PYX_ERR(0, 172, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":173
 * 
 *     def to_cpu(self):
 *         self.ops = NumpyOps()             # <<<<<<<<<<<<<<
 *         for params in (self.mom1, self.mom2, self.averages):
 *             for key, value in params.items():
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_NumpyOps); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 173, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
    __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_2);
    if (likely(__pyx_t_3)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
      __Pyx_INCREF(__pyx_t_3);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_2, function);
    }
  }
  __pyx_t_1 = (__pyx_t_3) ? __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3) : __Pyx_PyObject_CallNoArg(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 173, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_ops, __pyx_t_1) < 0) __PYX_ERR(0, 173, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":174
 *     def to_cpu(self):
 *         self.ops = NumpyOps()
 *         for params in (self.mom1, self.mom2, self.averages):             # <<<<<<<<<<<<<<
 *             for key, value in params.items():
 *                 if hasattr(value, 'get'):
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 174, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 174, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_averages); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 174, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_4 = PyTuple_New(3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 174, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_GIVEREF(__pyx_t_1);
  PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1);
  __Pyx_GIVEREF(__pyx_t_2);
  PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_2);
  __Pyx_GIVEREF(__pyx_t_3);
  PyTuple_SET_ITEM(__pyx_t_4, 2, __pyx_t_3);
  __pyx_t_1 = 0;
  __pyx_t_2 = 0;
  __pyx_t_3 = 0;
  __pyx_t_3 = __pyx_t_4; __Pyx_INCREF(__pyx_t_3); __pyx_t_5 = 0;
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  for (;;) {
    if (__pyx_t_5 >= 3) break;
    #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
    __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_5); __Pyx_INCREF(__pyx_t_4); __pyx_t_5++; if (unlikely(0 < 0)) __PYX_ERR(0, 174, __pyx_L1_error)
    #else
    __pyx_t_4 = PySequence_ITEM(__pyx_t_3, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 174, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    #endif
    __Pyx_XDECREF_SET(__pyx_v_params, __pyx_t_4);
    __pyx_t_4 = 0;

    /* "thinc/neural/optimizers.pyx":175
 *         self.ops = NumpyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 *             for key, value in params.items():             # <<<<<<<<<<<<<<
 *                 if hasattr(value, 'get'):
 *                     params[key] = value.get()
 */
    __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_params, __pyx_n_s_items); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 175, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __pyx_t_1 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_2))) {
      __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_2);
      if (likely(__pyx_t_1)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
        __Pyx_INCREF(__pyx_t_1);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_2, function);
      }
    }
    __pyx_t_4 = (__pyx_t_1) ? __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_1) : __Pyx_PyObject_CallNoArg(__pyx_t_2);
    __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
    if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 175, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    if (likely(PyList_CheckExact(__pyx_t_4)) || PyTuple_CheckExact(__pyx_t_4)) {
      __pyx_t_2 = __pyx_t_4; __Pyx_INCREF(__pyx_t_2); __pyx_t_6 = 0;
      __pyx_t_7 = NULL;
    } else {
      __pyx_t_6 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 175, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_2);
      __pyx_t_7 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 175, __pyx_L1_error)
    }
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    for (;;) {
      if (likely(!__pyx_t_7)) {
        if (likely(PyList_CheckExact(__pyx_t_2))) {
          if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_2)) break;
          #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
          __pyx_t_4 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_6); __Pyx_INCREF(__pyx_t_4); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 175, __pyx_L1_error)
          #else
          __pyx_t_4 = PySequence_ITEM(__pyx_t_2, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 175, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_4);
          #endif
        } else {
          if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_2)) break;
          #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
          __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_6); __Pyx_INCREF(__pyx_t_4); __pyx_t_6++; if (unlikely(0 < 0)) __PYX_ERR(0, 175, __pyx_L1_error)
          #else
          __pyx_t_4 = PySequence_ITEM(__pyx_t_2, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 175, __pyx_L1_error)
          __Pyx_GOTREF(__pyx_t_4);
          #endif
        }
      } else {
        __pyx_t_4 = __pyx_t_7(__pyx_t_2);
        if (unlikely(!__pyx_t_4)) {
          PyObject* exc_type = PyErr_Occurred();
          if (exc_type) {
            if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
            else __PYX_ERR(0, 175, __pyx_L1_error)
          }
          break;
        }
        __Pyx_GOTREF(__pyx_t_4);
      }
      if ((likely(PyTuple_CheckExact(__pyx_t_4))) || (PyList_CheckExact(__pyx_t_4))) {
        PyObject* sequence = __pyx_t_4;
        Py_ssize_t size = __Pyx_PySequence_SIZE(sequence);
        if (unlikely(size != 2)) {
          if (size > 2) __Pyx_RaiseTooManyValuesError(2);
          else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);
          __PYX_ERR(0, 175, __pyx_L1_error)
        }
        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
        if (likely(PyTuple_CheckExact(sequence))) {
          __pyx_t_1 = PyTuple_GET_ITEM(sequence, 0); 
          __pyx_t_8 = PyTuple_GET_ITEM(sequence, 1); 
        } else {
          __pyx_t_1 = PyList_GET_ITEM(sequence, 0); 
          __pyx_t_8 = PyList_GET_ITEM(sequence, 1); 
        }
        __Pyx_INCREF(__pyx_t_1);
        __Pyx_INCREF(__pyx_t_8);
        #else
        __pyx_t_1 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 175, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_1);
        __pyx_t_8 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 175, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_8);
        #endif
        __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
      } else {
        Py_ssize_t index = -1;
        __pyx_t_9 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 175, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_9);
        __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
        __pyx_t_10 = Py_TYPE(__pyx_t_9)->tp_iternext;
        index = 0; __pyx_t_1 = __pyx_t_10(__pyx_t_9); if (unlikely(!__pyx_t_1)) goto __pyx_L7_unpacking_failed;
        __Pyx_GOTREF(__pyx_t_1);
        index = 1; __pyx_t_8 = __pyx_t_10(__pyx_t_9); if (unlikely(!__pyx_t_8)) goto __pyx_L7_unpacking_failed;
        __Pyx_GOTREF(__pyx_t_8);
        if (__Pyx_IternextUnpackEndCheck(__pyx_t_10(__pyx_t_9), 2) < 0) __PYX_ERR(0, 175, __pyx_L1_error)
        __pyx_t_10 = NULL;
        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
        goto __pyx_L8_unpacking_done;
        __pyx_L7_unpacking_failed:;
        __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
        __pyx_t_10 = NULL;
        if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index);
        __PYX_ERR(0, 175, __pyx_L1_error)
        __pyx_L8_unpacking_done:;
      }
      __Pyx_XDECREF_SET(__pyx_v_key, __pyx_t_1);
      __pyx_t_1 = 0;
      __Pyx_XDECREF_SET(__pyx_v_value, __pyx_t_8);
      __pyx_t_8 = 0;

      /* "thinc/neural/optimizers.pyx":176
 *         for params in (self.mom1, self.mom2, self.averages):
 *             for key, value in params.items():
 *                 if hasattr(value, 'get'):             # <<<<<<<<<<<<<<
 *                     params[key] = value.get()
 * 
 */
      __pyx_t_11 = __Pyx_HasAttr(__pyx_v_value, __pyx_n_s_get); if (unlikely(__pyx_t_11 == ((int)-1))) __PYX_ERR(0, 176, __pyx_L1_error)
      __pyx_t_12 = (__pyx_t_11 != 0);
      if (__pyx_t_12) {

        /* "thinc/neural/optimizers.pyx":177
 *             for key, value in params.items():
 *                 if hasattr(value, 'get'):
 *                     params[key] = value.get()             # <<<<<<<<<<<<<<
 * 
 *     def step_schedules(self):
 */
        __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_value, __pyx_n_s_get); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 177, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_8);
        __pyx_t_1 = NULL;
        if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_8))) {
          __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_8);
          if (likely(__pyx_t_1)) {
            PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8);
            __Pyx_INCREF(__pyx_t_1);
            __Pyx_INCREF(function);
            __Pyx_DECREF_SET(__pyx_t_8, function);
          }
        }
        __pyx_t_4 = (__pyx_t_1) ? __Pyx_PyObject_CallOneArg(__pyx_t_8, __pyx_t_1) : __Pyx_PyObject_CallNoArg(__pyx_t_8);
        __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
        if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 177, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_4);
        __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
        if (unlikely(PyObject_SetItem(__pyx_v_params, __pyx_v_key, __pyx_t_4) < 0)) __PYX_ERR(0, 177, __pyx_L1_error)
        __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

        /* "thinc/neural/optimizers.pyx":176
 *         for params in (self.mom1, self.mom2, self.averages):
 *             for key, value in params.items():
 *                 if hasattr(value, 'get'):             # <<<<<<<<<<<<<<
 *                     params[key] = value.get()
 * 
 */
      }

      /* "thinc/neural/optimizers.pyx":175
 *         self.ops = NumpyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 *             for key, value in params.items():             # <<<<<<<<<<<<<<
 *                 if hasattr(value, 'get'):
 *                     params[key] = value.get()
 */
    }
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

    /* "thinc/neural/optimizers.pyx":174
 *     def to_cpu(self):
 *         self.ops = NumpyOps()
 *         for params in (self.mom1, self.mom2, self.averages):             # <<<<<<<<<<<<<<
 *             for key, value in params.items():
 *                 if hasattr(value, 'get'):
 */
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":172
 *                 params[key] = self.ops.xp.asarray(value, dtype=value.dtype)
 * 
 *     def to_cpu(self):             # <<<<<<<<<<<<<<
 *         self.ops = NumpyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_8);
  __Pyx_XDECREF(__pyx_t_9);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.to_cpu", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_params);
  __Pyx_XDECREF(__pyx_v_key);
  __Pyx_XDECREF(__pyx_v_value);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":179
 *                     params[key] = value.get()
 * 
 *     def step_schedules(self):             # <<<<<<<<<<<<<<
 *         for key, schedule in self.schedules.items():
 *             setattr(self, key, next(schedule))
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_9step_schedules(PyObject *__pyx_self, PyObject *__pyx_v_self); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_9step_schedules = {"step_schedules", (PyCFunction)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_9step_schedules, METH_O, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_9step_schedules(PyObject *__pyx_self, PyObject *__pyx_v_self) {
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("step_schedules (wrapper)", 0);
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_8step_schedules(__pyx_self, ((PyObject *)__pyx_v_self));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_8step_schedules(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self) {
  PyObject *__pyx_v_key = NULL;
  PyObject *__pyx_v_schedule = NULL;
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  Py_ssize_t __pyx_t_4;
  PyObject *(*__pyx_t_5)(PyObject *);
  PyObject *__pyx_t_6 = NULL;
  PyObject *__pyx_t_7 = NULL;
  PyObject *(*__pyx_t_8)(PyObject *);
  int __pyx_t_9;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__25)
  __Pyx_RefNannySetupContext("step_schedules", 0);
  __Pyx_TraceCall("step_schedules", __pyx_f[0], 179, 0, __PYX_ERR(0, 179, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":180
 * 
 *     def step_schedules(self):
 *         for key, schedule in self.schedules.items():             # <<<<<<<<<<<<<<
 *             setattr(self, key, next(schedule))
 * 
 */
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_schedules); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 180, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_items); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = NULL;
  if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_2)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_2);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  __pyx_t_1 = (__pyx_t_2) ? __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2) : __Pyx_PyObject_CallNoArg(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 180, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) {
    __pyx_t_3 = __pyx_t_1; __Pyx_INCREF(__pyx_t_3); __pyx_t_4 = 0;
    __pyx_t_5 = NULL;
  } else {
    __pyx_t_4 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 180, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_5 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 180, __pyx_L1_error)
  }
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  for (;;) {
    if (likely(!__pyx_t_5)) {
      if (likely(PyList_CheckExact(__pyx_t_3))) {
        if (__pyx_t_4 >= PyList_GET_SIZE(__pyx_t_3)) break;
        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
        __pyx_t_1 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_4); __Pyx_INCREF(__pyx_t_1); __pyx_t_4++; if (unlikely(0 < 0)) __PYX_ERR(0, 180, __pyx_L1_error)
        #else
        __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 180, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_1);
        #endif
      } else {
        if (__pyx_t_4 >= PyTuple_GET_SIZE(__pyx_t_3)) break;
        #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
        __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_4); __Pyx_INCREF(__pyx_t_1); __pyx_t_4++; if (unlikely(0 < 0)) __PYX_ERR(0, 180, __pyx_L1_error)
        #else
        __pyx_t_1 = PySequence_ITEM(__pyx_t_3, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 180, __pyx_L1_error)
        __Pyx_GOTREF(__pyx_t_1);
        #endif
      }
    } else {
      __pyx_t_1 = __pyx_t_5(__pyx_t_3);
      if (unlikely(!__pyx_t_1)) {
        PyObject* exc_type = PyErr_Occurred();
        if (exc_type) {
          if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();
          else __PYX_ERR(0, 180, __pyx_L1_error)
        }
        break;
      }
      __Pyx_GOTREF(__pyx_t_1);
    }
    if ((likely(PyTuple_CheckExact(__pyx_t_1))) || (PyList_CheckExact(__pyx_t_1))) {
      PyObject* sequence = __pyx_t_1;
      Py_ssize_t size = __Pyx_PySequence_SIZE(sequence);
      if (unlikely(size != 2)) {
        if (size > 2) __Pyx_RaiseTooManyValuesError(2);
        else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);
        __PYX_ERR(0, 180, __pyx_L1_error)
      }
      #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
      if (likely(PyTuple_CheckExact(sequence))) {
        __pyx_t_2 = PyTuple_GET_ITEM(sequence, 0); 
        __pyx_t_6 = PyTuple_GET_ITEM(sequence, 1); 
      } else {
        __pyx_t_2 = PyList_GET_ITEM(sequence, 0); 
        __pyx_t_6 = PyList_GET_ITEM(sequence, 1); 
      }
      __Pyx_INCREF(__pyx_t_2);
      __Pyx_INCREF(__pyx_t_6);
      #else
      __pyx_t_2 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 180, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_2);
      __pyx_t_6 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 180, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_6);
      #endif
      __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    } else {
      Py_ssize_t index = -1;
      __pyx_t_7 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 180, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_7);
      __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
      __pyx_t_8 = Py_TYPE(__pyx_t_7)->tp_iternext;
      index = 0; __pyx_t_2 = __pyx_t_8(__pyx_t_7); if (unlikely(!__pyx_t_2)) goto __pyx_L5_unpacking_failed;
      __Pyx_GOTREF(__pyx_t_2);
      index = 1; __pyx_t_6 = __pyx_t_8(__pyx_t_7); if (unlikely(!__pyx_t_6)) goto __pyx_L5_unpacking_failed;
      __Pyx_GOTREF(__pyx_t_6);
      if (__Pyx_IternextUnpackEndCheck(__pyx_t_8(__pyx_t_7), 2) < 0) __PYX_ERR(0, 180, __pyx_L1_error)
      __pyx_t_8 = NULL;
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
      goto __pyx_L6_unpacking_done;
      __pyx_L5_unpacking_failed:;
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
      __pyx_t_8 = NULL;
      if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index);
      __PYX_ERR(0, 180, __pyx_L1_error)
      __pyx_L6_unpacking_done:;
    }
    __Pyx_XDECREF_SET(__pyx_v_key, __pyx_t_2);
    __pyx_t_2 = 0;
    __Pyx_XDECREF_SET(__pyx_v_schedule, __pyx_t_6);
    __pyx_t_6 = 0;

    /* "thinc/neural/optimizers.pyx":181
 *     def step_schedules(self):
 *         for key, schedule in self.schedules.items():
 *             setattr(self, key, next(schedule))             # <<<<<<<<<<<<<<
 * 
 *     @property
 */
    __pyx_t_1 = __Pyx_PyIter_Next(__pyx_v_schedule); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 181, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_9 = PyObject_SetAttr(__pyx_v_self, __pyx_v_key, __pyx_t_1); if (unlikely(__pyx_t_9 == ((int)-1))) __PYX_ERR(0, 181, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

    /* "thinc/neural/optimizers.pyx":180
 * 
 *     def step_schedules(self):
 *         for key, schedule in self.schedules.items():             # <<<<<<<<<<<<<<
 *             setattr(self, key, next(schedule))
 * 
 */
  }
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":179
 *                     params[key] = value.get()
 * 
 *     def step_schedules(self):             # <<<<<<<<<<<<<<
 *         for key, schedule in self.schedules.items():
 *             setattr(self, key, next(schedule))
 */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_XDECREF(__pyx_t_7);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.step_schedules", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_key);
  __Pyx_XDECREF(__pyx_v_schedule);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":184
 * 
 *     @property
 *     def learn_rate(self):             # <<<<<<<<<<<<<<
 *         return self.alpha
 * 
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_11learn_rate(PyObject *__pyx_self, PyObject *__pyx_v_self); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_11learn_rate = {"learn_rate", (PyCFunction)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_11learn_rate, METH_O, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_11learn_rate(PyObject *__pyx_self, PyObject *__pyx_v_self) {
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("learn_rate (wrapper)", 0);
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_10learn_rate(__pyx_self, ((PyObject *)__pyx_v_self));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_10learn_rate(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__26)
  __Pyx_RefNannySetupContext("learn_rate", 0);
  __Pyx_TraceCall("learn_rate", __pyx_f[0], 184, 0, __PYX_ERR(0, 184, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":185
 *     @property
 *     def learn_rate(self):
 *         return self.alpha             # <<<<<<<<<<<<<<
 * 
 *     @learn_rate.setter
 */
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_alpha); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 185, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_r = __pyx_t_1;
  __pyx_t_1 = 0;
  goto __pyx_L0;

  /* "thinc/neural/optimizers.pyx":184
 * 
 *     @property
 *     def learn_rate(self):             # <<<<<<<<<<<<<<
 *         return self.alpha
 * 
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.learn_rate", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":188
 * 
 *     @learn_rate.setter
 *     def learn_rate(self, learn_rate):             # <<<<<<<<<<<<<<
 *         self.alpha = learn_rate
 * 
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_13learn_rate(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_13learn_rate = {"learn_rate", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_13learn_rate, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_13learn_rate(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_self = 0;
  PyObject *__pyx_v_learn_rate = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("learn_rate (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_self,&__pyx_n_s_learn_rate,0};
    PyObject* values[2] = {0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_self)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_learn_rate)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("learn_rate", 1, 2, 2, 1); __PYX_ERR(0, 188, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "learn_rate") < 0)) __PYX_ERR(0, 188, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
    }
    __pyx_v_self = values[0];
    __pyx_v_learn_rate = values[1];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("learn_rate", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 188, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.learn_rate", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_12learn_rate(__pyx_self, __pyx_v_self, __pyx_v_learn_rate);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_12learn_rate(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, PyObject *__pyx_v_learn_rate) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__27)
  __Pyx_RefNannySetupContext("learn_rate", 0);
  __Pyx_TraceCall("learn_rate", __pyx_f[0], 188, 0, __PYX_ERR(0, 188, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":189
 *     @learn_rate.setter
 *     def learn_rate(self, learn_rate):
 *         self.alpha = learn_rate             # <<<<<<<<<<<<<<
 * 
 *     def lr(self, nr_upd):
 */
  if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_alpha, __pyx_v_learn_rate) < 0) __PYX_ERR(0, 189, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":188
 * 
 *     @learn_rate.setter
 *     def learn_rate(self, learn_rate):             # <<<<<<<<<<<<<<
 *         self.alpha = learn_rate
 * 
 */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.learn_rate", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":191
 *         self.alpha = learn_rate
 * 
 *     def lr(self, nr_upd):             # <<<<<<<<<<<<<<
 *         alpha = anneal(self.alpha, self.decay, self.decay_steps, nr_upd)
 *         if self.b1 == 0. or self.b2 == 0.:
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_15lr(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_15lr = {"lr", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_15lr, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_15lr(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_self = 0;
  PyObject *__pyx_v_nr_upd = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("lr (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_self,&__pyx_n_s_nr_upd,0};
    PyObject* values[2] = {0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_self)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_nr_upd)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("lr", 1, 2, 2, 1); __PYX_ERR(0, 191, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "lr") < 0)) __PYX_ERR(0, 191, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 2) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
    }
    __pyx_v_self = values[0];
    __pyx_v_nr_upd = values[1];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("lr", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 191, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.lr", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_14lr(__pyx_self, __pyx_v_self, __pyx_v_nr_upd);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_14lr(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, PyObject *__pyx_v_nr_upd) {
  PyObject *__pyx_v_alpha = NULL;
  PyObject *__pyx_v_fix1 = NULL;
  PyObject *__pyx_v_fix2 = NULL;
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  PyObject *__pyx_t_5 = NULL;
  PyObject *__pyx_t_6 = NULL;
  int __pyx_t_7;
  PyObject *__pyx_t_8 = NULL;
  int __pyx_t_9;
  int __pyx_t_10;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__28)
  __Pyx_RefNannySetupContext("lr", 0);
  __Pyx_TraceCall("lr", __pyx_f[0], 191, 0, __PYX_ERR(0, 191, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":192
 * 
 *     def lr(self, nr_upd):
 *         alpha = anneal(self.alpha, self.decay, self.decay_steps, nr_upd)             # <<<<<<<<<<<<<<
 *         if self.b1 == 0. or self.b2 == 0.:
 *             return alpha
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_anneal); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 192, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_alpha); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 192, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_decay); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 192, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_decay_steps); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 192, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __pyx_t_6 = NULL;
  __pyx_t_7 = 0;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
    __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_2);
    if (likely(__pyx_t_6)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
      __Pyx_INCREF(__pyx_t_6);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_2, function);
      __pyx_t_7 = 1;
    }
  }
  #if CYTHON_FAST_PYCALL
  if (PyFunction_Check(__pyx_t_2)) {
    PyObject *__pyx_temp[5] = {__pyx_t_6, __pyx_t_3, __pyx_t_4, __pyx_t_5, __pyx_v_nr_upd};
    __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_7, 4+__pyx_t_7); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 192, __pyx_L1_error)
    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  } else
  #endif
  #if CYTHON_FAST_PYCCALL
  if (__Pyx_PyFastCFunction_Check(__pyx_t_2)) {
    PyObject *__pyx_temp[5] = {__pyx_t_6, __pyx_t_3, __pyx_t_4, __pyx_t_5, __pyx_v_nr_upd};
    __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_7, 4+__pyx_t_7); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 192, __pyx_L1_error)
    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  } else
  #endif
  {
    __pyx_t_8 = PyTuple_New(4+__pyx_t_7); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 192, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    if (__pyx_t_6) {
      __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_6); __pyx_t_6 = NULL;
    }
    __Pyx_GIVEREF(__pyx_t_3);
    PyTuple_SET_ITEM(__pyx_t_8, 0+__pyx_t_7, __pyx_t_3);
    __Pyx_GIVEREF(__pyx_t_4);
    PyTuple_SET_ITEM(__pyx_t_8, 1+__pyx_t_7, __pyx_t_4);
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_8, 2+__pyx_t_7, __pyx_t_5);
    __Pyx_INCREF(__pyx_v_nr_upd);
    __Pyx_GIVEREF(__pyx_v_nr_upd);
    PyTuple_SET_ITEM(__pyx_t_8, 3+__pyx_t_7, __pyx_v_nr_upd);
    __pyx_t_3 = 0;
    __pyx_t_4 = 0;
    __pyx_t_5 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_8, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 192, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  }
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_v_alpha = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":193
 *     def lr(self, nr_upd):
 *         alpha = anneal(self.alpha, self.decay, self.decay_steps, nr_upd)
 *         if self.b1 == 0. or self.b2 == 0.:             # <<<<<<<<<<<<<<
 *             return alpha
 *         fix1 = 1.- (self.b1 ** nr_upd)
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 193, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyFloat_EqObjC(__pyx_t_1, __pyx_float_0_, 0., 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 193, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 193, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (!__pyx_t_10) {
  } else {
    __pyx_t_9 = __pyx_t_10;
    goto __pyx_L4_bool_binop_done;
  }
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 193, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_1 = __Pyx_PyFloat_EqObjC(__pyx_t_2, __pyx_float_0_, 0., 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 193, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 193, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_9 = __pyx_t_10;
  __pyx_L4_bool_binop_done:;
  if (__pyx_t_9) {

    /* "thinc/neural/optimizers.pyx":194
 *         alpha = anneal(self.alpha, self.decay, self.decay_steps, nr_upd)
 *         if self.b1 == 0. or self.b2 == 0.:
 *             return alpha             # <<<<<<<<<<<<<<
 *         fix1 = 1.- (self.b1 ** nr_upd)
 *         fix2 = 1.- (self.b2 ** nr_upd)
 */
    __Pyx_XDECREF(__pyx_r);
    __Pyx_INCREF(__pyx_v_alpha);
    __pyx_r = __pyx_v_alpha;
    goto __pyx_L0;

    /* "thinc/neural/optimizers.pyx":193
 *     def lr(self, nr_upd):
 *         alpha = anneal(self.alpha, self.decay, self.decay_steps, nr_upd)
 *         if self.b1 == 0. or self.b2 == 0.:             # <<<<<<<<<<<<<<
 *             return alpha
 *         fix1 = 1.- (self.b1 ** nr_upd)
 */
  }

  /* "thinc/neural/optimizers.pyx":195
 *         if self.b1 == 0. or self.b2 == 0.:
 *             return alpha
 *         fix1 = 1.- (self.b1 ** nr_upd)             # <<<<<<<<<<<<<<
 *         fix2 = 1.- (self.b2 ** nr_upd)
 *         return alpha * numpy.sqrt(fix2) / fix1
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 195, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = PyNumber_Power(__pyx_t_1, __pyx_v_nr_upd, Py_None); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 195, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyFloat_SubtractCObj(__pyx_float_1_, __pyx_t_2, 1., 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 195, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_v_fix1 = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":196
 *             return alpha
 *         fix1 = 1.- (self.b1 ** nr_upd)
 *         fix2 = 1.- (self.b2 ** nr_upd)             # <<<<<<<<<<<<<<
 *         return alpha * numpy.sqrt(fix2) / fix1
 * 
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 196, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = PyNumber_Power(__pyx_t_1, __pyx_v_nr_upd, Py_None); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 196, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyFloat_SubtractCObj(__pyx_float_1_, __pyx_t_2, 1., 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 196, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_v_fix2 = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":197
 *         fix1 = 1.- (self.b1 ** nr_upd)
 *         fix2 = 1.- (self.b2 ** nr_upd)
 *         return alpha * numpy.sqrt(fix2) / fix1             # <<<<<<<<<<<<<<
 * 
 *     def __call__(self, weights, gradient, lr_scale=1., key=None):
 */
  __Pyx_XDECREF(__pyx_r);
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_numpy); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 197, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_sqrt); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 197, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_8);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_8))) {
    __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_8);
    if (likely(__pyx_t_2)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8);
      __Pyx_INCREF(__pyx_t_2);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_8, function);
    }
  }
  __pyx_t_1 = (__pyx_t_2) ? __Pyx_PyObject_Call2Args(__pyx_t_8, __pyx_t_2, __pyx_v_fix2) : __Pyx_PyObject_CallOneArg(__pyx_t_8, __pyx_v_fix2);
  __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  __pyx_t_8 = PyNumber_Multiply(__pyx_v_alpha, __pyx_t_1); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 197, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_8);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyNumber_Divide(__pyx_t_8, __pyx_v_fix1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 197, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  __pyx_r = __pyx_t_1;
  __pyx_t_1 = 0;
  goto __pyx_L0;

  /* "thinc/neural/optimizers.pyx":191
 *         self.alpha = learn_rate
 * 
 *     def lr(self, nr_upd):             # <<<<<<<<<<<<<<
 *         alpha = anneal(self.alpha, self.decay, self.decay_steps, nr_upd)
 *         if self.b1 == 0. or self.b2 == 0.:
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_XDECREF(__pyx_t_8);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.lr", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_alpha);
  __Pyx_XDECREF(__pyx_v_fix1);
  __Pyx_XDECREF(__pyx_v_fix2);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":199
 *         return alpha * numpy.sqrt(fix2) / fix1
 * 
 *     def __call__(self, weights, gradient, lr_scale=1., key=None):             # <<<<<<<<<<<<<<
 *         assert len(gradient) >= 1
 *         xp = get_array_module(weights)
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_17__call__(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_17__call__ = {"__call__", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_17__call__, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_17__call__(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_self = 0;
  PyObject *__pyx_v_weights = 0;
  PyObject *__pyx_v_gradient = 0;
  PyObject *__pyx_v_lr_scale = 0;
  PyObject *__pyx_v_key = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__call__ (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_self,&__pyx_n_s_weights,&__pyx_n_s_gradient,&__pyx_n_s_lr_scale,&__pyx_n_s_key,0};
    PyObject* values[5] = {0,0,0,0,0};
    values[3] = ((PyObject *)((PyObject*)__pyx_float_1_));
    values[4] = ((PyObject *)((PyObject *)Py_None));
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_self)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_weights)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("__call__", 0, 3, 5, 1); __PYX_ERR(0, 199, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_gradient)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("__call__", 0, 3, 5, 2); __PYX_ERR(0, 199, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lr_scale);
          if (value) { values[3] = value; kw_args--; }
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (kw_args > 0) {
          PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_key);
          if (value) { values[4] = value; kw_args--; }
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__call__") < 0)) __PYX_ERR(0, 199, __pyx_L3_error)
      }
    } else {
      switch (PyTuple_GET_SIZE(__pyx_args)) {
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        break;
        default: goto __pyx_L5_argtuple_error;
      }
    }
    __pyx_v_self = values[0];
    __pyx_v_weights = values[1];
    __pyx_v_gradient = values[2];
    __pyx_v_lr_scale = values[3];
    __pyx_v_key = values[4];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("__call__", 0, 3, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 199, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.__call__", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_16__call__(__pyx_self, __pyx_v_self, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_16__call__(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, PyObject *__pyx_v_weights, PyObject *__pyx_v_gradient, PyObject *__pyx_v_lr_scale, PyObject *__pyx_v_key) {
  PyObject *__pyx_v_xp = NULL;
  PyObject *__pyx_v_nr_upd = NULL;
  PyObject *__pyx_v_slow = NULL;
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  Py_ssize_t __pyx_t_1;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  int __pyx_t_5;
  int __pyx_t_6;
  PyObject *__pyx_t_7 = NULL;
  int __pyx_t_8;
  int __pyx_t_9;
  PyObject *__pyx_t_10 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__29)
  __Pyx_RefNannySetupContext("__call__", 0);
  __Pyx_TraceCall("__call__", __pyx_f[0], 199, 0, __PYX_ERR(0, 199, __pyx_L1_error));
  __Pyx_INCREF(__pyx_v_weights);
  __Pyx_INCREF(__pyx_v_gradient);

  /* "thinc/neural/optimizers.pyx":200
 * 
 *     def __call__(self, weights, gradient, lr_scale=1., key=None):
 *         assert len(gradient) >= 1             # <<<<<<<<<<<<<<
 *         xp = get_array_module(weights)
 *         if xp is not self.ops.xp:
 */
  #ifndef CYTHON_WITHOUT_ASSERTIONS
  if (unlikely(!Py_OptimizeFlag)) {
    __pyx_t_1 = PyObject_Length(__pyx_v_gradient); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(0, 200, __pyx_L1_error)
    if (unlikely(!((__pyx_t_1 >= 1) != 0))) {
      PyErr_SetNone(PyExc_AssertionError);
      __PYX_ERR(0, 200, __pyx_L1_error)
    }
  }
  #endif

  /* "thinc/neural/optimizers.pyx":201
 *     def __call__(self, weights, gradient, lr_scale=1., key=None):
 *         assert len(gradient) >= 1
 *         xp = get_array_module(weights)             # <<<<<<<<<<<<<<
 *         if xp is not self.ops.xp:
 *             if xp is numpy:
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_get_array_module); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 201, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_4 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) {
    __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
    if (likely(__pyx_t_4)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
      __Pyx_INCREF(__pyx_t_4);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_3, function);
    }
  }
  __pyx_t_2 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_4, __pyx_v_weights) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_v_weights);
  __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
  if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 201, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_v_xp = __pyx_t_2;
  __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":202
 *         assert len(gradient) >= 1
 *         xp = get_array_module(weights)
 *         if xp is not self.ops.xp:             # <<<<<<<<<<<<<<
 *             if xp is numpy:
 *                 self.ops = NumpyOps()
 */
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 202, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_xp); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 202, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_5 = (__pyx_v_xp != __pyx_t_3);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = (__pyx_t_5 != 0);
  if (__pyx_t_6) {

    /* "thinc/neural/optimizers.pyx":203
 *         xp = get_array_module(weights)
 *         if xp is not self.ops.xp:
 *             if xp is numpy:             # <<<<<<<<<<<<<<
 *                 self.ops = NumpyOps()
 *             else:
 */
    __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_numpy); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 203, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_6 = (__pyx_v_xp == __pyx_t_3);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __pyx_t_5 = (__pyx_t_6 != 0);
    if (__pyx_t_5) {

      /* "thinc/neural/optimizers.pyx":204
 *         if xp is not self.ops.xp:
 *             if xp is numpy:
 *                 self.ops = NumpyOps()             # <<<<<<<<<<<<<<
 *             else:
 *                 self.ops = CupyOps()
 */
      __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_NumpyOps); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 204, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_2);
      __pyx_t_4 = NULL;
      if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
        __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
        if (likely(__pyx_t_4)) {
          PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
          __Pyx_INCREF(__pyx_t_4);
          __Pyx_INCREF(function);
          __Pyx_DECREF_SET(__pyx_t_2, function);
        }
      }
      __pyx_t_3 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_2);
      __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
      if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 204, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
      if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_ops, __pyx_t_3) < 0) __PYX_ERR(0, 204, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

      /* "thinc/neural/optimizers.pyx":203
 *         xp = get_array_module(weights)
 *         if xp is not self.ops.xp:
 *             if xp is numpy:             # <<<<<<<<<<<<<<
 *                 self.ops = NumpyOps()
 *             else:
 */
      goto __pyx_L4;
    }

    /* "thinc/neural/optimizers.pyx":206
 *                 self.ops = NumpyOps()
 *             else:
 *                 self.ops = CupyOps()             # <<<<<<<<<<<<<<
 * 
 *         self.nr_update[key] += 1
 */
    /*else*/ {
      __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_CupyOps); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 206, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_2);
      __pyx_t_4 = NULL;
      if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) {
        __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_2);
        if (likely(__pyx_t_4)) {
          PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2);
          __Pyx_INCREF(__pyx_t_4);
          __Pyx_INCREF(function);
          __Pyx_DECREF_SET(__pyx_t_2, function);
        }
      }
      __pyx_t_3 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_2);
      __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
      if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 206, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
      if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_ops, __pyx_t_3) < 0) __PYX_ERR(0, 206, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    }
    __pyx_L4:;

    /* "thinc/neural/optimizers.pyx":202
 *         assert len(gradient) >= 1
 *         xp = get_array_module(weights)
 *         if xp is not self.ops.xp:             # <<<<<<<<<<<<<<
 *             if xp is numpy:
 *                 self.ops = NumpyOps()
 */
  }

  /* "thinc/neural/optimizers.pyx":208
 *                 self.ops = CupyOps()
 * 
 *         self.nr_update[key] += 1             # <<<<<<<<<<<<<<
 *         nr_upd = self.nr_update[key]
 *         if self.L2 != 0 and not self.L2_is_weight_decay:
 */
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_nr_update); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 208, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_INCREF(__pyx_v_key);
  __pyx_t_2 = __pyx_v_key;
  __pyx_t_4 = __Pyx_PyObject_GetItem(__pyx_t_3, __pyx_t_2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 208, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_7 = __Pyx_PyInt_AddObjC(__pyx_t_4, __pyx_int_1, 1, 1, 0); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 208, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  if (unlikely(PyObject_SetItem(__pyx_t_3, __pyx_t_2, __pyx_t_7) < 0)) __PYX_ERR(0, 208, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":209
 * 
 *         self.nr_update[key] += 1
 *         nr_upd = self.nr_update[key]             # <<<<<<<<<<<<<<
 *         if self.L2 != 0 and not self.L2_is_weight_decay:
 *             gradient += self.L2 * weights
 */
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_nr_update); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 209, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_2 = __Pyx_PyObject_GetItem(__pyx_t_3, __pyx_v_key); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 209, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_v_nr_upd = __pyx_t_2;
  __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":210
 *         self.nr_update[key] += 1
 *         nr_upd = self.nr_update[key]
 *         if self.L2 != 0 and not self.L2_is_weight_decay:             # <<<<<<<<<<<<<<
 *             gradient += self.L2 * weights
 *         if self.max_grad_norm:
 */
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_L2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 210, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyInt_NeObjC(__pyx_t_2, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 210, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 210, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (__pyx_t_6) {
  } else {
    __pyx_t_5 = __pyx_t_6;
    goto __pyx_L6_bool_binop_done;
  }
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_L2_is_weight_decay); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 210, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 210, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_8 = ((!__pyx_t_6) != 0);
  __pyx_t_5 = __pyx_t_8;
  __pyx_L6_bool_binop_done:;
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":211
 *         nr_upd = self.nr_update[key]
 *         if self.L2 != 0 and not self.L2_is_weight_decay:
 *             gradient += self.L2 * weights             # <<<<<<<<<<<<<<
 *         if self.max_grad_norm:
 *             self.ops.clip_gradient(gradient, self.max_grad_norm)
 */
    __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_L2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 211, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_2 = PyNumber_Multiply(__pyx_t_3, __pyx_v_weights); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 211, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __pyx_t_3 = PyNumber_InPlaceAdd(__pyx_v_gradient, __pyx_t_2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 211, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    __Pyx_DECREF_SET(__pyx_v_gradient, __pyx_t_3);
    __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":210
 *         self.nr_update[key] += 1
 *         nr_upd = self.nr_update[key]
 *         if self.L2 != 0 and not self.L2_is_weight_decay:             # <<<<<<<<<<<<<<
 *             gradient += self.L2 * weights
 *         if self.max_grad_norm:
 */
  }

  /* "thinc/neural/optimizers.pyx":212
 *         if self.L2 != 0 and not self.L2_is_weight_decay:
 *             gradient += self.L2 * weights
 *         if self.max_grad_norm:             # <<<<<<<<<<<<<<
 *             self.ops.clip_gradient(gradient, self.max_grad_norm)
 *         if self.gradient_noise:
 */
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_max_grad_norm); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 212, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 212, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":213
 *             gradient += self.L2 * weights
 *         if self.max_grad_norm:
 *             self.ops.clip_gradient(gradient, self.max_grad_norm)             # <<<<<<<<<<<<<<
 *         if self.gradient_noise:
 *             add_gradient_noise(gradient, self.gradient_noise, nr_upd)
 */
    __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 213, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_clip_gradient); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 213, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_max_grad_norm); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 213, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __pyx_t_4 = NULL;
    __pyx_t_9 = 0;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_7))) {
      __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_7);
      if (likely(__pyx_t_4)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
        __Pyx_INCREF(__pyx_t_4);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_7, function);
        __pyx_t_9 = 1;
      }
    }
    #if CYTHON_FAST_PYCALL
    if (PyFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[3] = {__pyx_t_4, __pyx_v_gradient, __pyx_t_2};
      __pyx_t_3 = __Pyx_PyFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 2+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 213, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    } else
    #endif
    #if CYTHON_FAST_PYCCALL
    if (__Pyx_PyFastCFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[3] = {__pyx_t_4, __pyx_v_gradient, __pyx_t_2};
      __pyx_t_3 = __Pyx_PyCFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 2+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 213, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    } else
    #endif
    {
      __pyx_t_10 = PyTuple_New(2+__pyx_t_9); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 213, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_10);
      if (__pyx_t_4) {
        __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_4); __pyx_t_4 = NULL;
      }
      __Pyx_INCREF(__pyx_v_gradient);
      __Pyx_GIVEREF(__pyx_v_gradient);
      PyTuple_SET_ITEM(__pyx_t_10, 0+__pyx_t_9, __pyx_v_gradient);
      __Pyx_GIVEREF(__pyx_t_2);
      PyTuple_SET_ITEM(__pyx_t_10, 1+__pyx_t_9, __pyx_t_2);
      __pyx_t_2 = 0;
      __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_10, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 213, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
    }
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":212
 *         if self.L2 != 0 and not self.L2_is_weight_decay:
 *             gradient += self.L2 * weights
 *         if self.max_grad_norm:             # <<<<<<<<<<<<<<
 *             self.ops.clip_gradient(gradient, self.max_grad_norm)
 *         if self.gradient_noise:
 */
  }

  /* "thinc/neural/optimizers.pyx":214
 *         if self.max_grad_norm:
 *             self.ops.clip_gradient(gradient, self.max_grad_norm)
 *         if self.gradient_noise:             # <<<<<<<<<<<<<<
 *             add_gradient_noise(gradient, self.gradient_noise, nr_upd)
 *         if self.use_radam:
 */
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_gradient_noise); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 214, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 214, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":215
 *             self.ops.clip_gradient(gradient, self.max_grad_norm)
 *         if self.gradient_noise:
 *             add_gradient_noise(gradient, self.gradient_noise, nr_upd)             # <<<<<<<<<<<<<<
 *         if self.use_radam:
 *             self._radam(xp, weights, gradient, lr_scale, key, nr_upd)
 */
    __Pyx_GetModuleGlobalName(__pyx_t_7, __pyx_n_s_add_gradient_noise); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 215, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_gradient_noise); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 215, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_10);
    __pyx_t_2 = NULL;
    __pyx_t_9 = 0;
    if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_7))) {
      __pyx_t_2 = PyMethod_GET_SELF(__pyx_t_7);
      if (likely(__pyx_t_2)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
        __Pyx_INCREF(__pyx_t_2);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_7, function);
        __pyx_t_9 = 1;
      }
    }
    #if CYTHON_FAST_PYCALL
    if (PyFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[4] = {__pyx_t_2, __pyx_v_gradient, __pyx_t_10, __pyx_v_nr_upd};
      __pyx_t_3 = __Pyx_PyFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 3+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 215, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
    } else
    #endif
    #if CYTHON_FAST_PYCCALL
    if (__Pyx_PyFastCFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[4] = {__pyx_t_2, __pyx_v_gradient, __pyx_t_10, __pyx_v_nr_upd};
      __pyx_t_3 = __Pyx_PyCFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 3+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 215, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0;
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
    } else
    #endif
    {
      __pyx_t_4 = PyTuple_New(3+__pyx_t_9); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 215, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_4);
      if (__pyx_t_2) {
        __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_2); __pyx_t_2 = NULL;
      }
      __Pyx_INCREF(__pyx_v_gradient);
      __Pyx_GIVEREF(__pyx_v_gradient);
      PyTuple_SET_ITEM(__pyx_t_4, 0+__pyx_t_9, __pyx_v_gradient);
      __Pyx_GIVEREF(__pyx_t_10);
      PyTuple_SET_ITEM(__pyx_t_4, 1+__pyx_t_9, __pyx_t_10);
      __Pyx_INCREF(__pyx_v_nr_upd);
      __Pyx_GIVEREF(__pyx_v_nr_upd);
      PyTuple_SET_ITEM(__pyx_t_4, 2+__pyx_t_9, __pyx_v_nr_upd);
      __pyx_t_10 = 0;
      __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 215, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    }
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":214
 *         if self.max_grad_norm:
 *             self.ops.clip_gradient(gradient, self.max_grad_norm)
 *         if self.gradient_noise:             # <<<<<<<<<<<<<<
 *             add_gradient_noise(gradient, self.gradient_noise, nr_upd)
 *         if self.use_radam:
 */
  }

  /* "thinc/neural/optimizers.pyx":216
 *         if self.gradient_noise:
 *             add_gradient_noise(gradient, self.gradient_noise, nr_upd)
 *         if self.use_radam:             # <<<<<<<<<<<<<<
 *             self._radam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and self.b2 > 0.:
 */
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_use_radam); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 216, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 216, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":217
 *             add_gradient_noise(gradient, self.gradient_noise, nr_upd)
 *         if self.use_radam:
 *             self._radam(xp, weights, gradient, lr_scale, key, nr_upd)             # <<<<<<<<<<<<<<
 *         elif self.b1 > 0. and self.b2 > 0.:
 *             self._adam(xp, weights, gradient, lr_scale, key, nr_upd)
 */
    __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_radam); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 217, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __pyx_t_4 = NULL;
    __pyx_t_9 = 0;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_7))) {
      __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_7);
      if (likely(__pyx_t_4)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
        __Pyx_INCREF(__pyx_t_4);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_7, function);
        __pyx_t_9 = 1;
      }
    }
    #if CYTHON_FAST_PYCALL
    if (PyFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[7] = {__pyx_t_4, __pyx_v_xp, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key, __pyx_v_nr_upd};
      __pyx_t_3 = __Pyx_PyFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 6+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 217, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_GOTREF(__pyx_t_3);
    } else
    #endif
    #if CYTHON_FAST_PYCCALL
    if (__Pyx_PyFastCFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[7] = {__pyx_t_4, __pyx_v_xp, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key, __pyx_v_nr_upd};
      __pyx_t_3 = __Pyx_PyCFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 6+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 217, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_GOTREF(__pyx_t_3);
    } else
    #endif
    {
      __pyx_t_10 = PyTuple_New(6+__pyx_t_9); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 217, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_10);
      if (__pyx_t_4) {
        __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_4); __pyx_t_4 = NULL;
      }
      __Pyx_INCREF(__pyx_v_xp);
      __Pyx_GIVEREF(__pyx_v_xp);
      PyTuple_SET_ITEM(__pyx_t_10, 0+__pyx_t_9, __pyx_v_xp);
      __Pyx_INCREF(__pyx_v_weights);
      __Pyx_GIVEREF(__pyx_v_weights);
      PyTuple_SET_ITEM(__pyx_t_10, 1+__pyx_t_9, __pyx_v_weights);
      __Pyx_INCREF(__pyx_v_gradient);
      __Pyx_GIVEREF(__pyx_v_gradient);
      PyTuple_SET_ITEM(__pyx_t_10, 2+__pyx_t_9, __pyx_v_gradient);
      __Pyx_INCREF(__pyx_v_lr_scale);
      __Pyx_GIVEREF(__pyx_v_lr_scale);
      PyTuple_SET_ITEM(__pyx_t_10, 3+__pyx_t_9, __pyx_v_lr_scale);
      __Pyx_INCREF(__pyx_v_key);
      __Pyx_GIVEREF(__pyx_v_key);
      PyTuple_SET_ITEM(__pyx_t_10, 4+__pyx_t_9, __pyx_v_key);
      __Pyx_INCREF(__pyx_v_nr_upd);
      __Pyx_GIVEREF(__pyx_v_nr_upd);
      PyTuple_SET_ITEM(__pyx_t_10, 5+__pyx_t_9, __pyx_v_nr_upd);
      __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_10, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 217, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
    }
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":216
 *         if self.gradient_noise:
 *             add_gradient_noise(gradient, self.gradient_noise, nr_upd)
 *         if self.use_radam:             # <<<<<<<<<<<<<<
 *             self._radam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and self.b2 > 0.:
 */
    goto __pyx_L10;
  }

  /* "thinc/neural/optimizers.pyx":218
 *         if self.use_radam:
 *             self._radam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and self.b2 > 0.:             # <<<<<<<<<<<<<<
 *             self._adam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and not self.nesterov:
 */
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 218, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_7 = PyObject_RichCompare(__pyx_t_3, __pyx_float_0_, Py_GT); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 218, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 218, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  if (__pyx_t_8) {
  } else {
    __pyx_t_5 = __pyx_t_8;
    goto __pyx_L11_bool_binop_done;
  }
  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b2); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 218, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_3 = PyObject_RichCompare(__pyx_t_7, __pyx_float_0_, Py_GT); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 218, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 218, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_5 = __pyx_t_8;
  __pyx_L11_bool_binop_done:;
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":219
 *             self._radam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and self.b2 > 0.:
 *             self._adam(xp, weights, gradient, lr_scale, key, nr_upd)             # <<<<<<<<<<<<<<
 *         elif self.b1 > 0. and not self.nesterov:
 *             raise NotImplementedError
 */
    __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_adam); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 219, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __pyx_t_10 = NULL;
    __pyx_t_9 = 0;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_7))) {
      __pyx_t_10 = PyMethod_GET_SELF(__pyx_t_7);
      if (likely(__pyx_t_10)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
        __Pyx_INCREF(__pyx_t_10);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_7, function);
        __pyx_t_9 = 1;
      }
    }
    #if CYTHON_FAST_PYCALL
    if (PyFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[7] = {__pyx_t_10, __pyx_v_xp, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key, __pyx_v_nr_upd};
      __pyx_t_3 = __Pyx_PyFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 6+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 219, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_10); __pyx_t_10 = 0;
      __Pyx_GOTREF(__pyx_t_3);
    } else
    #endif
    #if CYTHON_FAST_PYCCALL
    if (__Pyx_PyFastCFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[7] = {__pyx_t_10, __pyx_v_xp, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key, __pyx_v_nr_upd};
      __pyx_t_3 = __Pyx_PyCFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 6+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 219, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_10); __pyx_t_10 = 0;
      __Pyx_GOTREF(__pyx_t_3);
    } else
    #endif
    {
      __pyx_t_4 = PyTuple_New(6+__pyx_t_9); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 219, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_4);
      if (__pyx_t_10) {
        __Pyx_GIVEREF(__pyx_t_10); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_10); __pyx_t_10 = NULL;
      }
      __Pyx_INCREF(__pyx_v_xp);
      __Pyx_GIVEREF(__pyx_v_xp);
      PyTuple_SET_ITEM(__pyx_t_4, 0+__pyx_t_9, __pyx_v_xp);
      __Pyx_INCREF(__pyx_v_weights);
      __Pyx_GIVEREF(__pyx_v_weights);
      PyTuple_SET_ITEM(__pyx_t_4, 1+__pyx_t_9, __pyx_v_weights);
      __Pyx_INCREF(__pyx_v_gradient);
      __Pyx_GIVEREF(__pyx_v_gradient);
      PyTuple_SET_ITEM(__pyx_t_4, 2+__pyx_t_9, __pyx_v_gradient);
      __Pyx_INCREF(__pyx_v_lr_scale);
      __Pyx_GIVEREF(__pyx_v_lr_scale);
      PyTuple_SET_ITEM(__pyx_t_4, 3+__pyx_t_9, __pyx_v_lr_scale);
      __Pyx_INCREF(__pyx_v_key);
      __Pyx_GIVEREF(__pyx_v_key);
      PyTuple_SET_ITEM(__pyx_t_4, 4+__pyx_t_9, __pyx_v_key);
      __Pyx_INCREF(__pyx_v_nr_upd);
      __Pyx_GIVEREF(__pyx_v_nr_upd);
      PyTuple_SET_ITEM(__pyx_t_4, 5+__pyx_t_9, __pyx_v_nr_upd);
      __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 219, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    }
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":218
 *         if self.use_radam:
 *             self._radam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and self.b2 > 0.:             # <<<<<<<<<<<<<<
 *             self._adam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and not self.nesterov:
 */
    goto __pyx_L10;
  }

  /* "thinc/neural/optimizers.pyx":220
 *         elif self.b1 > 0. and self.b2 > 0.:
 *             self._adam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and not self.nesterov:             # <<<<<<<<<<<<<<
 *             raise NotImplementedError
 *         elif self.b1 > 0.:
 */
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 220, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_7 = PyObject_RichCompare(__pyx_t_3, __pyx_float_0_, Py_GT); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 220, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 220, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  if (__pyx_t_8) {
  } else {
    __pyx_t_5 = __pyx_t_8;
    goto __pyx_L13_bool_binop_done;
  }
  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_nesterov); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 220, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 220, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __pyx_t_6 = ((!__pyx_t_8) != 0);
  __pyx_t_5 = __pyx_t_6;
  __pyx_L13_bool_binop_done:;
  if (unlikely(__pyx_t_5)) {

    /* "thinc/neural/optimizers.pyx":221
 *             self._adam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and not self.nesterov:
 *             raise NotImplementedError             # <<<<<<<<<<<<<<
 *         elif self.b1 > 0.:
 *             self._nesterov(xp, weights, gradient, lr_scale, key)
 */
    __Pyx_Raise(__pyx_builtin_NotImplementedError, 0, 0, 0);
    __PYX_ERR(0, 221, __pyx_L1_error)

    /* "thinc/neural/optimizers.pyx":220
 *         elif self.b1 > 0. and self.b2 > 0.:
 *             self._adam(xp, weights, gradient, lr_scale, key, nr_upd)
 *         elif self.b1 > 0. and not self.nesterov:             # <<<<<<<<<<<<<<
 *             raise NotImplementedError
 *         elif self.b1 > 0.:
 */
  }

  /* "thinc/neural/optimizers.pyx":222
 *         elif self.b1 > 0. and not self.nesterov:
 *             raise NotImplementedError
 *         elif self.b1 > 0.:             # <<<<<<<<<<<<<<
 *             self._nesterov(xp, weights, gradient, lr_scale, key)
 *         elif self.b2 > 0.:
 */
  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 222, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_3 = PyObject_RichCompare(__pyx_t_7, __pyx_float_0_, Py_GT); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 222, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 222, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":223
 *             raise NotImplementedError
 *         elif self.b1 > 0.:
 *             self._nesterov(xp, weights, gradient, lr_scale, key)             # <<<<<<<<<<<<<<
 *         elif self.b2 > 0.:
 *             raise NotImplementedError
 */
    __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_nesterov_2); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 223, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __pyx_t_4 = NULL;
    __pyx_t_9 = 0;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_7))) {
      __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_7);
      if (likely(__pyx_t_4)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
        __Pyx_INCREF(__pyx_t_4);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_7, function);
        __pyx_t_9 = 1;
      }
    }
    #if CYTHON_FAST_PYCALL
    if (PyFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[6] = {__pyx_t_4, __pyx_v_xp, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key};
      __pyx_t_3 = __Pyx_PyFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 5+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 223, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_GOTREF(__pyx_t_3);
    } else
    #endif
    #if CYTHON_FAST_PYCCALL
    if (__Pyx_PyFastCFunction_Check(__pyx_t_7)) {
      PyObject *__pyx_temp[6] = {__pyx_t_4, __pyx_v_xp, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key};
      __pyx_t_3 = __Pyx_PyCFunction_FastCall(__pyx_t_7, __pyx_temp+1-__pyx_t_9, 5+__pyx_t_9); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 223, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_GOTREF(__pyx_t_3);
    } else
    #endif
    {
      __pyx_t_10 = PyTuple_New(5+__pyx_t_9); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 223, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_10);
      if (__pyx_t_4) {
        __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_4); __pyx_t_4 = NULL;
      }
      __Pyx_INCREF(__pyx_v_xp);
      __Pyx_GIVEREF(__pyx_v_xp);
      PyTuple_SET_ITEM(__pyx_t_10, 0+__pyx_t_9, __pyx_v_xp);
      __Pyx_INCREF(__pyx_v_weights);
      __Pyx_GIVEREF(__pyx_v_weights);
      PyTuple_SET_ITEM(__pyx_t_10, 1+__pyx_t_9, __pyx_v_weights);
      __Pyx_INCREF(__pyx_v_gradient);
      __Pyx_GIVEREF(__pyx_v_gradient);
      PyTuple_SET_ITEM(__pyx_t_10, 2+__pyx_t_9, __pyx_v_gradient);
      __Pyx_INCREF(__pyx_v_lr_scale);
      __Pyx_GIVEREF(__pyx_v_lr_scale);
      PyTuple_SET_ITEM(__pyx_t_10, 3+__pyx_t_9, __pyx_v_lr_scale);
      __Pyx_INCREF(__pyx_v_key);
      __Pyx_GIVEREF(__pyx_v_key);
      PyTuple_SET_ITEM(__pyx_t_10, 4+__pyx_t_9, __pyx_v_key);
      __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_10, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 223, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
    }
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":222
 *         elif self.b1 > 0. and not self.nesterov:
 *             raise NotImplementedError
 *         elif self.b1 > 0.:             # <<<<<<<<<<<<<<
 *             self._nesterov(xp, weights, gradient, lr_scale, key)
 *         elif self.b2 > 0.:
 */
    goto __pyx_L10;
  }

  /* "thinc/neural/optimizers.pyx":224
 *         elif self.b1 > 0.:
 *             self._nesterov(xp, weights, gradient, lr_scale, key)
 *         elif self.b2 > 0.:             # <<<<<<<<<<<<<<
 *             raise NotImplementedError
 *         else:
 */
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 224, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_7 = PyObject_RichCompare(__pyx_t_3, __pyx_float_0_, Py_GT); __Pyx_XGOTREF(__pyx_t_7); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 224, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 224, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  if (unlikely(__pyx_t_5)) {

    /* "thinc/neural/optimizers.pyx":225
 *             self._nesterov(xp, weights, gradient, lr_scale, key)
 *         elif self.b2 > 0.:
 *             raise NotImplementedError             # <<<<<<<<<<<<<<
 *         else:
 *             weights -= lr_scale * self.alpha * gradient
 */
    __Pyx_Raise(__pyx_builtin_NotImplementedError, 0, 0, 0);
    __PYX_ERR(0, 225, __pyx_L1_error)

    /* "thinc/neural/optimizers.pyx":224
 *         elif self.b1 > 0.:
 *             self._nesterov(xp, weights, gradient, lr_scale, key)
 *         elif self.b2 > 0.:             # <<<<<<<<<<<<<<
 *             raise NotImplementedError
 *         else:
 */
  }

  /* "thinc/neural/optimizers.pyx":227
 *             raise NotImplementedError
 *         else:
 *             weights -= lr_scale * self.alpha * gradient             # <<<<<<<<<<<<<<
 *         gradient.fill(0.)
 *         if self.L2 != 0 and self.L2_is_weight_decay:
 */
  /*else*/ {
    __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_alpha); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 227, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __pyx_t_3 = PyNumber_Multiply(__pyx_v_lr_scale, __pyx_t_7); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 227, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __pyx_t_7 = PyNumber_Multiply(__pyx_t_3, __pyx_v_gradient); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 227, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __pyx_t_3 = PyNumber_InPlaceSubtract(__pyx_v_weights, __pyx_t_7); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 227, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_DECREF_SET(__pyx_v_weights, __pyx_t_3);
    __pyx_t_3 = 0;
  }
  __pyx_L10:;

  /* "thinc/neural/optimizers.pyx":228
 *         else:
 *             weights -= lr_scale * self.alpha * gradient
 *         gradient.fill(0.)             # <<<<<<<<<<<<<<
 *         if self.L2 != 0 and self.L2_is_weight_decay:
 *             weights -= self.L2 * weights
 */
  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_gradient, __pyx_n_s_fill); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 228, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_10 = NULL;
  if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_7))) {
    __pyx_t_10 = PyMethod_GET_SELF(__pyx_t_7);
    if (likely(__pyx_t_10)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7);
      __Pyx_INCREF(__pyx_t_10);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_7, function);
    }
  }
  __pyx_t_3 = (__pyx_t_10) ? __Pyx_PyObject_Call2Args(__pyx_t_7, __pyx_t_10, __pyx_float_0_) : __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_float_0_);
  __Pyx_XDECREF(__pyx_t_10); __pyx_t_10 = 0;
  if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 228, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":229
 *             weights -= lr_scale * self.alpha * gradient
 *         gradient.fill(0.)
 *         if self.L2 != 0 and self.L2_is_weight_decay:             # <<<<<<<<<<<<<<
 *             weights -= self.L2 * weights
 *         if self.lookahead_k and self.nr_update[key] % self.lookahead_k == 0:
 */
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_L2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 229, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_7 = __Pyx_PyInt_NeObjC(__pyx_t_3, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 229, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 229, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  if (__pyx_t_6) {
  } else {
    __pyx_t_5 = __pyx_t_6;
    goto __pyx_L16_bool_binop_done;
  }
  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_L2_is_weight_decay); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 229, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 229, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __pyx_t_5 = __pyx_t_6;
  __pyx_L16_bool_binop_done:;
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":230
 *         gradient.fill(0.)
 *         if self.L2 != 0 and self.L2_is_weight_decay:
 *             weights -= self.L2 * weights             # <<<<<<<<<<<<<<
 *         if self.lookahead_k and self.nr_update[key] % self.lookahead_k == 0:
 *             if key not in self.slow_weights:
 */
    __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_L2); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 230, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __pyx_t_3 = PyNumber_Multiply(__pyx_t_7, __pyx_v_weights); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 230, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __pyx_t_7 = PyNumber_InPlaceSubtract(__pyx_v_weights, __pyx_t_3); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 230, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __Pyx_DECREF_SET(__pyx_v_weights, __pyx_t_7);
    __pyx_t_7 = 0;

    /* "thinc/neural/optimizers.pyx":229
 *             weights -= lr_scale * self.alpha * gradient
 *         gradient.fill(0.)
 *         if self.L2 != 0 and self.L2_is_weight_decay:             # <<<<<<<<<<<<<<
 *             weights -= self.L2 * weights
 *         if self.lookahead_k and self.nr_update[key] % self.lookahead_k == 0:
 */
  }

  /* "thinc/neural/optimizers.pyx":231
 *         if self.L2 != 0 and self.L2_is_weight_decay:
 *             weights -= self.L2 * weights
 *         if self.lookahead_k and self.nr_update[key] % self.lookahead_k == 0:             # <<<<<<<<<<<<<<
 *             if key not in self.slow_weights:
 *                 self.slow_weights[key] = self.ops.allocate((weights.size,), dtype='float32')
 */
  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_lookahead_k); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 231, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 231, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  if (__pyx_t_6) {
  } else {
    __pyx_t_5 = __pyx_t_6;
    goto __pyx_L19_bool_binop_done;
  }
  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_nr_update); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 231, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_3 = __Pyx_PyObject_GetItem(__pyx_t_7, __pyx_v_key); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 231, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_lookahead_k); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 231, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __pyx_t_10 = PyNumber_Remainder(__pyx_t_3, __pyx_t_7); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 231, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_10);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __pyx_t_7 = __Pyx_PyInt_EqObjC(__pyx_t_10, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 231, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_7);
  __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
  __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 231, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
  __pyx_t_5 = __pyx_t_6;
  __pyx_L19_bool_binop_done:;
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":232
 *             weights -= self.L2 * weights
 *         if self.lookahead_k and self.nr_update[key] % self.lookahead_k == 0:
 *             if key not in self.slow_weights:             # <<<<<<<<<<<<<<
 *                 self.slow_weights[key] = self.ops.allocate((weights.size,), dtype='float32')
 *             slow = self.slow_weights[key]
 */
    __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_slow_weights); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 232, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __pyx_t_5 = (__Pyx_PySequence_ContainsTF(__pyx_v_key, __pyx_t_7, Py_NE)); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 232, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __pyx_t_6 = (__pyx_t_5 != 0);
    if (__pyx_t_6) {

      /* "thinc/neural/optimizers.pyx":233
 *         if self.lookahead_k and self.nr_update[key] % self.lookahead_k == 0:
 *             if key not in self.slow_weights:
 *                 self.slow_weights[key] = self.ops.allocate((weights.size,), dtype='float32')             # <<<<<<<<<<<<<<
 *             slow = self.slow_weights[key]
 *             slow += self.lookahead_alpha * (weights - slow)
 */
      __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 233, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_7);
      __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_allocate); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 233, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_10);
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
      __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_weights, __pyx_n_s_size); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 233, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_7);
      __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 233, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_GIVEREF(__pyx_t_7);
      PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_7);
      __pyx_t_7 = 0;
      __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 233, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_7);
      __Pyx_GIVEREF(__pyx_t_3);
      PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_3);
      __pyx_t_3 = 0;
      __pyx_t_3 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 233, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_n_s_float32) < 0) __PYX_ERR(0, 233, __pyx_L1_error)
      __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_7, __pyx_t_3); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 233, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_4);
      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
      __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_slow_weights); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 233, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      if (unlikely(PyObject_SetItem(__pyx_t_3, __pyx_v_key, __pyx_t_4) < 0)) __PYX_ERR(0, 233, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

      /* "thinc/neural/optimizers.pyx":232
 *             weights -= self.L2 * weights
 *         if self.lookahead_k and self.nr_update[key] % self.lookahead_k == 0:
 *             if key not in self.slow_weights:             # <<<<<<<<<<<<<<
 *                 self.slow_weights[key] = self.ops.allocate((weights.size,), dtype='float32')
 *             slow = self.slow_weights[key]
 */
    }

    /* "thinc/neural/optimizers.pyx":234
 *             if key not in self.slow_weights:
 *                 self.slow_weights[key] = self.ops.allocate((weights.size,), dtype='float32')
 *             slow = self.slow_weights[key]             # <<<<<<<<<<<<<<
 *             slow += self.lookahead_alpha * (weights - slow)
 *             weights[:] = slow
 */
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_slow_weights); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 234, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_3 = __Pyx_PyObject_GetItem(__pyx_t_4, __pyx_v_key); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 234, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_v_slow = __pyx_t_3;
    __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":235
 *                 self.slow_weights[key] = self.ops.allocate((weights.size,), dtype='float32')
 *             slow = self.slow_weights[key]
 *             slow += self.lookahead_alpha * (weights - slow)             # <<<<<<<<<<<<<<
 *             weights[:] = slow
 *         if self.averages is not None:
 */
    __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_lookahead_alpha); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 235, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_4 = PyNumber_Subtract(__pyx_v_weights, __pyx_v_slow); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 235, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_7 = PyNumber_Multiply(__pyx_t_3, __pyx_t_4); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 235, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_4 = PyNumber_InPlaceAdd(__pyx_v_slow, __pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 235, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_DECREF_SET(__pyx_v_slow, __pyx_t_4);
    __pyx_t_4 = 0;

    /* "thinc/neural/optimizers.pyx":236
 *             slow = self.slow_weights[key]
 *             slow += self.lookahead_alpha * (weights - slow)
 *             weights[:] = slow             # <<<<<<<<<<<<<<
 *         if self.averages is not None:
 *             if key not in self.averages:
 */
    if (__Pyx_PyObject_SetSlice(__pyx_v_weights, __pyx_v_slow, 0, 0, NULL, NULL, &__pyx_slice__30, 0, 0, 1) < 0) __PYX_ERR(0, 236, __pyx_L1_error)

    /* "thinc/neural/optimizers.pyx":231
 *         if self.L2 != 0 and self.L2_is_weight_decay:
 *             weights -= self.L2 * weights
 *         if self.lookahead_k and self.nr_update[key] % self.lookahead_k == 0:             # <<<<<<<<<<<<<<
 *             if key not in self.slow_weights:
 *                 self.slow_weights[key] = self.ops.allocate((weights.size,), dtype='float32')
 */
  }

  /* "thinc/neural/optimizers.pyx":237
 *             slow += self.lookahead_alpha * (weights - slow)
 *             weights[:] = slow
 *         if self.averages is not None:             # <<<<<<<<<<<<<<
 *             if key not in self.averages:
 *                 self.averages[key] = self.ops.allocate((weights.size,), dtype='float32')
 */
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_averages); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 237, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_6 = (__pyx_t_4 != Py_None);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_5 = (__pyx_t_6 != 0);
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":238
 *             weights[:] = slow
 *         if self.averages is not None:
 *             if key not in self.averages:             # <<<<<<<<<<<<<<
 *                 self.averages[key] = self.ops.allocate((weights.size,), dtype='float32')
 *             self.ops.update_averages(self.averages[key], weights, nr_upd)
 */
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_averages); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 238, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_5 = (__Pyx_PySequence_ContainsTF(__pyx_v_key, __pyx_t_4, Py_NE)); if (unlikely(__pyx_t_5 < 0)) __PYX_ERR(0, 238, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_6 = (__pyx_t_5 != 0);
    if (__pyx_t_6) {

      /* "thinc/neural/optimizers.pyx":239
 *         if self.averages is not None:
 *             if key not in self.averages:
 *                 self.averages[key] = self.ops.allocate((weights.size,), dtype='float32')             # <<<<<<<<<<<<<<
 *             self.ops.update_averages(self.averages[key], weights, nr_upd)
 * 
 */
      __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 239, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_4);
      __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_allocate); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 239, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_7);
      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
      __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_weights, __pyx_n_s_size); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 239, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_4);
      __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 239, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      __Pyx_GIVEREF(__pyx_t_4);
      PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_4);
      __pyx_t_4 = 0;
      __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 239, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_4);
      __Pyx_GIVEREF(__pyx_t_3);
      PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3);
      __pyx_t_3 = 0;
      __pyx_t_3 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 239, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_n_s_float32) < 0) __PYX_ERR(0, 239, __pyx_L1_error)
      __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_4, __pyx_t_3); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 239, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_10);
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
      __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
      __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_averages); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 239, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_3);
      if (unlikely(PyObject_SetItem(__pyx_t_3, __pyx_v_key, __pyx_t_10) < 0)) __PYX_ERR(0, 239, __pyx_L1_error)
      __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
      __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;

      /* "thinc/neural/optimizers.pyx":238
 *             weights[:] = slow
 *         if self.averages is not None:
 *             if key not in self.averages:             # <<<<<<<<<<<<<<
 *                 self.averages[key] = self.ops.allocate((weights.size,), dtype='float32')
 *             self.ops.update_averages(self.averages[key], weights, nr_upd)
 */
    }

    /* "thinc/neural/optimizers.pyx":240
 *             if key not in self.averages:
 *                 self.averages[key] = self.ops.allocate((weights.size,), dtype='float32')
 *             self.ops.update_averages(self.averages[key], weights, nr_upd)             # <<<<<<<<<<<<<<
 * 
 *     def _radam(self, xp, weights, gradient, lr_scale, key, nr_upd):
 */
    __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 240, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_update_averages); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 240, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_averages); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 240, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_7 = __Pyx_PyObject_GetItem(__pyx_t_3, __pyx_v_key); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 240, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __pyx_t_3 = NULL;
    __pyx_t_9 = 0;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_4))) {
      __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_4);
      if (likely(__pyx_t_3)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
        __Pyx_INCREF(__pyx_t_3);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_4, function);
        __pyx_t_9 = 1;
      }
    }
    #if CYTHON_FAST_PYCALL
    if (PyFunction_Check(__pyx_t_4)) {
      PyObject *__pyx_temp[4] = {__pyx_t_3, __pyx_t_7, __pyx_v_weights, __pyx_v_nr_upd};
      __pyx_t_10 = __Pyx_PyFunction_FastCall(__pyx_t_4, __pyx_temp+1-__pyx_t_9, 3+__pyx_t_9); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 240, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
      __Pyx_GOTREF(__pyx_t_10);
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    } else
    #endif
    #if CYTHON_FAST_PYCCALL
    if (__Pyx_PyFastCFunction_Check(__pyx_t_4)) {
      PyObject *__pyx_temp[4] = {__pyx_t_3, __pyx_t_7, __pyx_v_weights, __pyx_v_nr_upd};
      __pyx_t_10 = __Pyx_PyCFunction_FastCall(__pyx_t_4, __pyx_temp+1-__pyx_t_9, 3+__pyx_t_9); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 240, __pyx_L1_error)
      __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0;
      __Pyx_GOTREF(__pyx_t_10);
      __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    } else
    #endif
    {
      __pyx_t_2 = PyTuple_New(3+__pyx_t_9); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 240, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_2);
      if (__pyx_t_3) {
        __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_3); __pyx_t_3 = NULL;
      }
      __Pyx_GIVEREF(__pyx_t_7);
      PyTuple_SET_ITEM(__pyx_t_2, 0+__pyx_t_9, __pyx_t_7);
      __Pyx_INCREF(__pyx_v_weights);
      __Pyx_GIVEREF(__pyx_v_weights);
      PyTuple_SET_ITEM(__pyx_t_2, 1+__pyx_t_9, __pyx_v_weights);
      __Pyx_INCREF(__pyx_v_nr_upd);
      __Pyx_GIVEREF(__pyx_v_nr_upd);
      PyTuple_SET_ITEM(__pyx_t_2, 2+__pyx_t_9, __pyx_v_nr_upd);
      __pyx_t_7 = 0;
      __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_2, NULL); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 240, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_10);
      __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    }
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;

    /* "thinc/neural/optimizers.pyx":237
 *             slow += self.lookahead_alpha * (weights - slow)
 *             weights[:] = slow
 *         if self.averages is not None:             # <<<<<<<<<<<<<<
 *             if key not in self.averages:
 *                 self.averages[key] = self.ops.allocate((weights.size,), dtype='float32')
 */
  }

  /* "thinc/neural/optimizers.pyx":199
 *         return alpha * numpy.sqrt(fix2) / fix1
 * 
 *     def __call__(self, weights, gradient, lr_scale=1., key=None):             # <<<<<<<<<<<<<<
 *         assert len(gradient) >= 1
 *         xp = get_array_module(weights)
 */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_7);
  __Pyx_XDECREF(__pyx_t_10);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer.__call__", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_xp);
  __Pyx_XDECREF(__pyx_v_nr_upd);
  __Pyx_XDECREF(__pyx_v_slow);
  __Pyx_XDECREF(__pyx_v_weights);
  __Pyx_XDECREF(__pyx_v_gradient);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":242
 *             self.ops.update_averages(self.averages[key], weights, nr_upd)
 * 
 *     def _radam(self, xp, weights, gradient, lr_scale, key, nr_upd):             # <<<<<<<<<<<<<<
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_19_radam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_19_radam = {"_radam", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_19_radam, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_19_radam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_self = 0;
  CYTHON_UNUSED PyObject *__pyx_v_xp = 0;
  PyObject *__pyx_v_weights = 0;
  PyObject *__pyx_v_gradient = 0;
  PyObject *__pyx_v_lr_scale = 0;
  PyObject *__pyx_v_key = 0;
  PyObject *__pyx_v_nr_upd = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("_radam (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_self,&__pyx_n_s_xp,&__pyx_n_s_weights,&__pyx_n_s_gradient,&__pyx_n_s_lr_scale,&__pyx_n_s_key,&__pyx_n_s_nr_upd,0};
    PyObject* values[7] = {0,0,0,0,0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_self)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_xp)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_radam", 1, 7, 7, 1); __PYX_ERR(0, 242, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_weights)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_radam", 1, 7, 7, 2); __PYX_ERR(0, 242, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_gradient)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_radam", 1, 7, 7, 3); __PYX_ERR(0, 242, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (likely((values[4] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lr_scale)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_radam", 1, 7, 7, 4); __PYX_ERR(0, 242, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (likely((values[5] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_key)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_radam", 1, 7, 7, 5); __PYX_ERR(0, 242, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  6:
        if (likely((values[6] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_nr_upd)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_radam", 1, 7, 7, 6); __PYX_ERR(0, 242, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_radam") < 0)) __PYX_ERR(0, 242, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 7) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
      values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
      values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
      values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
      values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
    }
    __pyx_v_self = values[0];
    __pyx_v_xp = values[1];
    __pyx_v_weights = values[2];
    __pyx_v_gradient = values[3];
    __pyx_v_lr_scale = values[4];
    __pyx_v_key = values[5];
    __pyx_v_nr_upd = values[6];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("_radam", 1, 7, 7, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 242, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer._radam", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_18_radam(__pyx_self, __pyx_v_self, __pyx_v_xp, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key, __pyx_v_nr_upd);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_18_radam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v_xp, PyObject *__pyx_v_weights, PyObject *__pyx_v_gradient, PyObject *__pyx_v_lr_scale, PyObject *__pyx_v_key, PyObject *__pyx_v_nr_upd) {
  PyObject *__pyx_v_beta1 = NULL;
  PyObject *__pyx_v_beta2 = NULL;
  PyObject *__pyx_v_eps = NULL;
  PyObject *__pyx_v_sma_inf = NULL;
  PyObject *__pyx_v_exp_avg = NULL;
  PyObject *__pyx_v_exp_avg_sq = NULL;
  PyObject *__pyx_v_bias_correction1 = NULL;
  PyObject *__pyx_v_bias_correction2 = NULL;
  PyObject *__pyx_v_sma_t = NULL;
  PyObject *__pyx_v_update = NULL;
  PyObject *__pyx_v_r_t = NULL;
  PyObject *__pyx_v_w_norm = NULL;
  PyObject *__pyx_v_u_norm = NULL;
  PyObject *__pyx_v_phi_p = NULL;
  PyObject *__pyx_v_local_lr = NULL;
  PyObject *__pyx_v_lr = NULL;
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  int __pyx_t_2;
  int __pyx_t_3;
  PyObject *__pyx_t_4 = NULL;
  PyObject *__pyx_t_5 = NULL;
  PyObject *__pyx_t_6 = NULL;
  PyObject *__pyx_t_7 = NULL;
  PyObject *__pyx_t_8 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__31)
  __Pyx_RefNannySetupContext("_radam", 0);
  __Pyx_TraceCall("_radam", __pyx_f[0], 242, 0, __PYX_ERR(0, 242, __pyx_L1_error));
  __Pyx_INCREF(__pyx_v_weights);

  /* "thinc/neural/optimizers.pyx":243
 * 
 *     def _radam(self, xp, weights, gradient, lr_scale, key, nr_upd):
 *         if key not in self.mom1:             # <<<<<<<<<<<<<<
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 243, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = (__Pyx_PySequence_ContainsTF(__pyx_v_key, __pyx_t_1, Py_NE)); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 243, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_3 = (__pyx_t_2 != 0);
  if (__pyx_t_3) {

    /* "thinc/neural/optimizers.pyx":244
 *     def _radam(self, xp, weights, gradient, lr_scale, key, nr_upd):
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)             # <<<<<<<<<<<<<<
 *         if key not in self.mom2:
 *             self.mom2[key] = self.ops.allocate(weights.size)
 */
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 244, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_allocate); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 244, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_weights, __pyx_n_s_size); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 244, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_6 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) {
      __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5);
      if (likely(__pyx_t_6)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
        __Pyx_INCREF(__pyx_t_6);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_5, function);
      }
    }
    __pyx_t_1 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_4);
    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 244, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 244, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    if (unlikely(PyObject_SetItem(__pyx_t_5, __pyx_v_key, __pyx_t_1) < 0)) __PYX_ERR(0, 244, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

    /* "thinc/neural/optimizers.pyx":243
 * 
 *     def _radam(self, xp, weights, gradient, lr_scale, key, nr_upd):
 *         if key not in self.mom1:             # <<<<<<<<<<<<<<
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:
 */
  }

  /* "thinc/neural/optimizers.pyx":245
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:             # <<<<<<<<<<<<<<
 *             self.mom2[key] = self.ops.allocate(weights.size)
 * 
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 245, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = (__Pyx_PySequence_ContainsTF(__pyx_v_key, __pyx_t_1, Py_NE)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 245, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_2 = (__pyx_t_3 != 0);
  if (__pyx_t_2) {

    /* "thinc/neural/optimizers.pyx":246
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:
 *             self.mom2[key] = self.ops.allocate(weights.size)             # <<<<<<<<<<<<<<
 * 
 *         beta1 = self.b1
 */
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 246, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_allocate); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 246, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_weights, __pyx_n_s_size); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 246, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __pyx_t_6 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_4))) {
      __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_4);
      if (likely(__pyx_t_6)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
        __Pyx_INCREF(__pyx_t_6);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_4, function);
      }
    }
    __pyx_t_1 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_6, __pyx_t_5) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_5);
    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 246, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 246, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    if (unlikely(PyObject_SetItem(__pyx_t_4, __pyx_v_key, __pyx_t_1) < 0)) __PYX_ERR(0, 246, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

    /* "thinc/neural/optimizers.pyx":245
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:             # <<<<<<<<<<<<<<
 *             self.mom2[key] = self.ops.allocate(weights.size)
 * 
 */
  }

  /* "thinc/neural/optimizers.pyx":248
 *             self.mom2[key] = self.ops.allocate(weights.size)
 * 
 *         beta1 = self.b1             # <<<<<<<<<<<<<<
 *         beta2 = self.b2
 *         eps = self.eps
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 248, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_beta1 = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":249
 * 
 *         beta1 = self.b1
 *         beta2 = self.b2             # <<<<<<<<<<<<<<
 *         eps = self.eps
 *         sma_inf = 2 / (1-beta2) - 1
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 249, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_beta2 = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":250
 *         beta1 = self.b1
 *         beta2 = self.b2
 *         eps = self.eps             # <<<<<<<<<<<<<<
 *         sma_inf = 2 / (1-beta2) - 1
 * 
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_eps); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 250, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_v_eps = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":251
 *         beta2 = self.b2
 *         eps = self.eps
 *         sma_inf = 2 / (1-beta2) - 1             # <<<<<<<<<<<<<<
 * 
 *         exp_avg = self.mom1[key]
 */
  __pyx_t_1 = __Pyx_PyInt_SubtractCObj(__pyx_int_1, __pyx_v_beta2, 1, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 251, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_4 = __Pyx_PyNumber_Divide(__pyx_int_2, __pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 251, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyInt_SubtractObjC(__pyx_t_4, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 251, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_v_sma_inf = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":253
 *         sma_inf = 2 / (1-beta2) - 1
 * 
 *         exp_avg = self.mom1[key]             # <<<<<<<<<<<<<<
 *         exp_avg_sq = self.mom2[key]
 *         # Decay the first and second moment running average coefficient
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 253, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_4 = __Pyx_PyObject_GetItem(__pyx_t_1, __pyx_v_key); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 253, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_exp_avg = __pyx_t_4;
  __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":254
 * 
 *         exp_avg = self.mom1[key]
 *         exp_avg_sq = self.mom2[key]             # <<<<<<<<<<<<<<
 *         # Decay the first and second moment running average coefficient
 *         exp_avg *= beta1
 */
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 254, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_1 = __Pyx_PyObject_GetItem(__pyx_t_4, __pyx_v_key); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 254, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_v_exp_avg_sq = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":256
 *         exp_avg_sq = self.mom2[key]
 *         # Decay the first and second moment running average coefficient
 *         exp_avg *= beta1             # <<<<<<<<<<<<<<
 *         exp_avg += (1-beta1) * gradient
 *         exp_avg_sq *= beta2
 */
  __pyx_t_1 = PyNumber_InPlaceMultiply(__pyx_v_exp_avg, __pyx_v_beta1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 256, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF_SET(__pyx_v_exp_avg, __pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":257
 *         # Decay the first and second moment running average coefficient
 *         exp_avg *= beta1
 *         exp_avg += (1-beta1) * gradient             # <<<<<<<<<<<<<<
 *         exp_avg_sq *= beta2
 *         exp_avg_sq += (1-beta2) * gradient**2
 */
  __pyx_t_1 = __Pyx_PyInt_SubtractCObj(__pyx_int_1, __pyx_v_beta1, 1, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 257, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_4 = PyNumber_Multiply(__pyx_t_1, __pyx_v_gradient); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 257, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = PyNumber_InPlaceAdd(__pyx_v_exp_avg, __pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 257, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __Pyx_DECREF_SET(__pyx_v_exp_avg, __pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":258
 *         exp_avg *= beta1
 *         exp_avg += (1-beta1) * gradient
 *         exp_avg_sq *= beta2             # <<<<<<<<<<<<<<
 *         exp_avg_sq += (1-beta2) * gradient**2
 *         # Bias correction
 */
  __pyx_t_1 = PyNumber_InPlaceMultiply(__pyx_v_exp_avg_sq, __pyx_v_beta2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 258, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF_SET(__pyx_v_exp_avg_sq, __pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":259
 *         exp_avg += (1-beta1) * gradient
 *         exp_avg_sq *= beta2
 *         exp_avg_sq += (1-beta2) * gradient**2             # <<<<<<<<<<<<<<
 *         # Bias correction
 *         bias_correction1 = 1 - beta1 ** nr_upd
 */
  __pyx_t_1 = __Pyx_PyInt_SubtractCObj(__pyx_int_1, __pyx_v_beta2, 1, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 259, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_4 = PyNumber_Power(__pyx_v_gradient, __pyx_int_2, Py_None); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 259, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = PyNumber_Multiply(__pyx_t_1, __pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 259, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_4 = PyNumber_InPlaceAdd(__pyx_v_exp_avg_sq, __pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 259, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __Pyx_DECREF_SET(__pyx_v_exp_avg_sq, __pyx_t_4);
  __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":261
 *         exp_avg_sq += (1-beta2) * gradient**2
 *         # Bias correction
 *         bias_correction1 = 1 - beta1 ** nr_upd             # <<<<<<<<<<<<<<
 *         bias_correction2 = 1 - beta2 ** nr_upd
 * 
 */
  __pyx_t_4 = PyNumber_Power(__pyx_v_beta1, __pyx_v_nr_upd, Py_None); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 261, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = __Pyx_PyInt_SubtractCObj(__pyx_int_1, __pyx_t_4, 1, 0, 0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 261, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_v_bias_correction1 = __pyx_t_5;
  __pyx_t_5 = 0;

  /* "thinc/neural/optimizers.pyx":262
 *         # Bias correction
 *         bias_correction1 = 1 - beta1 ** nr_upd
 *         bias_correction2 = 1 - beta2 ** nr_upd             # <<<<<<<<<<<<<<
 * 
 *         # Compute length of SMA
 */
  __pyx_t_5 = PyNumber_Power(__pyx_v_beta2, __pyx_v_nr_upd, Py_None); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 262, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __pyx_t_4 = __Pyx_PyInt_SubtractCObj(__pyx_int_1, __pyx_t_5, 1, 0, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 262, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __pyx_v_bias_correction2 = __pyx_t_4;
  __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":265
 * 
 *         # Compute length of SMA
 *         sma_t = sma_inf - 2 * nr_upd * (1 - bias_correction2) / bias_correction2             # <<<<<<<<<<<<<<
 *         update = self.ops.allocate(weights.shape, dtype="f")
 *         if sma_t > 4:
 */
  __pyx_t_4 = PyNumber_Multiply(__pyx_int_2, __pyx_v_nr_upd); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 265, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = __Pyx_PyInt_SubtractCObj(__pyx_int_1, __pyx_v_bias_correction2, 1, 0, 0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 265, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __pyx_t_1 = PyNumber_Multiply(__pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 265, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __pyx_t_5 = __Pyx_PyNumber_Divide(__pyx_t_1, __pyx_v_bias_correction2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 265, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = PyNumber_Subtract(__pyx_v_sma_inf, __pyx_t_5); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 265, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __pyx_v_sma_t = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":266
 *         # Compute length of SMA
 *         sma_t = sma_inf - 2 * nr_upd * (1 - bias_correction2) / bias_correction2
 *         update = self.ops.allocate(weights.shape, dtype="f")             # <<<<<<<<<<<<<<
 *         if sma_t > 4:
 *             # Variance rectification term
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 266, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_allocate); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 266, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_weights, __pyx_n_s_shape); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 266, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 266, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_GIVEREF(__pyx_t_1);
  PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1);
  __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 266, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_n_s_f) < 0) __PYX_ERR(0, 266, __pyx_L1_error)
  __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_t_4, __pyx_t_1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 266, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_6);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_update = __pyx_t_6;
  __pyx_t_6 = 0;

  /* "thinc/neural/optimizers.pyx":267
 *         sma_t = sma_inf - 2 * nr_upd * (1 - bias_correction2) / bias_correction2
 *         update = self.ops.allocate(weights.shape, dtype="f")
 *         if sma_t > 4:             # <<<<<<<<<<<<<<
 *             # Variance rectification term
 *             r_t = math.sqrt((sma_t - 4) * (sma_t - 2) * sma_inf / ((sma_inf - 4) * (sma_inf - 2) * sma_t))
 */
  __pyx_t_6 = PyObject_RichCompare(__pyx_v_sma_t, __pyx_int_4, Py_GT); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 267, __pyx_L1_error)
  __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 267, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
  if (__pyx_t_2) {

    /* "thinc/neural/optimizers.pyx":269
 *         if sma_t > 4:
 *             # Variance rectification term
 *             r_t = math.sqrt((sma_t - 4) * (sma_t - 2) * sma_inf / ((sma_inf - 4) * (sma_inf - 2) * sma_t))             # <<<<<<<<<<<<<<
 *             # Adaptive momentum
 *             update += r_t * (
 */
    __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_math); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sqrt); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __pyx_t_1 = __Pyx_PyInt_SubtractObjC(__pyx_v_sma_t, __pyx_int_4, 4, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_5 = __Pyx_PyInt_SubtractObjC(__pyx_v_sma_t, __pyx_int_2, 2, 0, 0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __pyx_t_7 = PyNumber_Multiply(__pyx_t_1, __pyx_t_5); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __pyx_t_5 = PyNumber_Multiply(__pyx_t_7, __pyx_v_sma_inf); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __pyx_t_7 = __Pyx_PyInt_SubtractObjC(__pyx_v_sma_inf, __pyx_int_4, 4, 0, 0); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_7);
    __pyx_t_1 = __Pyx_PyInt_SubtractObjC(__pyx_v_sma_inf, __pyx_int_2, 2, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_8 = PyNumber_Multiply(__pyx_t_7, __pyx_t_1); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __pyx_t_1 = PyNumber_Multiply(__pyx_t_8, __pyx_v_sma_t); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    __pyx_t_8 = __Pyx_PyNumber_Divide(__pyx_t_5, __pyx_t_1); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __pyx_t_1 = NULL;
    if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) {
      __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_4);
      if (likely(__pyx_t_1)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
        __Pyx_INCREF(__pyx_t_1);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_4, function);
      }
    }
    __pyx_t_6 = (__pyx_t_1) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_1, __pyx_t_8) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_8);
    __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 269, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_6);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_v_r_t = __pyx_t_6;
    __pyx_t_6 = 0;

    /* "thinc/neural/optimizers.pyx":272
 *             # Adaptive momentum
 *             update += r_t * (
 *                 (exp_avg / bias_correction1)             # <<<<<<<<<<<<<<
 *                 /
 *                 (self.ops.xp.sqrt(exp_avg_sq / bias_correction2) + eps)
 */
    __pyx_t_6 = __Pyx_PyNumber_Divide(__pyx_v_exp_avg, __pyx_v_bias_correction1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 272, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_6);

    /* "thinc/neural/optimizers.pyx":274
 *                 (exp_avg / bias_correction1)
 *                 /
 *                 (self.ops.xp.sqrt(exp_avg_sq / bias_correction2) + eps)             # <<<<<<<<<<<<<<
 *             )
 *         else:
 */
    __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 274, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_xp); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 274, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sqrt); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 274, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __pyx_t_1 = __Pyx_PyNumber_Divide(__pyx_v_exp_avg_sq, __pyx_v_bias_correction2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 274, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_5 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_8))) {
      __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_8);
      if (likely(__pyx_t_5)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8);
        __Pyx_INCREF(__pyx_t_5);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_8, function);
      }
    }
    __pyx_t_4 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_8, __pyx_t_5, __pyx_t_1) : __Pyx_PyObject_CallOneArg(__pyx_t_8, __pyx_t_1);
    __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 274, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    __pyx_t_8 = PyNumber_Add(__pyx_t_4, __pyx_v_eps); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 274, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

    /* "thinc/neural/optimizers.pyx":273
 *             update += r_t * (
 *                 (exp_avg / bias_correction1)
 *                 /             # <<<<<<<<<<<<<<
 *                 (self.ops.xp.sqrt(exp_avg_sq / bias_correction2) + eps)
 *             )
 */
    __pyx_t_4 = __Pyx_PyNumber_Divide(__pyx_t_6, __pyx_t_8); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 273, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;

    /* "thinc/neural/optimizers.pyx":271
 *             r_t = math.sqrt((sma_t - 4) * (sma_t - 2) * sma_inf / ((sma_inf - 4) * (sma_inf - 2) * sma_t))
 *             # Adaptive momentum
 *             update += r_t * (             # <<<<<<<<<<<<<<
 *                 (exp_avg / bias_correction1)
 *                 /
 */
    __pyx_t_8 = PyNumber_Multiply(__pyx_v_r_t, __pyx_t_4); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 271, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_4 = PyNumber_InPlaceAdd(__pyx_v_update, __pyx_t_8); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 271, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    __Pyx_DECREF_SET(__pyx_v_update, __pyx_t_4);
    __pyx_t_4 = 0;

    /* "thinc/neural/optimizers.pyx":267
 *         sma_t = sma_inf - 2 * nr_upd * (1 - bias_correction2) / bias_correction2
 *         update = self.ops.allocate(weights.shape, dtype="f")
 *         if sma_t > 4:             # <<<<<<<<<<<<<<
 *             # Variance rectification term
 *             r_t = math.sqrt((sma_t - 4) * (sma_t - 2) * sma_inf / ((sma_inf - 4) * (sma_inf - 2) * sma_t))
 */
    goto __pyx_L5;
  }

  /* "thinc/neural/optimizers.pyx":278
 *         else:
 *             # Unadapted momentum
 *             update += exp_avg / bias_correction1             # <<<<<<<<<<<<<<
 *         if self.use_lars:
 *             # LARS
 */
  /*else*/ {
    __pyx_t_4 = __Pyx_PyNumber_Divide(__pyx_v_exp_avg, __pyx_v_bias_correction1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 278, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_8 = PyNumber_InPlaceAdd(__pyx_v_update, __pyx_t_4); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 278, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF_SET(__pyx_v_update, __pyx_t_8);
    __pyx_t_8 = 0;
  }
  __pyx_L5:;

  /* "thinc/neural/optimizers.pyx":279
 *             # Unadapted momentum
 *             update += exp_avg / bias_correction1
 *         if self.use_lars:             # <<<<<<<<<<<<<<
 *             # LARS
 *             w_norm = self.ops.xp.linalg.norm(weights)
 */
  __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_use_lars); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 279, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_8);
  __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 279, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  if (__pyx_t_2) {

    /* "thinc/neural/optimizers.pyx":281
 *         if self.use_lars:
 *             # LARS
 *             w_norm = self.ops.xp.linalg.norm(weights)             # <<<<<<<<<<<<<<
 *             u_norm = self.ops.xp.linalg.norm(update)
 *             phi_p = min(max(w_norm, self.lars_min), self.lars_max)
 */
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 281, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_xp); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 281, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_6);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_linalg); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 281, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_norm); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 281, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_6);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_4 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) {
      __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6);
      if (likely(__pyx_t_4)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6);
        __Pyx_INCREF(__pyx_t_4);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_6, function);
      }
    }
    __pyx_t_8 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_4, __pyx_v_weights) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_weights);
    __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
    if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 281, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    __pyx_v_w_norm = __pyx_t_8;
    __pyx_t_8 = 0;

    /* "thinc/neural/optimizers.pyx":282
 *             # LARS
 *             w_norm = self.ops.xp.linalg.norm(weights)
 *             u_norm = self.ops.xp.linalg.norm(update)             # <<<<<<<<<<<<<<
 *             phi_p = min(max(w_norm, self.lars_min), self.lars_max)
 *             # Compute the local LR
 */
    __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 282, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_6);
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_xp); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 282, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_linalg); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 282, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_6);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_norm); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 282, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    __pyx_t_6 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_4))) {
      __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_4);
      if (likely(__pyx_t_6)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
        __Pyx_INCREF(__pyx_t_6);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_4, function);
      }
    }
    __pyx_t_8 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_6, __pyx_v_update) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_update);
    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
    if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 282, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_v_u_norm = __pyx_t_8;
    __pyx_t_8 = 0;

    /* "thinc/neural/optimizers.pyx":283
 *             w_norm = self.ops.xp.linalg.norm(weights)
 *             u_norm = self.ops.xp.linalg.norm(update)
 *             phi_p = min(max(w_norm, self.lars_min), self.lars_max)             # <<<<<<<<<<<<<<
 *             # Compute the local LR
 *             if phi_p == 0 or u_norm == 0:
 */
    __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_lars_max); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 283, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_lars_min); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 283, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_INCREF(__pyx_v_w_norm);
    __pyx_t_6 = __pyx_v_w_norm;
    __pyx_t_5 = PyObject_RichCompare(__pyx_t_4, __pyx_t_6, Py_GT); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 283, __pyx_L1_error)
    __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 283, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    if (__pyx_t_2) {
      __Pyx_INCREF(__pyx_t_4);
      __pyx_t_1 = __pyx_t_4;
    } else {
      __Pyx_INCREF(__pyx_t_6);
      __pyx_t_1 = __pyx_t_6;
    }
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_INCREF(__pyx_t_1);
    __pyx_t_4 = __pyx_t_1;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __pyx_t_6 = PyObject_RichCompare(__pyx_t_8, __pyx_t_4, Py_LT); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 283, __pyx_L1_error)
    __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 283, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    if (__pyx_t_2) {
      __Pyx_INCREF(__pyx_t_8);
      __pyx_t_1 = __pyx_t_8;
    } else {
      __Pyx_INCREF(__pyx_t_4);
      __pyx_t_1 = __pyx_t_4;
    }
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    __pyx_t_8 = __pyx_t_1;
    __Pyx_INCREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __pyx_v_phi_p = __pyx_t_8;
    __pyx_t_8 = 0;

    /* "thinc/neural/optimizers.pyx":285
 *             phi_p = min(max(w_norm, self.lars_min), self.lars_max)
 *             # Compute the local LR
 *             if phi_p == 0 or u_norm == 0:             # <<<<<<<<<<<<<<
 *                 local_lr = 1
 *             else:
 */
    __pyx_t_8 = __Pyx_PyInt_EqObjC(__pyx_v_phi_p, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 285, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 285, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    if (!__pyx_t_3) {
    } else {
      __pyx_t_2 = __pyx_t_3;
      goto __pyx_L8_bool_binop_done;
    }
    __pyx_t_8 = __Pyx_PyInt_EqObjC(__pyx_v_u_norm, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 285, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 285, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    __pyx_t_2 = __pyx_t_3;
    __pyx_L8_bool_binop_done:;
    if (__pyx_t_2) {

      /* "thinc/neural/optimizers.pyx":286
 *             # Compute the local LR
 *             if phi_p == 0 or u_norm == 0:
 *                 local_lr = 1             # <<<<<<<<<<<<<<
 *             else:
 *                 local_lr = phi_p / u_norm
 */
      __Pyx_INCREF(__pyx_int_1);
      __pyx_v_local_lr = __pyx_int_1;

      /* "thinc/neural/optimizers.pyx":285
 *             phi_p = min(max(w_norm, self.lars_min), self.lars_max)
 *             # Compute the local LR
 *             if phi_p == 0 or u_norm == 0:             # <<<<<<<<<<<<<<
 *                 local_lr = 1
 *             else:
 */
      goto __pyx_L7;
    }

    /* "thinc/neural/optimizers.pyx":288
 *                 local_lr = 1
 *             else:
 *                 local_lr = phi_p / u_norm             # <<<<<<<<<<<<<<
 *             lr = self.alpha * lr_scale * local_lr
 *         else:
 */
    /*else*/ {
      __pyx_t_8 = __Pyx_PyNumber_Divide(__pyx_v_phi_p, __pyx_v_u_norm); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 288, __pyx_L1_error)
      __Pyx_GOTREF(__pyx_t_8);
      __pyx_v_local_lr = __pyx_t_8;
      __pyx_t_8 = 0;
    }
    __pyx_L7:;

    /* "thinc/neural/optimizers.pyx":289
 *             else:
 *                 local_lr = phi_p / u_norm
 *             lr = self.alpha * lr_scale * local_lr             # <<<<<<<<<<<<<<
 *         else:
 *             lr = self.alpha * lr_scale
 */
    __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_alpha); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 289, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __pyx_t_1 = PyNumber_Multiply(__pyx_t_8, __pyx_v_lr_scale); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 289, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    __pyx_t_8 = PyNumber_Multiply(__pyx_t_1, __pyx_v_local_lr); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 289, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __pyx_v_lr = __pyx_t_8;
    __pyx_t_8 = 0;

    /* "thinc/neural/optimizers.pyx":279
 *             # Unadapted momentum
 *             update += exp_avg / bias_correction1
 *         if self.use_lars:             # <<<<<<<<<<<<<<
 *             # LARS
 *             w_norm = self.ops.xp.linalg.norm(weights)
 */
    goto __pyx_L6;
  }

  /* "thinc/neural/optimizers.pyx":291
 *             lr = self.alpha * lr_scale * local_lr
 *         else:
 *             lr = self.alpha * lr_scale             # <<<<<<<<<<<<<<
 *         weights -= lr * update
 * 
 */
  /*else*/ {
    __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_alpha); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 291, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    __pyx_t_1 = PyNumber_Multiply(__pyx_t_8, __pyx_v_lr_scale); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 291, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
    __pyx_v_lr = __pyx_t_1;
    __pyx_t_1 = 0;
  }
  __pyx_L6:;

  /* "thinc/neural/optimizers.pyx":292
 *         else:
 *             lr = self.alpha * lr_scale
 *         weights -= lr * update             # <<<<<<<<<<<<<<
 * 
 *     def _nesterov(self, xp, weights, gradient, lr_scale, key):
 */
  __pyx_t_1 = PyNumber_Multiply(__pyx_v_lr, __pyx_v_update); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 292, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_8 = PyNumber_InPlaceSubtract(__pyx_v_weights, __pyx_t_1); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 292, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_8);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF_SET(__pyx_v_weights, __pyx_t_8);
  __pyx_t_8 = 0;

  /* "thinc/neural/optimizers.pyx":242
 *             self.ops.update_averages(self.averages[key], weights, nr_upd)
 * 
 *     def _radam(self, xp, weights, gradient, lr_scale, key, nr_upd):             # <<<<<<<<<<<<<<
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_XDECREF(__pyx_t_7);
  __Pyx_XDECREF(__pyx_t_8);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer._radam", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_beta1);
  __Pyx_XDECREF(__pyx_v_beta2);
  __Pyx_XDECREF(__pyx_v_eps);
  __Pyx_XDECREF(__pyx_v_sma_inf);
  __Pyx_XDECREF(__pyx_v_exp_avg);
  __Pyx_XDECREF(__pyx_v_exp_avg_sq);
  __Pyx_XDECREF(__pyx_v_bias_correction1);
  __Pyx_XDECREF(__pyx_v_bias_correction2);
  __Pyx_XDECREF(__pyx_v_sma_t);
  __Pyx_XDECREF(__pyx_v_update);
  __Pyx_XDECREF(__pyx_v_r_t);
  __Pyx_XDECREF(__pyx_v_w_norm);
  __Pyx_XDECREF(__pyx_v_u_norm);
  __Pyx_XDECREF(__pyx_v_phi_p);
  __Pyx_XDECREF(__pyx_v_local_lr);
  __Pyx_XDECREF(__pyx_v_lr);
  __Pyx_XDECREF(__pyx_v_weights);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":294
 *         weights -= lr * update
 * 
 *     def _nesterov(self, xp, weights, gradient, lr_scale, key):             # <<<<<<<<<<<<<<
 *         # http://cs231n.github.io/neural-networks-3/
 *         # v_prev = v # back this up
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_21_nesterov(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_21_nesterov = {"_nesterov", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_21_nesterov, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_21_nesterov(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_self = 0;
  CYTHON_UNUSED PyObject *__pyx_v_xp = 0;
  PyObject *__pyx_v_weights = 0;
  PyObject *__pyx_v_gradient = 0;
  PyObject *__pyx_v_lr_scale = 0;
  PyObject *__pyx_v_key = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("_nesterov (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_self,&__pyx_n_s_xp,&__pyx_n_s_weights,&__pyx_n_s_gradient,&__pyx_n_s_lr_scale,&__pyx_n_s_key,0};
    PyObject* values[6] = {0,0,0,0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_self)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_xp)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_nesterov", 1, 6, 6, 1); __PYX_ERR(0, 294, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_weights)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_nesterov", 1, 6, 6, 2); __PYX_ERR(0, 294, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_gradient)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_nesterov", 1, 6, 6, 3); __PYX_ERR(0, 294, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (likely((values[4] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lr_scale)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_nesterov", 1, 6, 6, 4); __PYX_ERR(0, 294, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (likely((values[5] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_key)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_nesterov", 1, 6, 6, 5); __PYX_ERR(0, 294, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_nesterov") < 0)) __PYX_ERR(0, 294, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 6) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
      values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
      values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
      values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
    }
    __pyx_v_self = values[0];
    __pyx_v_xp = values[1];
    __pyx_v_weights = values[2];
    __pyx_v_gradient = values[3];
    __pyx_v_lr_scale = values[4];
    __pyx_v_key = values[5];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("_nesterov", 1, 6, 6, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 294, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer._nesterov", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_20_nesterov(__pyx_self, __pyx_v_self, __pyx_v_xp, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_20_nesterov(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v_xp, PyObject *__pyx_v_weights, PyObject *__pyx_v_gradient, PyObject *__pyx_v_lr_scale, PyObject *__pyx_v_key) {
  PyObject *__pyx_v_lr = NULL;
  PyObject *__pyx_v_momentum = NULL;
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  int __pyx_t_3;
  int __pyx_t_4;
  PyObject *__pyx_t_5 = NULL;
  PyObject *__pyx_t_6 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__32)
  __Pyx_RefNannySetupContext("_nesterov", 0);
  __Pyx_TraceCall("_nesterov", __pyx_f[0], 294, 0, __PYX_ERR(0, 294, __pyx_L1_error));
  __Pyx_INCREF(__pyx_v_weights);

  /* "thinc/neural/optimizers.pyx":304
 *         # v -= lr * gradient
 *         # x += (1+mu) * v
 *         lr = self.alpha * lr_scale             # <<<<<<<<<<<<<<
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_alpha); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 304, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = PyNumber_Multiply(__pyx_t_1, __pyx_v_lr_scale); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 304, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_lr = __pyx_t_2;
  __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":305
 *         # x += (1+mu) * v
 *         lr = self.alpha * lr_scale
 *         if key not in self.mom1:             # <<<<<<<<<<<<<<
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         momentum = self.mom1[key]
 */
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 305, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = (__Pyx_PySequence_ContainsTF(__pyx_v_key, __pyx_t_2, Py_NE)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 305, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_4 = (__pyx_t_3 != 0);
  if (__pyx_t_4) {

    /* "thinc/neural/optimizers.pyx":306
 *         lr = self.alpha * lr_scale
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)             # <<<<<<<<<<<<<<
 *         momentum = self.mom1[key]
 *         weights += -self.b1 * momentum
 */
    __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 306, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_allocate); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 306, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_weights, __pyx_n_s_size); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 306, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_6 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) {
      __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5);
      if (likely(__pyx_t_6)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
        __Pyx_INCREF(__pyx_t_6);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_5, function);
      }
    }
    __pyx_t_2 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_t_1) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_1);
    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 306, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 306, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    if (unlikely(PyObject_SetItem(__pyx_t_5, __pyx_v_key, __pyx_t_2) < 0)) __PYX_ERR(0, 306, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

    /* "thinc/neural/optimizers.pyx":305
 *         # x += (1+mu) * v
 *         lr = self.alpha * lr_scale
 *         if key not in self.mom1:             # <<<<<<<<<<<<<<
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         momentum = self.mom1[key]
 */
  }

  /* "thinc/neural/optimizers.pyx":307
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         momentum = self.mom1[key]             # <<<<<<<<<<<<<<
 *         weights += -self.b1 * momentum
 *         momentum *= self.b1
 */
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 307, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_5 = __Pyx_PyObject_GetItem(__pyx_t_2, __pyx_v_key); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 307, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_v_momentum = __pyx_t_5;
  __pyx_t_5 = 0;

  /* "thinc/neural/optimizers.pyx":308
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         momentum = self.mom1[key]
 *         weights += -self.b1 * momentum             # <<<<<<<<<<<<<<
 *         momentum *= self.b1
 *         momentum -= lr * gradient
 */
  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 308, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __pyx_t_2 = PyNumber_Negative(__pyx_t_5); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 308, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __pyx_t_5 = PyNumber_Multiply(__pyx_t_2, __pyx_v_momentum); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 308, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = PyNumber_InPlaceAdd(__pyx_v_weights, __pyx_t_5); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 308, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __Pyx_DECREF_SET(__pyx_v_weights, __pyx_t_2);
  __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":309
 *         momentum = self.mom1[key]
 *         weights += -self.b1 * momentum
 *         momentum *= self.b1             # <<<<<<<<<<<<<<
 *         momentum -= lr * gradient
 *         weights += (1+self.b1) * momentum
 */
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 309, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_5 = PyNumber_InPlaceMultiply(__pyx_v_momentum, __pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 309, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF_SET(__pyx_v_momentum, __pyx_t_5);
  __pyx_t_5 = 0;

  /* "thinc/neural/optimizers.pyx":310
 *         weights += -self.b1 * momentum
 *         momentum *= self.b1
 *         momentum -= lr * gradient             # <<<<<<<<<<<<<<
 *         weights += (1+self.b1) * momentum
 * 
 */
  __pyx_t_5 = PyNumber_Multiply(__pyx_v_lr, __pyx_v_gradient); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 310, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __pyx_t_2 = PyNumber_InPlaceSubtract(__pyx_v_momentum, __pyx_t_5); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 310, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __Pyx_DECREF_SET(__pyx_v_momentum, __pyx_t_2);
  __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":311
 *         momentum *= self.b1
 *         momentum -= lr * gradient
 *         weights += (1+self.b1) * momentum             # <<<<<<<<<<<<<<
 * 
 *     def _adam(self, xp, weights, gradient, lr_scale, key, nr_upd):
 */
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 311, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_5 = __Pyx_PyInt_AddCObj(__pyx_int_1, __pyx_t_2, 1, 0, 0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 311, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = PyNumber_Multiply(__pyx_t_5, __pyx_v_momentum); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 311, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __pyx_t_5 = PyNumber_InPlaceAdd(__pyx_v_weights, __pyx_t_2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 311, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF_SET(__pyx_v_weights, __pyx_t_5);
  __pyx_t_5 = 0;

  /* "thinc/neural/optimizers.pyx":294
 *         weights -= lr * update
 * 
 *     def _nesterov(self, xp, weights, gradient, lr_scale, key):             # <<<<<<<<<<<<<<
 *         # http://cs231n.github.io/neural-networks-3/
 *         # v_prev = v # back this up
 */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer._nesterov", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_lr);
  __Pyx_XDECREF(__pyx_v_momentum);
  __Pyx_XDECREF(__pyx_v_weights);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":313
 *         weights += (1+self.b1) * momentum
 * 
 *     def _adam(self, xp, weights, gradient, lr_scale, key, nr_upd):             # <<<<<<<<<<<<<<
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_23_adam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_23_adam = {"_adam", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_23_adam, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Optimizer_23_adam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_self = 0;
  CYTHON_UNUSED PyObject *__pyx_v_xp = 0;
  PyObject *__pyx_v_weights = 0;
  PyObject *__pyx_v_gradient = 0;
  PyObject *__pyx_v_lr_scale = 0;
  PyObject *__pyx_v_key = 0;
  PyObject *__pyx_v_nr_upd = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("_adam (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_self,&__pyx_n_s_xp,&__pyx_n_s_weights,&__pyx_n_s_gradient,&__pyx_n_s_lr_scale,&__pyx_n_s_key,&__pyx_n_s_nr_upd,0};
    PyObject* values[7] = {0,0,0,0,0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
        CYTHON_FALLTHROUGH;
        case  6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
        CYTHON_FALLTHROUGH;
        case  5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
        CYTHON_FALLTHROUGH;
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_self)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_xp)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_adam", 1, 7, 7, 1); __PYX_ERR(0, 313, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_weights)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_adam", 1, 7, 7, 2); __PYX_ERR(0, 313, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_gradient)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_adam", 1, 7, 7, 3); __PYX_ERR(0, 313, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  4:
        if (likely((values[4] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_lr_scale)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_adam", 1, 7, 7, 4); __PYX_ERR(0, 313, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  5:
        if (likely((values[5] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_key)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_adam", 1, 7, 7, 5); __PYX_ERR(0, 313, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  6:
        if (likely((values[6] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_nr_upd)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("_adam", 1, 7, 7, 6); __PYX_ERR(0, 313, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_adam") < 0)) __PYX_ERR(0, 313, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 7) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
      values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
      values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
      values[5] = PyTuple_GET_ITEM(__pyx_args, 5);
      values[6] = PyTuple_GET_ITEM(__pyx_args, 6);
    }
    __pyx_v_self = values[0];
    __pyx_v_xp = values[1];
    __pyx_v_weights = values[2];
    __pyx_v_gradient = values[3];
    __pyx_v_lr_scale = values[4];
    __pyx_v_key = values[5];
    __pyx_v_nr_upd = values[6];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("_adam", 1, 7, 7, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 313, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer._adam", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_9Optimizer_22_adam(__pyx_self, __pyx_v_self, __pyx_v_xp, __pyx_v_weights, __pyx_v_gradient, __pyx_v_lr_scale, __pyx_v_key, __pyx_v_nr_upd);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_9Optimizer_22_adam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v_xp, PyObject *__pyx_v_weights, PyObject *__pyx_v_gradient, PyObject *__pyx_v_lr_scale, PyObject *__pyx_v_key, PyObject *__pyx_v_nr_upd) {
  PyObject *__pyx_v_mom1 = NULL;
  PyObject *__pyx_v_mom2 = NULL;
  __pyx_t_5thinc_8typedefs_weight_t __pyx_v_lr;
  __pyx_t_5thinc_8typedefs_weight_t __pyx_v_b1;
  __pyx_t_5thinc_8typedefs_weight_t __pyx_v_b2;
  __pyx_t_5thinc_8typedefs_weight_t __pyx_v_eps;
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  int __pyx_t_2;
  int __pyx_t_3;
  PyObject *__pyx_t_4 = NULL;
  PyObject *__pyx_t_5 = NULL;
  PyObject *__pyx_t_6 = NULL;
  __pyx_t_5thinc_8typedefs_weight_t __pyx_t_7;
  PyObject *__pyx_t_8 = NULL;
  int __pyx_t_9;
  PyObject *__pyx_t_10 = NULL;
  PyObject *__pyx_t_11 = NULL;
  PyObject *__pyx_t_12 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__33)
  __Pyx_RefNannySetupContext("_adam", 0);
  __Pyx_TraceCall("_adam", __pyx_f[0], 313, 0, __PYX_ERR(0, 313, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":314
 * 
 *     def _adam(self, xp, weights, gradient, lr_scale, key, nr_upd):
 *         if key not in self.mom1:             # <<<<<<<<<<<<<<
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 314, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = (__Pyx_PySequence_ContainsTF(__pyx_v_key, __pyx_t_1, Py_NE)); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 314, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_3 = (__pyx_t_2 != 0);
  if (__pyx_t_3) {

    /* "thinc/neural/optimizers.pyx":315
 *     def _adam(self, xp, weights, gradient, lr_scale, key, nr_upd):
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)             # <<<<<<<<<<<<<<
 *         if key not in self.mom2:
 *             self.mom2[key] = self.ops.allocate(weights.size)
 */
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 315, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_allocate); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 315, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_weights, __pyx_n_s_size); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 315, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __pyx_t_6 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) {
      __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5);
      if (likely(__pyx_t_6)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
        __Pyx_INCREF(__pyx_t_6);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_5, function);
      }
    }
    __pyx_t_1 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_4);
    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 315, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 315, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    if (unlikely(PyObject_SetItem(__pyx_t_5, __pyx_v_key, __pyx_t_1) < 0)) __PYX_ERR(0, 315, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

    /* "thinc/neural/optimizers.pyx":314
 * 
 *     def _adam(self, xp, weights, gradient, lr_scale, key, nr_upd):
 *         if key not in self.mom1:             # <<<<<<<<<<<<<<
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:
 */
  }

  /* "thinc/neural/optimizers.pyx":316
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:             # <<<<<<<<<<<<<<
 *             self.mom2[key] = self.ops.allocate(weights.size)
 *         mom1 = self.mom1[key]
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 316, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = (__Pyx_PySequence_ContainsTF(__pyx_v_key, __pyx_t_1, Py_NE)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 316, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_2 = (__pyx_t_3 != 0);
  if (__pyx_t_2) {

    /* "thinc/neural/optimizers.pyx":317
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:
 *             self.mom2[key] = self.ops.allocate(weights.size)             # <<<<<<<<<<<<<<
 *         mom1 = self.mom1[key]
 *         mom2 = self.mom2[key]
 */
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 317, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_allocate); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 317, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_weights, __pyx_n_s_size); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 317, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_5);
    __pyx_t_6 = NULL;
    if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_4))) {
      __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_4);
      if (likely(__pyx_t_6)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
        __Pyx_INCREF(__pyx_t_6);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_4, function);
      }
    }
    __pyx_t_1 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_6, __pyx_t_5) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_5);
    __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 317, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 317, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_4);
    if (unlikely(PyObject_SetItem(__pyx_t_4, __pyx_v_key, __pyx_t_1) < 0)) __PYX_ERR(0, 317, __pyx_L1_error)
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

    /* "thinc/neural/optimizers.pyx":316
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 *         if key not in self.mom2:             # <<<<<<<<<<<<<<
 *             self.mom2[key] = self.ops.allocate(weights.size)
 *         mom1 = self.mom1[key]
 */
  }

  /* "thinc/neural/optimizers.pyx":318
 *         if key not in self.mom2:
 *             self.mom2[key] = self.ops.allocate(weights.size)
 *         mom1 = self.mom1[key]             # <<<<<<<<<<<<<<
 *         mom2 = self.mom2[key]
 *         cdef weight_t lr = self.lr(nr_upd)
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 318, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_4 = __Pyx_PyObject_GetItem(__pyx_t_1, __pyx_v_key); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 318, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_mom1 = __pyx_t_4;
  __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":319
 *             self.mom2[key] = self.ops.allocate(weights.size)
 *         mom1 = self.mom1[key]
 *         mom2 = self.mom2[key]             # <<<<<<<<<<<<<<
 *         cdef weight_t lr = self.lr(nr_upd)
 *         cdef weight_t b1 = linear_decay(self.b1, self.b1_decay, nr_upd)
 */
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_mom2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 319, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_1 = __Pyx_PyObject_GetItem(__pyx_t_4, __pyx_v_key); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 319, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_v_mom2 = __pyx_t_1;
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":320
 *         mom1 = self.mom1[key]
 *         mom2 = self.mom2[key]
 *         cdef weight_t lr = self.lr(nr_upd)             # <<<<<<<<<<<<<<
 *         cdef weight_t b1 = linear_decay(self.b1, self.b1_decay, nr_upd)
 *         cdef weight_t b2 = linear_decay(self.b2, self.b2_decay, nr_upd)
 */
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_lr); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 320, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = NULL;
  if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_4))) {
    __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
    if (likely(__pyx_t_5)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
      __Pyx_INCREF(__pyx_t_5);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_4, function);
    }
  }
  __pyx_t_1 = (__pyx_t_5) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_5, __pyx_v_nr_upd) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_nr_upd);
  __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 320, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_7 = __pyx_PyFloat_AsFloat(__pyx_t_1); if (unlikely((__pyx_t_7 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 320, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_lr = __pyx_t_7;

  /* "thinc/neural/optimizers.pyx":321
 *         mom2 = self.mom2[key]
 *         cdef weight_t lr = self.lr(nr_upd)
 *         cdef weight_t b1 = linear_decay(self.b1, self.b1_decay, nr_upd)             # <<<<<<<<<<<<<<
 *         cdef weight_t b2 = linear_decay(self.b2, self.b2_decay, nr_upd)
 *         cdef weight_t eps = self.eps
 */
  __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_linear_decay); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 321, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 321, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b1_decay); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 321, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_6);
  __pyx_t_8 = NULL;
  __pyx_t_9 = 0;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) {
    __pyx_t_8 = PyMethod_GET_SELF(__pyx_t_4);
    if (likely(__pyx_t_8)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
      __Pyx_INCREF(__pyx_t_8);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_4, function);
      __pyx_t_9 = 1;
    }
  }
  #if CYTHON_FAST_PYCALL
  if (PyFunction_Check(__pyx_t_4)) {
    PyObject *__pyx_temp[4] = {__pyx_t_8, __pyx_t_5, __pyx_t_6, __pyx_v_nr_upd};
    __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_4, __pyx_temp+1-__pyx_t_9, 3+__pyx_t_9); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error)
    __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0;
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
  } else
  #endif
  #if CYTHON_FAST_PYCCALL
  if (__Pyx_PyFastCFunction_Check(__pyx_t_4)) {
    PyObject *__pyx_temp[4] = {__pyx_t_8, __pyx_t_5, __pyx_t_6, __pyx_v_nr_upd};
    __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_4, __pyx_temp+1-__pyx_t_9, 3+__pyx_t_9); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error)
    __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0;
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
  } else
  #endif
  {
    __pyx_t_10 = PyTuple_New(3+__pyx_t_9); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 321, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_10);
    if (__pyx_t_8) {
      __Pyx_GIVEREF(__pyx_t_8); PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_8); __pyx_t_8 = NULL;
    }
    __Pyx_GIVEREF(__pyx_t_5);
    PyTuple_SET_ITEM(__pyx_t_10, 0+__pyx_t_9, __pyx_t_5);
    __Pyx_GIVEREF(__pyx_t_6);
    PyTuple_SET_ITEM(__pyx_t_10, 1+__pyx_t_9, __pyx_t_6);
    __Pyx_INCREF(__pyx_v_nr_upd);
    __Pyx_GIVEREF(__pyx_v_nr_upd);
    PyTuple_SET_ITEM(__pyx_t_10, 2+__pyx_t_9, __pyx_v_nr_upd);
    __pyx_t_5 = 0;
    __pyx_t_6 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_10, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 321, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
  }
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_7 = __pyx_PyFloat_AsFloat(__pyx_t_1); if (unlikely((__pyx_t_7 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 321, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_b1 = __pyx_t_7;

  /* "thinc/neural/optimizers.pyx":322
 *         cdef weight_t lr = self.lr(nr_upd)
 *         cdef weight_t b1 = linear_decay(self.b1, self.b1_decay, nr_upd)
 *         cdef weight_t b2 = linear_decay(self.b2, self.b2_decay, nr_upd)             # <<<<<<<<<<<<<<
 *         cdef weight_t eps = self.eps
 *         self.ops.adam(
 */
  __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_linear_decay); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 322, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b2); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 322, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_10);
  __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_b2_decay); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 322, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_6);
  __pyx_t_5 = NULL;
  __pyx_t_9 = 0;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) {
    __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_4);
    if (likely(__pyx_t_5)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4);
      __Pyx_INCREF(__pyx_t_5);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_4, function);
      __pyx_t_9 = 1;
    }
  }
  #if CYTHON_FAST_PYCALL
  if (PyFunction_Check(__pyx_t_4)) {
    PyObject *__pyx_temp[4] = {__pyx_t_5, __pyx_t_10, __pyx_t_6, __pyx_v_nr_upd};
    __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_4, __pyx_temp+1-__pyx_t_9, 3+__pyx_t_9); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error)
    __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
  } else
  #endif
  #if CYTHON_FAST_PYCCALL
  if (__Pyx_PyFastCFunction_Check(__pyx_t_4)) {
    PyObject *__pyx_temp[4] = {__pyx_t_5, __pyx_t_10, __pyx_t_6, __pyx_v_nr_upd};
    __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_4, __pyx_temp+1-__pyx_t_9, 3+__pyx_t_9); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error)
    __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
  } else
  #endif
  {
    __pyx_t_8 = PyTuple_New(3+__pyx_t_9); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 322, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_8);
    if (__pyx_t_5) {
      __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_5); __pyx_t_5 = NULL;
    }
    __Pyx_GIVEREF(__pyx_t_10);
    PyTuple_SET_ITEM(__pyx_t_8, 0+__pyx_t_9, __pyx_t_10);
    __Pyx_GIVEREF(__pyx_t_6);
    PyTuple_SET_ITEM(__pyx_t_8, 1+__pyx_t_9, __pyx_t_6);
    __Pyx_INCREF(__pyx_v_nr_upd);
    __Pyx_GIVEREF(__pyx_v_nr_upd);
    PyTuple_SET_ITEM(__pyx_t_8, 2+__pyx_t_9, __pyx_v_nr_upd);
    __pyx_t_10 = 0;
    __pyx_t_6 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_8, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 322, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  }
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  __pyx_t_7 = __pyx_PyFloat_AsFloat(__pyx_t_1); if (unlikely((__pyx_t_7 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 322, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_b2 = __pyx_t_7;

  /* "thinc/neural/optimizers.pyx":323
 *         cdef weight_t b1 = linear_decay(self.b1, self.b1_decay, nr_upd)
 *         cdef weight_t b2 = linear_decay(self.b2, self.b2_decay, nr_upd)
 *         cdef weight_t eps = self.eps             # <<<<<<<<<<<<<<
 *         self.ops.adam(
 *             weights, gradient, mom1, mom2, b1, b2, eps, lr * lr_scale)
 */
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_eps); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 323, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_7 = __pyx_PyFloat_AsFloat(__pyx_t_1); if (unlikely((__pyx_t_7 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 323, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_eps = __pyx_t_7;

  /* "thinc/neural/optimizers.pyx":324
 *         cdef weight_t b2 = linear_decay(self.b2, self.b2_decay, nr_upd)
 *         cdef weight_t eps = self.eps
 *         self.ops.adam(             # <<<<<<<<<<<<<<
 *             weights, gradient, mom1, mom2, b1, b2, eps, lr * lr_scale)
 *         gradient.fill(0)
 */
  __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_ops); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 324, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_adam_2); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 324, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_8);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":325
 *         cdef weight_t eps = self.eps
 *         self.ops.adam(
 *             weights, gradient, mom1, mom2, b1, b2, eps, lr * lr_scale)             # <<<<<<<<<<<<<<
 *         gradient.fill(0)
 * 
 */
  __pyx_t_4 = PyFloat_FromDouble(__pyx_v_b1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 325, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_6 = PyFloat_FromDouble(__pyx_v_b2); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 325, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_6);
  __pyx_t_10 = PyFloat_FromDouble(__pyx_v_eps); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 325, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_10);
  __pyx_t_5 = PyFloat_FromDouble(__pyx_v_lr); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 325, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __pyx_t_11 = PyNumber_Multiply(__pyx_t_5, __pyx_v_lr_scale); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 325, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_11);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  __pyx_t_5 = NULL;
  __pyx_t_9 = 0;
  if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_8))) {
    __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_8);
    if (likely(__pyx_t_5)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8);
      __Pyx_INCREF(__pyx_t_5);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_8, function);
      __pyx_t_9 = 1;
    }
  }
  #if CYTHON_FAST_PYCALL
  if (PyFunction_Check(__pyx_t_8)) {
    PyObject *__pyx_temp[9] = {__pyx_t_5, __pyx_v_weights, __pyx_v_gradient, __pyx_v_mom1, __pyx_v_mom2, __pyx_t_4, __pyx_t_6, __pyx_t_10, __pyx_t_11};
    __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_8, __pyx_temp+1-__pyx_t_9, 8+__pyx_t_9); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 324, __pyx_L1_error)
    __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
    __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0;
  } else
  #endif
  #if CYTHON_FAST_PYCCALL
  if (__Pyx_PyFastCFunction_Check(__pyx_t_8)) {
    PyObject *__pyx_temp[9] = {__pyx_t_5, __pyx_v_weights, __pyx_v_gradient, __pyx_v_mom1, __pyx_v_mom2, __pyx_t_4, __pyx_t_6, __pyx_t_10, __pyx_t_11};
    __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_8, __pyx_temp+1-__pyx_t_9, 8+__pyx_t_9); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 324, __pyx_L1_error)
    __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0;
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
    __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
    __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0;
    __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0;
  } else
  #endif
  {
    __pyx_t_12 = PyTuple_New(8+__pyx_t_9); if (unlikely(!__pyx_t_12)) __PYX_ERR(0, 324, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_12);
    if (__pyx_t_5) {
      __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_t_5); __pyx_t_5 = NULL;
    }
    __Pyx_INCREF(__pyx_v_weights);
    __Pyx_GIVEREF(__pyx_v_weights);
    PyTuple_SET_ITEM(__pyx_t_12, 0+__pyx_t_9, __pyx_v_weights);
    __Pyx_INCREF(__pyx_v_gradient);
    __Pyx_GIVEREF(__pyx_v_gradient);
    PyTuple_SET_ITEM(__pyx_t_12, 1+__pyx_t_9, __pyx_v_gradient);
    __Pyx_INCREF(__pyx_v_mom1);
    __Pyx_GIVEREF(__pyx_v_mom1);
    PyTuple_SET_ITEM(__pyx_t_12, 2+__pyx_t_9, __pyx_v_mom1);
    __Pyx_INCREF(__pyx_v_mom2);
    __Pyx_GIVEREF(__pyx_v_mom2);
    PyTuple_SET_ITEM(__pyx_t_12, 3+__pyx_t_9, __pyx_v_mom2);
    __Pyx_GIVEREF(__pyx_t_4);
    PyTuple_SET_ITEM(__pyx_t_12, 4+__pyx_t_9, __pyx_t_4);
    __Pyx_GIVEREF(__pyx_t_6);
    PyTuple_SET_ITEM(__pyx_t_12, 5+__pyx_t_9, __pyx_t_6);
    __Pyx_GIVEREF(__pyx_t_10);
    PyTuple_SET_ITEM(__pyx_t_12, 6+__pyx_t_9, __pyx_t_10);
    __Pyx_GIVEREF(__pyx_t_11);
    PyTuple_SET_ITEM(__pyx_t_12, 7+__pyx_t_9, __pyx_t_11);
    __pyx_t_4 = 0;
    __pyx_t_6 = 0;
    __pyx_t_10 = 0;
    __pyx_t_11 = 0;
    __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_12, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 324, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0;
  }
  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":326
 *         self.ops.adam(
 *             weights, gradient, mom1, mom2, b1, b2, eps, lr * lr_scale)
 *         gradient.fill(0)             # <<<<<<<<<<<<<<
 * 
 * 
 */
  __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_gradient, __pyx_n_s_fill); if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 326, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_8);
  __pyx_t_12 = NULL;
  if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_8))) {
    __pyx_t_12 = PyMethod_GET_SELF(__pyx_t_8);
    if (likely(__pyx_t_12)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8);
      __Pyx_INCREF(__pyx_t_12);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_8, function);
    }
  }
  __pyx_t_1 = (__pyx_t_12) ? __Pyx_PyObject_Call2Args(__pyx_t_8, __pyx_t_12, __pyx_int_0) : __Pyx_PyObject_CallOneArg(__pyx_t_8, __pyx_int_0);
  __Pyx_XDECREF(__pyx_t_12); __pyx_t_12 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 326, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":313
 *         weights += (1+self.b1) * momentum
 * 
 *     def _adam(self, xp, weights, gradient, lr_scale, key, nr_upd):             # <<<<<<<<<<<<<<
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 */

  /* function exit code */
  __pyx_r = Py_None; __Pyx_INCREF(Py_None);
  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_6);
  __Pyx_XDECREF(__pyx_t_8);
  __Pyx_XDECREF(__pyx_t_10);
  __Pyx_XDECREF(__pyx_t_11);
  __Pyx_XDECREF(__pyx_t_12);
  __Pyx_AddTraceback("thinc.neural.optimizers.Optimizer._adam", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_mom1);
  __Pyx_XDECREF(__pyx_v_mom2);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":329
 * 
 * 
 * def _make_ops(ops):             # <<<<<<<<<<<<<<
 *     if ops == "CupyOps":
 *         return CupyOps()
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_7_make_ops(PyObject *__pyx_self, PyObject *__pyx_v_ops); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_7_make_ops = {"_make_ops", (PyCFunction)__pyx_pw_5thinc_6neural_10optimizers_7_make_ops, METH_O, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_7_make_ops(PyObject *__pyx_self, PyObject *__pyx_v_ops) {
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("_make_ops (wrapper)", 0);
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_6_make_ops(__pyx_self, ((PyObject *)__pyx_v_ops));

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_6_make_ops(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_ops) {
  PyObject *__pyx_v_Model = NULL;
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  int __pyx_t_1;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  int __pyx_t_5;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__34)
  __Pyx_RefNannySetupContext("_make_ops", 0);
  __Pyx_TraceCall("_make_ops", __pyx_f[0], 329, 0, __PYX_ERR(0, 329, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":330
 * 
 * def _make_ops(ops):
 *     if ops == "CupyOps":             # <<<<<<<<<<<<<<
 *         return CupyOps()
 *     elif ops == "NumpyOps":
 */
  __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_v_ops, __pyx_n_s_CupyOps, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 330, __pyx_L1_error)
  if (__pyx_t_1) {

    /* "thinc/neural/optimizers.pyx":331
 * def _make_ops(ops):
 *     if ops == "CupyOps":
 *         return CupyOps()             # <<<<<<<<<<<<<<
 *     elif ops == "NumpyOps":
 *         return NumpyOps()
 */
    __Pyx_XDECREF(__pyx_r);
    __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_CupyOps); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 331, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_4 = NULL;
    if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) {
      __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
      if (likely(__pyx_t_4)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
        __Pyx_INCREF(__pyx_t_4);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_3, function);
      }
    }
    __pyx_t_2 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_3);
    __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
    if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 331, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __pyx_r = __pyx_t_2;
    __pyx_t_2 = 0;
    goto __pyx_L0;

    /* "thinc/neural/optimizers.pyx":330
 * 
 * def _make_ops(ops):
 *     if ops == "CupyOps":             # <<<<<<<<<<<<<<
 *         return CupyOps()
 *     elif ops == "NumpyOps":
 */
  }

  /* "thinc/neural/optimizers.pyx":332
 *     if ops == "CupyOps":
 *         return CupyOps()
 *     elif ops == "NumpyOps":             # <<<<<<<<<<<<<<
 *         return NumpyOps()
 *     elif ops is None:
 */
  __pyx_t_1 = (__Pyx_PyString_Equals(__pyx_v_ops, __pyx_n_s_NumpyOps, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 332, __pyx_L1_error)
  if (__pyx_t_1) {

    /* "thinc/neural/optimizers.pyx":333
 *         return CupyOps()
 *     elif ops == "NumpyOps":
 *         return NumpyOps()             # <<<<<<<<<<<<<<
 *     elif ops is None:
 *         from ._classes.model import Model
 */
    __Pyx_XDECREF(__pyx_r);
    __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_NumpyOps); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 333, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_t_4 = NULL;
    if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_3))) {
      __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_3);
      if (likely(__pyx_t_4)) {
        PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_3);
        __Pyx_INCREF(__pyx_t_4);
        __Pyx_INCREF(function);
        __Pyx_DECREF_SET(__pyx_t_3, function);
      }
    }
    __pyx_t_2 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_3);
    __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0;
    if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 333, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __pyx_r = __pyx_t_2;
    __pyx_t_2 = 0;
    goto __pyx_L0;

    /* "thinc/neural/optimizers.pyx":332
 *     if ops == "CupyOps":
 *         return CupyOps()
 *     elif ops == "NumpyOps":             # <<<<<<<<<<<<<<
 *         return NumpyOps()
 *     elif ops is None:
 */
  }

  /* "thinc/neural/optimizers.pyx":334
 *     elif ops == "NumpyOps":
 *         return NumpyOps()
 *     elif ops is None:             # <<<<<<<<<<<<<<
 *         from ._classes.model import Model
 *         return Model.ops
 */
  __pyx_t_1 = (__pyx_v_ops == Py_None);
  __pyx_t_5 = (__pyx_t_1 != 0);
  if (__pyx_t_5) {

    /* "thinc/neural/optimizers.pyx":335
 *         return NumpyOps()
 *     elif ops is None:
 *         from ._classes.model import Model             # <<<<<<<<<<<<<<
 *         return Model.ops
 *     else:
 */
    __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 335, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_INCREF(__pyx_n_s_Model);
    __Pyx_GIVEREF(__pyx_n_s_Model);
    PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_Model);
    __pyx_t_3 = __Pyx_Import(__pyx_n_s_classes_model, __pyx_t_2, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 335, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_3, __pyx_n_s_Model); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 335, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_2);
    __Pyx_INCREF(__pyx_t_2);
    __pyx_v_Model = __pyx_t_2;
    __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

    /* "thinc/neural/optimizers.pyx":336
 *     elif ops is None:
 *         from ._classes.model import Model
 *         return Model.ops             # <<<<<<<<<<<<<<
 *     else:
 *         return ops
 */
    __Pyx_XDECREF(__pyx_r);
    __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_Model, __pyx_n_s_ops); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 336, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __pyx_r = __pyx_t_3;
    __pyx_t_3 = 0;
    goto __pyx_L0;

    /* "thinc/neural/optimizers.pyx":334
 *     elif ops == "NumpyOps":
 *         return NumpyOps()
 *     elif ops is None:             # <<<<<<<<<<<<<<
 *         from ._classes.model import Model
 *         return Model.ops
 */
  }

  /* "thinc/neural/optimizers.pyx":338
 *         return Model.ops
 *     else:
 *         return ops             # <<<<<<<<<<<<<<
 * 
 * 
 */
  /*else*/ {
    __Pyx_XDECREF(__pyx_r);
    __Pyx_INCREF(__pyx_v_ops);
    __pyx_r = __pyx_v_ops;
    goto __pyx_L0;
  }

  /* "thinc/neural/optimizers.pyx":329
 * 
 * 
 * def _make_ops(ops):             # <<<<<<<<<<<<<<
 *     if ops == "CupyOps":
 *         return CupyOps()
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_AddTraceback("thinc.neural.optimizers._make_ops", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XDECREF(__pyx_v_Model);
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":343
 * # These are deprecated
 * 
 * def Adam(*args, **kwargs):             # <<<<<<<<<<<<<<
 *     return Optimizer(*args, **kwargs)
 * 
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Adam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_9Adam = {"Adam", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_9Adam, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_9Adam(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_args = 0;
  PyObject *__pyx_v_kwargs = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("Adam (wrapper)", 0);
  if (unlikely(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "Adam", 1))) return NULL;
  if (unlikely(__pyx_kwds)) {
    __pyx_v_kwargs = PyDict_Copy(__pyx_kwds); if (unlikely(!__pyx_v_kwargs)) return NULL;
    __Pyx_GOTREF(__pyx_v_kwargs);
  } else {
    __pyx_v_kwargs = NULL;
  }
  __Pyx_INCREF(__pyx_args);
  __pyx_v_args = __pyx_args;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_8Adam(__pyx_self, __pyx_v_args, __pyx_v_kwargs);

  /* function exit code */
  __Pyx_XDECREF(__pyx_v_args);
  __Pyx_XDECREF(__pyx_v_kwargs);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_8Adam(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_args, PyObject *__pyx_v_kwargs) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__35)
  __Pyx_RefNannySetupContext("Adam", 0);
  __Pyx_TraceCall("Adam", __pyx_f[0], 343, 0, __PYX_ERR(0, 343, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":344
 * 
 * def Adam(*args, **kwargs):
 *     return Optimizer(*args, **kwargs)             # <<<<<<<<<<<<<<
 * 
 * def SGD(*args, **kwargs):
 */
  __Pyx_XDECREF(__pyx_r);
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_Optimizer); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 344, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_v_args, __pyx_v_kwargs); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 344, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_r = __pyx_t_2;
  __pyx_t_2 = 0;
  goto __pyx_L0;

  /* "thinc/neural/optimizers.pyx":343
 * # These are deprecated
 * 
 * def Adam(*args, **kwargs):             # <<<<<<<<<<<<<<
 *     return Optimizer(*args, **kwargs)
 * 
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_AddTraceback("thinc.neural.optimizers.Adam", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":346
 *     return Optimizer(*args, **kwargs)
 * 
 * def SGD(*args, **kwargs):             # <<<<<<<<<<<<<<
 *     kwargs.setdefault('beta1', 0.)
 *     kwargs.setdefault('beta2', 0.)
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_11SGD(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_11SGD = {"SGD", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_11SGD, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_11SGD(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_args = 0;
  PyObject *__pyx_v_kwargs = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("SGD (wrapper)", 0);
  if (unlikely(__pyx_kwds) && unlikely(!__Pyx_CheckKeywordStrings(__pyx_kwds, "SGD", 1))) return NULL;
  __pyx_v_kwargs = (__pyx_kwds) ? PyDict_Copy(__pyx_kwds) : PyDict_New(); if (unlikely(!__pyx_v_kwargs)) return NULL;
  __Pyx_GOTREF(__pyx_v_kwargs);
  __Pyx_INCREF(__pyx_args);
  __pyx_v_args = __pyx_args;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_10SGD(__pyx_self, __pyx_v_args, __pyx_v_kwargs);

  /* function exit code */
  __Pyx_XDECREF(__pyx_v_args);
  __Pyx_XDECREF(__pyx_v_kwargs);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_10SGD(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_args, PyObject *__pyx_v_kwargs) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__36)
  __Pyx_RefNannySetupContext("SGD", 0);
  __Pyx_TraceCall("SGD", __pyx_f[0], 346, 0, __PYX_ERR(0, 346, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":347
 * 
 * def SGD(*args, **kwargs):
 *     kwargs.setdefault('beta1', 0.)             # <<<<<<<<<<<<<<
 *     kwargs.setdefault('beta2', 0.)
 *     return Optimizer(*args, **kwargs)
 */
  __pyx_t_1 = __Pyx_PyDict_SetDefault(__pyx_v_kwargs, __pyx_n_s_beta1, __pyx_float_0_, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 347, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":348
 * def SGD(*args, **kwargs):
 *     kwargs.setdefault('beta1', 0.)
 *     kwargs.setdefault('beta2', 0.)             # <<<<<<<<<<<<<<
 *     return Optimizer(*args, **kwargs)
 * 
 */
  __pyx_t_1 = __Pyx_PyDict_SetDefault(__pyx_v_kwargs, __pyx_n_s_beta2, __pyx_float_0_, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 348, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":349
 *     kwargs.setdefault('beta1', 0.)
 *     kwargs.setdefault('beta2', 0.)
 *     return Optimizer(*args, **kwargs)             # <<<<<<<<<<<<<<
 * 
 * 
 */
  __Pyx_XDECREF(__pyx_r);
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_Optimizer); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 349, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_v_args, __pyx_v_kwargs); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 349, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_r = __pyx_t_2;
  __pyx_t_2 = 0;
  goto __pyx_L0;

  /* "thinc/neural/optimizers.pyx":346
 *     return Optimizer(*args, **kwargs)
 * 
 * def SGD(*args, **kwargs):             # <<<<<<<<<<<<<<
 *     kwargs.setdefault('beta1', 0.)
 *     kwargs.setdefault('beta2', 0.)
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_AddTraceback("thinc.neural.optimizers.SGD", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":352
 * 
 * 
 * def linear_decay(rate, decay, nr_upd):             # <<<<<<<<<<<<<<
 *     return rate * 1./(1. + decay * nr_upd)
 * 
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_13linear_decay(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_13linear_decay = {"linear_decay", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_13linear_decay, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_13linear_decay(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_rate = 0;
  PyObject *__pyx_v_decay = 0;
  PyObject *__pyx_v_nr_upd = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("linear_decay (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_rate,&__pyx_n_s_decay,&__pyx_n_s_nr_upd,0};
    PyObject* values[3] = {0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_rate)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_decay)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("linear_decay", 1, 3, 3, 1); __PYX_ERR(0, 352, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_nr_upd)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("linear_decay", 1, 3, 3, 2); __PYX_ERR(0, 352, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "linear_decay") < 0)) __PYX_ERR(0, 352, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 3) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
    }
    __pyx_v_rate = values[0];
    __pyx_v_decay = values[1];
    __pyx_v_nr_upd = values[2];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("linear_decay", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 352, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.linear_decay", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_12linear_decay(__pyx_self, __pyx_v_rate, __pyx_v_decay, __pyx_v_nr_upd);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_12linear_decay(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_rate, PyObject *__pyx_v_decay, PyObject *__pyx_v_nr_upd) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__37)
  __Pyx_RefNannySetupContext("linear_decay", 0);
  __Pyx_TraceCall("linear_decay", __pyx_f[0], 352, 0, __PYX_ERR(0, 352, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":353
 * 
 * def linear_decay(rate, decay, nr_upd):
 *     return rate * 1./(1. + decay * nr_upd)             # <<<<<<<<<<<<<<
 * 
 * 
 */
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_1 = PyNumber_Multiply(__pyx_v_rate, __pyx_float_1_); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 353, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = PyNumber_Multiply(__pyx_v_decay, __pyx_v_nr_upd); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 353, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyFloat_AddCObj(__pyx_float_1_, __pyx_t_2, 1., 0, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 353, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyNumber_Divide(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 353, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_r = __pyx_t_2;
  __pyx_t_2 = 0;
  goto __pyx_L0;

  /* "thinc/neural/optimizers.pyx":352
 * 
 * 
 * def linear_decay(rate, decay, nr_upd):             # <<<<<<<<<<<<<<
 *     return rate * 1./(1. + decay * nr_upd)
 * 
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_AddTraceback("thinc.neural.optimizers.linear_decay", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* "thinc/neural/optimizers.pyx":356
 * 
 * 
 * def anneal(rate, decay, decay_steps, nr_upd):             # <<<<<<<<<<<<<<
 *     if decay == 0.0:
 *         return rate
 */

/* Python wrapper */
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_15anneal(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static PyMethodDef __pyx_mdef_5thinc_6neural_10optimizers_15anneal = {"anneal", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_5thinc_6neural_10optimizers_15anneal, METH_VARARGS|METH_KEYWORDS, 0};
static PyObject *__pyx_pw_5thinc_6neural_10optimizers_15anneal(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
  PyObject *__pyx_v_rate = 0;
  PyObject *__pyx_v_decay = 0;
  PyObject *__pyx_v_decay_steps = 0;
  PyObject *__pyx_v_nr_upd = 0;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("anneal (wrapper)", 0);
  {
    static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_rate,&__pyx_n_s_decay,&__pyx_n_s_decay_steps,&__pyx_n_s_nr_upd,0};
    PyObject* values[4] = {0,0,0,0};
    if (unlikely(__pyx_kwds)) {
      Py_ssize_t kw_args;
      const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
      switch (pos_args) {
        case  4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
        CYTHON_FALLTHROUGH;
        case  3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
        CYTHON_FALLTHROUGH;
        case  2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
        CYTHON_FALLTHROUGH;
        case  1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
        CYTHON_FALLTHROUGH;
        case  0: break;
        default: goto __pyx_L5_argtuple_error;
      }
      kw_args = PyDict_Size(__pyx_kwds);
      switch (pos_args) {
        case  0:
        if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_rate)) != 0)) kw_args--;
        else goto __pyx_L5_argtuple_error;
        CYTHON_FALLTHROUGH;
        case  1:
        if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_decay)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("anneal", 1, 4, 4, 1); __PYX_ERR(0, 356, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  2:
        if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_decay_steps)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("anneal", 1, 4, 4, 2); __PYX_ERR(0, 356, __pyx_L3_error)
        }
        CYTHON_FALLTHROUGH;
        case  3:
        if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_nr_upd)) != 0)) kw_args--;
        else {
          __Pyx_RaiseArgtupleInvalid("anneal", 1, 4, 4, 3); __PYX_ERR(0, 356, __pyx_L3_error)
        }
      }
      if (unlikely(kw_args > 0)) {
        if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "anneal") < 0)) __PYX_ERR(0, 356, __pyx_L3_error)
      }
    } else if (PyTuple_GET_SIZE(__pyx_args) != 4) {
      goto __pyx_L5_argtuple_error;
    } else {
      values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
      values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
      values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
      values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
    }
    __pyx_v_rate = values[0];
    __pyx_v_decay = values[1];
    __pyx_v_decay_steps = values[2];
    __pyx_v_nr_upd = values[3];
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L5_argtuple_error:;
  __Pyx_RaiseArgtupleInvalid("anneal", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 356, __pyx_L3_error)
  __pyx_L3_error:;
  __Pyx_AddTraceback("thinc.neural.optimizers.anneal", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_5thinc_6neural_10optimizers_14anneal(__pyx_self, __pyx_v_rate, __pyx_v_decay, __pyx_v_decay_steps, __pyx_v_nr_upd);

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_5thinc_6neural_10optimizers_14anneal(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_rate, PyObject *__pyx_v_decay, PyObject *__pyx_v_decay_steps, PyObject *__pyx_v_nr_upd) {
  PyObject *__pyx_r = NULL;
  __Pyx_TraceDeclarations
  __Pyx_RefNannyDeclarations
  PyObject *__pyx_t_1 = NULL;
  int __pyx_t_2;
  PyObject *__pyx_t_3 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_TraceFrameInit(__pyx_codeobj__38)
  __Pyx_RefNannySetupContext("anneal", 0);
  __Pyx_TraceCall("anneal", __pyx_f[0], 356, 0, __PYX_ERR(0, 356, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":357
 * 
 * def anneal(rate, decay, decay_steps, nr_upd):
 *     if decay == 0.0:             # <<<<<<<<<<<<<<
 *         return rate
 *     else:
 */
  __pyx_t_1 = __Pyx_PyFloat_EqObjC(__pyx_v_decay, __pyx_float_0_0, 0.0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 357, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_2 < 0)) __PYX_ERR(0, 357, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  if (__pyx_t_2) {

    /* "thinc/neural/optimizers.pyx":358
 * def anneal(rate, decay, decay_steps, nr_upd):
 *     if decay == 0.0:
 *         return rate             # <<<<<<<<<<<<<<
 *     else:
 *         return rate * decay ** (nr_upd / decay_steps)
 */
    __Pyx_XDECREF(__pyx_r);
    __Pyx_INCREF(__pyx_v_rate);
    __pyx_r = __pyx_v_rate;
    goto __pyx_L0;

    /* "thinc/neural/optimizers.pyx":357
 * 
 * def anneal(rate, decay, decay_steps, nr_upd):
 *     if decay == 0.0:             # <<<<<<<<<<<<<<
 *         return rate
 *     else:
 */
  }

  /* "thinc/neural/optimizers.pyx":360
 *         return rate
 *     else:
 *         return rate * decay ** (nr_upd / decay_steps)             # <<<<<<<<<<<<<<
 */
  /*else*/ {
    __Pyx_XDECREF(__pyx_r);
    __pyx_t_1 = __Pyx_PyNumber_Divide(__pyx_v_nr_upd, __pyx_v_decay_steps); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 360, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __pyx_t_3 = PyNumber_Power(__pyx_v_decay, __pyx_t_1, Py_None); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 360, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_3);
    __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
    __pyx_t_1 = PyNumber_Multiply(__pyx_v_rate, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 360, __pyx_L1_error)
    __Pyx_GOTREF(__pyx_t_1);
    __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
    __pyx_r = __pyx_t_1;
    __pyx_t_1 = 0;
    goto __pyx_L0;
  }

  /* "thinc/neural/optimizers.pyx":356
 * 
 * 
 * def anneal(rate, decay, decay_steps, nr_upd):             # <<<<<<<<<<<<<<
 *     if decay == 0.0:
 *         return rate
 */

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_AddTraceback("thinc.neural.optimizers.anneal", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_TraceReturn(__pyx_r, 0);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyMethodDef __pyx_methods[] = {
  {0, 0, 0, 0}
};

#if PY_MAJOR_VERSION >= 3
#if CYTHON_PEP489_MULTI_PHASE_INIT
static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/
static int __pyx_pymod_exec_optimizers(PyObject* module); /*proto*/
static PyModuleDef_Slot __pyx_moduledef_slots[] = {
  {Py_mod_create, (void*)__pyx_pymod_create},
  {Py_mod_exec, (void*)__pyx_pymod_exec_optimizers},
  {0, NULL}
};
#endif

static struct PyModuleDef __pyx_moduledef = {
    PyModuleDef_HEAD_INIT,
    "optimizers",
    0, /* m_doc */
  #if CYTHON_PEP489_MULTI_PHASE_INIT
    0, /* m_size */
  #else
    -1, /* m_size */
  #endif
    __pyx_methods /* m_methods */,
  #if CYTHON_PEP489_MULTI_PHASE_INIT
    __pyx_moduledef_slots, /* m_slots */
  #else
    NULL, /* m_reload */
  #endif
    NULL, /* m_traverse */
    NULL, /* m_clear */
    NULL /* m_free */
};
#endif
#ifndef CYTHON_SMALL_CODE
#if defined(__clang__)
    #define CYTHON_SMALL_CODE
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
    #define CYTHON_SMALL_CODE __attribute__((cold))
#else
    #define CYTHON_SMALL_CODE
#endif
#endif

static __Pyx_StringTabEntry __pyx_string_tab[] = {
  {&__pyx_n_s_ADAM_DEFAULTS, __pyx_k_ADAM_DEFAULTS, sizeof(__pyx_k_ADAM_DEFAULTS), 0, 0, 1, 1},
  {&__pyx_n_s_Adam, __pyx_k_Adam, sizeof(__pyx_k_Adam), 0, 0, 1, 1},
  {&__pyx_kp_s_Adam_v1, __pyx_k_Adam_v1, sizeof(__pyx_k_Adam_v1), 0, 0, 1, 0},
  {&__pyx_n_s_CupyOps, __pyx_k_CupyOps, sizeof(__pyx_k_CupyOps), 0, 0, 1, 1},
  {&__pyx_kp_s_Do_various_flavours_of_stochasti, __pyx_k_Do_various_flavours_of_stochasti, sizeof(__pyx_k_Do_various_flavours_of_stochasti), 0, 0, 1, 0},
  {&__pyx_n_s_L2, __pyx_k_L2, sizeof(__pyx_k_L2), 0, 0, 1, 1},
  {&__pyx_n_s_L2_is_weight_decay, __pyx_k_L2_is_weight_decay, sizeof(__pyx_k_L2_is_weight_decay), 0, 0, 1, 1},
  {&__pyx_n_s_Model, __pyx_k_Model, sizeof(__pyx_k_Model), 0, 0, 1, 1},
  {&__pyx_n_s_NotImplementedError, __pyx_k_NotImplementedError, sizeof(__pyx_k_NotImplementedError), 0, 0, 1, 1},
  {&__pyx_n_s_NumpyOps, __pyx_k_NumpyOps, sizeof(__pyx_k_NumpyOps), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer, __pyx_k_Optimizer, sizeof(__pyx_k_Optimizer), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer___call, __pyx_k_Optimizer___call, sizeof(__pyx_k_Optimizer___call), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer___init, __pyx_k_Optimizer___init, sizeof(__pyx_k_Optimizer___init), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer__adam, __pyx_k_Optimizer__adam, sizeof(__pyx_k_Optimizer__adam), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer__nesterov, __pyx_k_Optimizer__nesterov, sizeof(__pyx_k_Optimizer__nesterov), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer__radam, __pyx_k_Optimizer__radam, sizeof(__pyx_k_Optimizer__radam), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer_from_config, __pyx_k_Optimizer_from_config, sizeof(__pyx_k_Optimizer_from_config), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer_learn_rate, __pyx_k_Optimizer_learn_rate, sizeof(__pyx_k_Optimizer_learn_rate), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer_lr, __pyx_k_Optimizer_lr, sizeof(__pyx_k_Optimizer_lr), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer_step_schedules, __pyx_k_Optimizer_step_schedules, sizeof(__pyx_k_Optimizer_step_schedules), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer_to_cpu, __pyx_k_Optimizer_to_cpu, sizeof(__pyx_k_Optimizer_to_cpu), 0, 0, 1, 1},
  {&__pyx_n_s_Optimizer_to_gpu, __pyx_k_Optimizer_to_gpu, sizeof(__pyx_k_Optimizer_to_gpu), 0, 0, 1, 1},
  {&__pyx_kp_s_RAdam_v1, __pyx_k_RAdam_v1, sizeof(__pyx_k_RAdam_v1), 0, 0, 1, 0},
  {&__pyx_n_s_SGD, __pyx_k_SGD, sizeof(__pyx_k_SGD), 0, 0, 1, 1},
  {&__pyx_n_s_SGD_DEFAULTS, __pyx_k_SGD_DEFAULTS, sizeof(__pyx_k_SGD_DEFAULTS), 0, 0, 1, 1},
  {&__pyx_kp_s_SGD_v1, __pyx_k_SGD_v1, sizeof(__pyx_k_SGD_v1), 0, 0, 1, 0},
  {&__pyx_n_s__47, __pyx_k__47, sizeof(__pyx_k__47), 0, 0, 1, 1},
  {&__pyx_n_s_adam, __pyx_k_adam, sizeof(__pyx_k_adam), 0, 0, 1, 1},
  {&__pyx_n_s_adam_2, __pyx_k_adam_2, sizeof(__pyx_k_adam_2), 0, 0, 1, 1},
  {&__pyx_n_s_add_gradient_noise, __pyx_k_add_gradient_noise, sizeof(__pyx_k_add_gradient_noise), 0, 0, 1, 1},
  {&__pyx_n_s_allocate, __pyx_k_allocate, sizeof(__pyx_k_allocate), 0, 0, 1, 1},
  {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1},
  {&__pyx_n_s_anneal, __pyx_k_anneal, sizeof(__pyx_k_anneal), 0, 0, 1, 1},
  {&__pyx_n_s_args, __pyx_k_args, sizeof(__pyx_k_args), 0, 0, 1, 1},
  {&__pyx_n_s_asarray, __pyx_k_asarray, sizeof(__pyx_k_asarray), 0, 0, 1, 1},
  {&__pyx_n_s_averages, __pyx_k_averages, sizeof(__pyx_k_averages), 0, 0, 1, 1},
  {&__pyx_n_s_b1, __pyx_k_b1, sizeof(__pyx_k_b1), 0, 0, 1, 1},
  {&__pyx_n_s_b1_decay, __pyx_k_b1_decay, sizeof(__pyx_k_b1_decay), 0, 0, 1, 1},
  {&__pyx_n_s_b2, __pyx_k_b2, sizeof(__pyx_k_b2), 0, 0, 1, 1},
  {&__pyx_n_s_b2_decay, __pyx_k_b2_decay, sizeof(__pyx_k_b2_decay), 0, 0, 1, 1},
  {&__pyx_n_s_beta1, __pyx_k_beta1, sizeof(__pyx_k_beta1), 0, 0, 1, 1},
  {&__pyx_n_s_beta2, __pyx_k_beta2, sizeof(__pyx_k_beta2), 0, 0, 1, 1},
  {&__pyx_n_s_bias_correction1, __pyx_k_bias_correction1, sizeof(__pyx_k_bias_correction1), 0, 0, 1, 1},
  {&__pyx_n_s_bias_correction2, __pyx_k_bias_correction2, sizeof(__pyx_k_bias_correction2), 0, 0, 1, 1},
  {&__pyx_n_s_call, __pyx_k_call, sizeof(__pyx_k_call), 0, 0, 1, 1},
  {&__pyx_n_s_classes_model, __pyx_k_classes_model, sizeof(__pyx_k_classes_model), 0, 0, 1, 1},
  {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1},
  {&__pyx_n_s_clip_gradient, __pyx_k_clip_gradient, sizeof(__pyx_k_clip_gradient), 0, 0, 1, 1},
  {&__pyx_n_s_cls, __pyx_k_cls, sizeof(__pyx_k_cls), 0, 0, 1, 1},
  {&__pyx_n_s_collections, __pyx_k_collections, sizeof(__pyx_k_collections), 0, 0, 1, 1},
  {&__pyx_n_s_config, __pyx_k_config, sizeof(__pyx_k_config), 0, 0, 1, 1},
  {&__pyx_n_s_create_Adam, __pyx_k_create_Adam, sizeof(__pyx_k_create_Adam), 0, 0, 1, 1},
  {&__pyx_n_s_create_RAdam, __pyx_k_create_RAdam, sizeof(__pyx_k_create_RAdam), 0, 0, 1, 1},
  {&__pyx_n_s_create_SGD, __pyx_k_create_SGD, sizeof(__pyx_k_create_SGD), 0, 0, 1, 1},
  {&__pyx_n_s_decay, __pyx_k_decay, sizeof(__pyx_k_decay), 0, 0, 1, 1},
  {&__pyx_n_s_decay_steps, __pyx_k_decay_steps, sizeof(__pyx_k_decay_steps), 0, 0, 1, 1},
  {&__pyx_n_s_defaultdict, __pyx_k_defaultdict, sizeof(__pyx_k_defaultdict), 0, 0, 1, 1},
  {&__pyx_n_s_doc, __pyx_k_doc, sizeof(__pyx_k_doc), 0, 0, 1, 1},
  {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1},
  {&__pyx_n_s_eps, __pyx_k_eps, sizeof(__pyx_k_eps), 0, 0, 1, 1},
  {&__pyx_n_s_exp_avg, __pyx_k_exp_avg, sizeof(__pyx_k_exp_avg), 0, 0, 1, 1},
  {&__pyx_n_s_exp_avg_sq, __pyx_k_exp_avg_sq, sizeof(__pyx_k_exp_avg_sq), 0, 0, 1, 1},
  {&__pyx_n_s_f, __pyx_k_f, sizeof(__pyx_k_f), 0, 0, 1, 1},
  {&__pyx_n_s_fill, __pyx_k_fill, sizeof(__pyx_k_fill), 0, 0, 1, 1},
  {&__pyx_n_s_fix1, __pyx_k_fix1, sizeof(__pyx_k_fix1), 0, 0, 1, 1},
  {&__pyx_n_s_fix2, __pyx_k_fix2, sizeof(__pyx_k_fix2), 0, 0, 1, 1},
  {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1},
  {&__pyx_n_s_from_config, __pyx_k_from_config, sizeof(__pyx_k_from_config), 0, 0, 1, 1},
  {&__pyx_n_s_get, __pyx_k_get, sizeof(__pyx_k_get), 0, 0, 1, 1},
  {&__pyx_n_s_get_array_module, __pyx_k_get_array_module, sizeof(__pyx_k_get_array_module), 0, 0, 1, 1},
  {&__pyx_n_s_gradient, __pyx_k_gradient, sizeof(__pyx_k_gradient), 0, 0, 1, 1},
  {&__pyx_n_s_gradient_noise, __pyx_k_gradient_noise, sizeof(__pyx_k_gradient_noise), 0, 0, 1, 1},
  {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1},
  {&__pyx_n_s_init, __pyx_k_init, sizeof(__pyx_k_init), 0, 0, 1, 1},
  {&__pyx_n_s_items, __pyx_k_items, sizeof(__pyx_k_items), 0, 0, 1, 1},
  {&__pyx_n_s_key, __pyx_k_key, sizeof(__pyx_k_key), 0, 0, 1, 1},
  {&__pyx_n_s_kwargs, __pyx_k_kwargs, sizeof(__pyx_k_kwargs), 0, 0, 1, 1},
  {&__pyx_n_s_lars_max, __pyx_k_lars_max, sizeof(__pyx_k_lars_max), 0, 0, 1, 1},
  {&__pyx_n_s_lars_min, __pyx_k_lars_min, sizeof(__pyx_k_lars_min), 0, 0, 1, 1},
  {&__pyx_n_s_last_seen, __pyx_k_last_seen, sizeof(__pyx_k_last_seen), 0, 0, 1, 1},
  {&__pyx_n_s_learn_rate, __pyx_k_learn_rate, sizeof(__pyx_k_learn_rate), 0, 0, 1, 1},
  {&__pyx_n_s_linalg, __pyx_k_linalg, sizeof(__pyx_k_linalg), 0, 0, 1, 1},
  {&__pyx_n_s_linear_decay, __pyx_k_linear_decay, sizeof(__pyx_k_linear_decay), 0, 0, 1, 1},
  {&__pyx_n_s_local_lr, __pyx_k_local_lr, sizeof(__pyx_k_local_lr), 0, 0, 1, 1},
  {&__pyx_n_s_lookahead_alpha, __pyx_k_lookahead_alpha, sizeof(__pyx_k_lookahead_alpha), 0, 0, 1, 1},
  {&__pyx_n_s_lookahead_k, __pyx_k_lookahead_k, sizeof(__pyx_k_lookahead_k), 0, 0, 1, 1},
  {&__pyx_n_s_lr, __pyx_k_lr, sizeof(__pyx_k_lr), 0, 0, 1, 1},
  {&__pyx_n_s_lr_scale, __pyx_k_lr_scale, sizeof(__pyx_k_lr_scale), 0, 0, 1, 1},
  {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1},
  {&__pyx_n_s_make_from_config, __pyx_k_make_from_config, sizeof(__pyx_k_make_from_config), 0, 0, 1, 1},
  {&__pyx_n_s_make_ops, __pyx_k_make_ops, sizeof(__pyx_k_make_ops), 0, 0, 1, 1},
  {&__pyx_n_s_math, __pyx_k_math, sizeof(__pyx_k_math), 0, 0, 1, 1},
  {&__pyx_n_s_max_grad_norm, __pyx_k_max_grad_norm, sizeof(__pyx_k_max_grad_norm), 0, 0, 1, 1},
  {&__pyx_n_s_metaclass, __pyx_k_metaclass, sizeof(__pyx_k_metaclass), 0, 0, 1, 1},
  {&__pyx_n_s_module, __pyx_k_module, sizeof(__pyx_k_module), 0, 0, 1, 1},
  {&__pyx_n_s_mom1, __pyx_k_mom1, sizeof(__pyx_k_mom1), 0, 0, 1, 1},
  {&__pyx_n_s_mom2, __pyx_k_mom2, sizeof(__pyx_k_mom2), 0, 0, 1, 1},
  {&__pyx_n_s_momentum, __pyx_k_momentum, sizeof(__pyx_k_momentum), 0, 0, 1, 1},
  {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1},
  {&__pyx_n_s_nesterov, __pyx_k_nesterov, sizeof(__pyx_k_nesterov), 0, 0, 1, 1},
  {&__pyx_n_s_nesterov_2, __pyx_k_nesterov_2, sizeof(__pyx_k_nesterov_2), 0, 0, 1, 1},
  {&__pyx_n_s_norm, __pyx_k_norm, sizeof(__pyx_k_norm), 0, 0, 1, 1},
  {&__pyx_n_s_nr_upd, __pyx_k_nr_upd, sizeof(__pyx_k_nr_upd), 0, 0, 1, 1},
  {&__pyx_n_s_nr_update, __pyx_k_nr_update, sizeof(__pyx_k_nr_update), 0, 0, 1, 1},
  {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1},
  {&__pyx_n_s_object, __pyx_k_object, sizeof(__pyx_k_object), 0, 0, 1, 1},
  {&__pyx_n_s_ops, __pyx_k_ops, sizeof(__pyx_k_ops), 0, 0, 1, 1},
  {&__pyx_n_s_optimizers, __pyx_k_optimizers, sizeof(__pyx_k_optimizers), 0, 0, 1, 1},
  {&__pyx_kp_s_optimizers_pyx, __pyx_k_optimizers_pyx, sizeof(__pyx_k_optimizers_pyx), 0, 0, 1, 0},
  {&__pyx_n_s_params, __pyx_k_params, sizeof(__pyx_k_params), 0, 0, 1, 1},
  {&__pyx_n_s_phi_p, __pyx_k_phi_p, sizeof(__pyx_k_phi_p), 0, 0, 1, 1},
  {&__pyx_n_s_prepare, __pyx_k_prepare, sizeof(__pyx_k_prepare), 0, 0, 1, 1},
  {&__pyx_n_s_property, __pyx_k_property, sizeof(__pyx_k_property), 0, 0, 1, 1},
  {&__pyx_n_s_qualname, __pyx_k_qualname, sizeof(__pyx_k_qualname), 0, 0, 1, 1},
  {&__pyx_n_s_r_t, __pyx_k_r_t, sizeof(__pyx_k_r_t), 0, 0, 1, 1},
  {&__pyx_n_s_radam, __pyx_k_radam, sizeof(__pyx_k_radam), 0, 0, 1, 1},
  {&__pyx_n_s_rate, __pyx_k_rate, sizeof(__pyx_k_rate), 0, 0, 1, 1},
  {&__pyx_n_s_register, __pyx_k_register, sizeof(__pyx_k_register), 0, 0, 1, 1},
  {&__pyx_n_s_registry, __pyx_k_registry, sizeof(__pyx_k_registry), 0, 0, 1, 1},
  {&__pyx_n_s_registry_2, __pyx_k_registry_2, sizeof(__pyx_k_registry_2), 0, 0, 1, 1},
  {&__pyx_n_s_schedule, __pyx_k_schedule, sizeof(__pyx_k_schedule), 0, 0, 1, 1},
  {&__pyx_n_s_schedules, __pyx_k_schedules, sizeof(__pyx_k_schedules), 0, 0, 1, 1},
  {&__pyx_n_s_self, __pyx_k_self, sizeof(__pyx_k_self), 0, 0, 1, 1},
  {&__pyx_n_s_setdefault, __pyx_k_setdefault, sizeof(__pyx_k_setdefault), 0, 0, 1, 1},
  {&__pyx_n_s_setter, __pyx_k_setter, sizeof(__pyx_k_setter), 0, 0, 1, 1},
  {&__pyx_n_s_shape, __pyx_k_shape, sizeof(__pyx_k_shape), 0, 0, 1, 1},
  {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1},
  {&__pyx_n_s_slow, __pyx_k_slow, sizeof(__pyx_k_slow), 0, 0, 1, 1},
  {&__pyx_n_s_slow_weights, __pyx_k_slow_weights, sizeof(__pyx_k_slow_weights), 0, 0, 1, 1},
  {&__pyx_n_s_sma_inf, __pyx_k_sma_inf, sizeof(__pyx_k_sma_inf), 0, 0, 1, 1},
  {&__pyx_n_s_sma_t, __pyx_k_sma_t, sizeof(__pyx_k_sma_t), 0, 0, 1, 1},
  {&__pyx_n_s_sqrt, __pyx_k_sqrt, sizeof(__pyx_k_sqrt), 0, 0, 1, 1},
  {&__pyx_n_s_step_schedules, __pyx_k_step_schedules, sizeof(__pyx_k_step_schedules), 0, 0, 1, 1},
  {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1},
  {&__pyx_n_s_thinc_neural_optimizers, __pyx_k_thinc_neural_optimizers, sizeof(__pyx_k_thinc_neural_optimizers), 0, 0, 1, 1},
  {&__pyx_n_s_to_cpu, __pyx_k_to_cpu, sizeof(__pyx_k_to_cpu), 0, 0, 1, 1},
  {&__pyx_n_s_to_gpu, __pyx_k_to_gpu, sizeof(__pyx_k_to_gpu), 0, 0, 1, 1},
  {&__pyx_n_s_u_norm, __pyx_k_u_norm, sizeof(__pyx_k_u_norm), 0, 0, 1, 1},
  {&__pyx_n_s_update, __pyx_k_update, sizeof(__pyx_k_update), 0, 0, 1, 1},
  {&__pyx_n_s_update_averages, __pyx_k_update_averages, sizeof(__pyx_k_update_averages), 0, 0, 1, 1},
  {&__pyx_n_s_use_averages, __pyx_k_use_averages, sizeof(__pyx_k_use_averages), 0, 0, 1, 1},
  {&__pyx_n_s_use_lars, __pyx_k_use_lars, sizeof(__pyx_k_use_lars), 0, 0, 1, 1},
  {&__pyx_n_s_use_radam, __pyx_k_use_radam, sizeof(__pyx_k_use_radam), 0, 0, 1, 1},
  {&__pyx_n_s_util, __pyx_k_util, sizeof(__pyx_k_util), 0, 0, 1, 1},
  {&__pyx_n_s_value, __pyx_k_value, sizeof(__pyx_k_value), 0, 0, 1, 1},
  {&__pyx_n_s_w_norm, __pyx_k_w_norm, sizeof(__pyx_k_w_norm), 0, 0, 1, 1},
  {&__pyx_n_s_weights, __pyx_k_weights, sizeof(__pyx_k_weights), 0, 0, 1, 1},
  {&__pyx_n_s_xp, __pyx_k_xp, sizeof(__pyx_k_xp), 0, 0, 1, 1},
  {0, 0, 0, 0, 0, 0, 0}
};
static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) {
  __pyx_builtin_object = __Pyx_GetBuiltinName(__pyx_n_s_object); if (!__pyx_builtin_object) __PYX_ERR(0, 112, __pyx_L1_error)
  __pyx_builtin_property = __Pyx_GetBuiltinName(__pyx_n_s_property); if (!__pyx_builtin_property) __PYX_ERR(0, 183, __pyx_L1_error)
  __pyx_builtin_NotImplementedError = __Pyx_GetBuiltinName(__pyx_n_s_NotImplementedError); if (!__pyx_builtin_NotImplementedError) __PYX_ERR(0, 221, __pyx_L1_error)
  return 0;
  __pyx_L1_error:;
  return -1;
}

static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);

  /* "thinc/neural/optimizers.pyx":236
 *             slow = self.slow_weights[key]
 *             slow += self.lookahead_alpha * (weights - slow)
 *             weights[:] = slow             # <<<<<<<<<<<<<<
 *         if self.averages is not None:
 *             if key not in self.averages:
 */
  __pyx_slice__30 = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice__30)) __PYX_ERR(0, 236, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_slice__30);
  __Pyx_GIVEREF(__pyx_slice__30);

  /* "thinc/neural/optimizers.pyx":38
 * 
 * 
 * @registry.optimizers.register("RAdam.v1")             # <<<<<<<<<<<<<<
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],
 */
  __pyx_tuple__39 = PyTuple_Pack(1, __pyx_kp_s_RAdam_v1); if (unlikely(!__pyx_tuple__39)) __PYX_ERR(0, 38, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__39);
  __Pyx_GIVEREF(__pyx_tuple__39);

  /* "thinc/neural/optimizers.pyx":39
 * 
 * @registry.optimizers.register("RAdam.v1")
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */
  __pyx_tuple__40 = PyTuple_Pack(10, __pyx_n_s_learn_rate, __pyx_n_s_L2, __pyx_n_s_beta1, __pyx_n_s_beta2, __pyx_n_s_eps, __pyx_n_s_max_grad_norm, __pyx_n_s_L2_is_weight_decay, __pyx_n_s_use_averages, __pyx_n_s_schedules, __pyx_n_s_ops); if (unlikely(!__pyx_tuple__40)) __PYX_ERR(0, 39, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__40);
  __Pyx_GIVEREF(__pyx_tuple__40);
  __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(10, 0, 10, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__40, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_create_RAdam, 39, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(0, 39, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":67
 * 
 * 
 * @registry.optimizers.register("Adam.v1")             # <<<<<<<<<<<<<<
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],
 */
  __pyx_tuple__41 = PyTuple_Pack(1, __pyx_kp_s_Adam_v1); if (unlikely(!__pyx_tuple__41)) __PYX_ERR(0, 67, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__41);
  __Pyx_GIVEREF(__pyx_tuple__41);

  /* "thinc/neural/optimizers.pyx":68
 * 
 * @registry.optimizers.register("Adam.v1")
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */
  __pyx_tuple__42 = PyTuple_Pack(10, __pyx_n_s_learn_rate, __pyx_n_s_L2, __pyx_n_s_beta1, __pyx_n_s_beta2, __pyx_n_s_eps, __pyx_n_s_max_grad_norm, __pyx_n_s_L2_is_weight_decay, __pyx_n_s_use_averages, __pyx_n_s_ops, __pyx_n_s_schedules); if (unlikely(!__pyx_tuple__42)) __PYX_ERR(0, 68, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__42);
  __Pyx_GIVEREF(__pyx_tuple__42);
  __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(10, 0, 10, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__42, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_create_Adam, 68, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) __PYX_ERR(0, 68, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":97
 * 
 * 
 * @registry.optimizers.register("SGD.v1")             # <<<<<<<<<<<<<<
 * def create_SGD(learn_rate,
 *         ops=None,
 */
  __pyx_tuple__43 = PyTuple_Pack(1, __pyx_kp_s_SGD_v1); if (unlikely(!__pyx_tuple__43)) __PYX_ERR(0, 97, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__43);
  __Pyx_GIVEREF(__pyx_tuple__43);

  /* "thinc/neural/optimizers.pyx":98
 * 
 * @registry.optimizers.register("SGD.v1")
 * def create_SGD(learn_rate,             # <<<<<<<<<<<<<<
 *         ops=None,
 *         L2=SGD_DEFAULTS["L2"],
 */
  __pyx_tuple__44 = PyTuple_Pack(7, __pyx_n_s_learn_rate, __pyx_n_s_ops, __pyx_n_s_L2, __pyx_n_s_max_grad_norm, __pyx_n_s_L2_is_weight_decay, __pyx_n_s_use_averages, __pyx_n_s_schedules); if (unlikely(!__pyx_tuple__44)) __PYX_ERR(0, 98, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__44);
  __Pyx_GIVEREF(__pyx_tuple__44);
  __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(7, 0, 7, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__44, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_create_SGD, 98, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) __PYX_ERR(0, 98, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":112
 * 
 * 
 * class Optimizer(object):             # <<<<<<<<<<<<<<
 *     '''Do various flavours of stochastic gradient descent, with first and
 *     second order momentum.
 */
  __pyx_tuple__45 = PyTuple_Pack(1, __pyx_builtin_object); if (unlikely(!__pyx_tuple__45)) __PYX_ERR(0, 112, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__45);
  __Pyx_GIVEREF(__pyx_tuple__45);

  /* "thinc/neural/optimizers.pyx":124
 *     '''
 *     @classmethod
 *     def from_config(cls, config):             # <<<<<<<<<<<<<<
 *         return registry.make_from_config(config)
 * 
 */
  __pyx_tuple__46 = PyTuple_Pack(2, __pyx_n_s_cls, __pyx_n_s_config); if (unlikely(!__pyx_tuple__46)) __PYX_ERR(0, 124, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__46);
  __Pyx_GIVEREF(__pyx_tuple__46);
  __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(2, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__46, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_from_config, 124, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) __PYX_ERR(0, 124, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":127
 *         return registry.make_from_config(config)
 * 
 *     def __init__(self, ops, lr, L2=1e-4, beta1=0.90, beta2=0.999, eps=1e-08,             # <<<<<<<<<<<<<<
 *                  max_grad_norm=10., gradient_noise=0.0, nesterov=True,
 *                  L2_is_weight_decay=False, lookahead_k=0, lookahead_alpha=0.5,
 */
  __pyx_tuple__48 = PyTuple_Pack(18, __pyx_n_s_self, __pyx_n_s_ops, __pyx_n_s_lr, __pyx_n_s_L2, __pyx_n_s_beta1, __pyx_n_s_beta2, __pyx_n_s_eps, __pyx_n_s_max_grad_norm, __pyx_n_s_gradient_noise, __pyx_n_s_nesterov, __pyx_n_s_L2_is_weight_decay, __pyx_n_s_lookahead_k, __pyx_n_s_lookahead_alpha, __pyx_n_s_use_averages, __pyx_n_s_use_radam, __pyx_n_s_use_lars, __pyx_n_s_schedule, __pyx_n_s__47); if (unlikely(!__pyx_tuple__48)) __PYX_ERR(0, 127, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__48);
  __Pyx_GIVEREF(__pyx_tuple__48);
  __pyx_codeobj__22 = (PyObject*)__Pyx_PyCode_New(17, 0, 18, 0, CO_OPTIMIZED|CO_NEWLOCALS|CO_VARKEYWORDS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__48, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_init, 127, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__22)) __PYX_ERR(0, 127, __pyx_L1_error)
  __pyx_tuple__49 = PyTuple_Pack(14, ((PyObject*)__pyx_float_1eneg_4), ((PyObject*)__pyx_float_0_90), ((PyObject*)__pyx_float_0_999), ((PyObject*)__pyx_float_1eneg_08), ((PyObject*)__pyx_float_10_), ((PyObject*)__pyx_float_0_0), ((PyObject *)Py_True), ((PyObject *)Py_False), ((PyObject *)__pyx_int_0), ((PyObject*)__pyx_float_0_5), ((PyObject *)Py_True), ((PyObject *)Py_False), ((PyObject *)Py_False), ((PyObject *)Py_None)); if (unlikely(!__pyx_tuple__49)) __PYX_ERR(0, 127, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__49);
  __Pyx_GIVEREF(__pyx_tuple__49);

  /* "thinc/neural/optimizers.pyx":166
 *         self.b2_decay = 0.0
 * 
 *     def to_gpu(self):             # <<<<<<<<<<<<<<
 *         self.ops = CupyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 */
  __pyx_tuple__50 = PyTuple_Pack(4, __pyx_n_s_self, __pyx_n_s_params, __pyx_n_s_key, __pyx_n_s_value); if (unlikely(!__pyx_tuple__50)) __PYX_ERR(0, 166, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__50);
  __Pyx_GIVEREF(__pyx_tuple__50);
  __pyx_codeobj__23 = (PyObject*)__Pyx_PyCode_New(1, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__50, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_to_gpu, 166, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__23)) __PYX_ERR(0, 166, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":172
 *                 params[key] = self.ops.xp.asarray(value, dtype=value.dtype)
 * 
 *     def to_cpu(self):             # <<<<<<<<<<<<<<
 *         self.ops = NumpyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 */
  __pyx_tuple__51 = PyTuple_Pack(4, __pyx_n_s_self, __pyx_n_s_params, __pyx_n_s_key, __pyx_n_s_value); if (unlikely(!__pyx_tuple__51)) __PYX_ERR(0, 172, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__51);
  __Pyx_GIVEREF(__pyx_tuple__51);
  __pyx_codeobj__24 = (PyObject*)__Pyx_PyCode_New(1, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__51, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_to_cpu, 172, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__24)) __PYX_ERR(0, 172, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":179
 *                     params[key] = value.get()
 * 
 *     def step_schedules(self):             # <<<<<<<<<<<<<<
 *         for key, schedule in self.schedules.items():
 *             setattr(self, key, next(schedule))
 */
  __pyx_tuple__52 = PyTuple_Pack(3, __pyx_n_s_self, __pyx_n_s_key, __pyx_n_s_schedule); if (unlikely(!__pyx_tuple__52)) __PYX_ERR(0, 179, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__52);
  __Pyx_GIVEREF(__pyx_tuple__52);
  __pyx_codeobj__25 = (PyObject*)__Pyx_PyCode_New(1, 0, 3, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__52, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_step_schedules, 179, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__25)) __PYX_ERR(0, 179, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":184
 * 
 *     @property
 *     def learn_rate(self):             # <<<<<<<<<<<<<<
 *         return self.alpha
 * 
 */
  __pyx_tuple__53 = PyTuple_Pack(1, __pyx_n_s_self); if (unlikely(!__pyx_tuple__53)) __PYX_ERR(0, 184, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__53);
  __Pyx_GIVEREF(__pyx_tuple__53);
  __pyx_codeobj__26 = (PyObject*)__Pyx_PyCode_New(1, 0, 1, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__53, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_learn_rate, 184, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__26)) __PYX_ERR(0, 184, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":188
 * 
 *     @learn_rate.setter
 *     def learn_rate(self, learn_rate):             # <<<<<<<<<<<<<<
 *         self.alpha = learn_rate
 * 
 */
  __pyx_tuple__54 = PyTuple_Pack(2, __pyx_n_s_self, __pyx_n_s_learn_rate); if (unlikely(!__pyx_tuple__54)) __PYX_ERR(0, 188, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__54);
  __Pyx_GIVEREF(__pyx_tuple__54);
  __pyx_codeobj__27 = (PyObject*)__Pyx_PyCode_New(2, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__54, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_learn_rate, 188, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__27)) __PYX_ERR(0, 188, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":191
 *         self.alpha = learn_rate
 * 
 *     def lr(self, nr_upd):             # <<<<<<<<<<<<<<
 *         alpha = anneal(self.alpha, self.decay, self.decay_steps, nr_upd)
 *         if self.b1 == 0. or self.b2 == 0.:
 */
  __pyx_tuple__55 = PyTuple_Pack(5, __pyx_n_s_self, __pyx_n_s_nr_upd, __pyx_n_s_alpha, __pyx_n_s_fix1, __pyx_n_s_fix2); if (unlikely(!__pyx_tuple__55)) __PYX_ERR(0, 191, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__55);
  __Pyx_GIVEREF(__pyx_tuple__55);
  __pyx_codeobj__28 = (PyObject*)__Pyx_PyCode_New(2, 0, 5, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__55, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_lr, 191, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__28)) __PYX_ERR(0, 191, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":199
 *         return alpha * numpy.sqrt(fix2) / fix1
 * 
 *     def __call__(self, weights, gradient, lr_scale=1., key=None):             # <<<<<<<<<<<<<<
 *         assert len(gradient) >= 1
 *         xp = get_array_module(weights)
 */
  __pyx_tuple__56 = PyTuple_Pack(8, __pyx_n_s_self, __pyx_n_s_weights, __pyx_n_s_gradient, __pyx_n_s_lr_scale, __pyx_n_s_key, __pyx_n_s_xp, __pyx_n_s_nr_upd, __pyx_n_s_slow); if (unlikely(!__pyx_tuple__56)) __PYX_ERR(0, 199, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__56);
  __Pyx_GIVEREF(__pyx_tuple__56);
  __pyx_codeobj__29 = (PyObject*)__Pyx_PyCode_New(5, 0, 8, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__56, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_call, 199, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__29)) __PYX_ERR(0, 199, __pyx_L1_error)
  __pyx_tuple__57 = PyTuple_Pack(2, ((PyObject*)__pyx_float_1_), ((PyObject *)Py_None)); if (unlikely(!__pyx_tuple__57)) __PYX_ERR(0, 199, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__57);
  __Pyx_GIVEREF(__pyx_tuple__57);

  /* "thinc/neural/optimizers.pyx":242
 *             self.ops.update_averages(self.averages[key], weights, nr_upd)
 * 
 *     def _radam(self, xp, weights, gradient, lr_scale, key, nr_upd):             # <<<<<<<<<<<<<<
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 */
  __pyx_tuple__58 = PyTuple_Pack(23, __pyx_n_s_self, __pyx_n_s_xp, __pyx_n_s_weights, __pyx_n_s_gradient, __pyx_n_s_lr_scale, __pyx_n_s_key, __pyx_n_s_nr_upd, __pyx_n_s_beta1, __pyx_n_s_beta2, __pyx_n_s_eps, __pyx_n_s_sma_inf, __pyx_n_s_exp_avg, __pyx_n_s_exp_avg_sq, __pyx_n_s_bias_correction1, __pyx_n_s_bias_correction2, __pyx_n_s_sma_t, __pyx_n_s_update, __pyx_n_s_r_t, __pyx_n_s_w_norm, __pyx_n_s_u_norm, __pyx_n_s_phi_p, __pyx_n_s_local_lr, __pyx_n_s_lr); if (unlikely(!__pyx_tuple__58)) __PYX_ERR(0, 242, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__58);
  __Pyx_GIVEREF(__pyx_tuple__58);
  __pyx_codeobj__31 = (PyObject*)__Pyx_PyCode_New(7, 0, 23, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__58, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_radam, 242, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__31)) __PYX_ERR(0, 242, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":294
 *         weights -= lr * update
 * 
 *     def _nesterov(self, xp, weights, gradient, lr_scale, key):             # <<<<<<<<<<<<<<
 *         # http://cs231n.github.io/neural-networks-3/
 *         # v_prev = v # back this up
 */
  __pyx_tuple__59 = PyTuple_Pack(8, __pyx_n_s_self, __pyx_n_s_xp, __pyx_n_s_weights, __pyx_n_s_gradient, __pyx_n_s_lr_scale, __pyx_n_s_key, __pyx_n_s_lr, __pyx_n_s_momentum); if (unlikely(!__pyx_tuple__59)) __PYX_ERR(0, 294, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__59);
  __Pyx_GIVEREF(__pyx_tuple__59);
  __pyx_codeobj__32 = (PyObject*)__Pyx_PyCode_New(6, 0, 8, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__59, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_nesterov_2, 294, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__32)) __PYX_ERR(0, 294, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":313
 *         weights += (1+self.b1) * momentum
 * 
 *     def _adam(self, xp, weights, gradient, lr_scale, key, nr_upd):             # <<<<<<<<<<<<<<
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 */
  __pyx_tuple__60 = PyTuple_Pack(13, __pyx_n_s_self, __pyx_n_s_xp, __pyx_n_s_weights, __pyx_n_s_gradient, __pyx_n_s_lr_scale, __pyx_n_s_key, __pyx_n_s_nr_upd, __pyx_n_s_mom1, __pyx_n_s_mom2, __pyx_n_s_lr, __pyx_n_s_b1, __pyx_n_s_b2, __pyx_n_s_eps); if (unlikely(!__pyx_tuple__60)) __PYX_ERR(0, 313, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__60);
  __Pyx_GIVEREF(__pyx_tuple__60);
  __pyx_codeobj__33 = (PyObject*)__Pyx_PyCode_New(7, 0, 13, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__60, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_adam, 313, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__33)) __PYX_ERR(0, 313, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":329
 * 
 * 
 * def _make_ops(ops):             # <<<<<<<<<<<<<<
 *     if ops == "CupyOps":
 *         return CupyOps()
 */
  __pyx_tuple__61 = PyTuple_Pack(2, __pyx_n_s_ops, __pyx_n_s_Model); if (unlikely(!__pyx_tuple__61)) __PYX_ERR(0, 329, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__61);
  __Pyx_GIVEREF(__pyx_tuple__61);
  __pyx_codeobj__34 = (PyObject*)__Pyx_PyCode_New(1, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__61, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_make_ops, 329, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__34)) __PYX_ERR(0, 329, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":343
 * # These are deprecated
 * 
 * def Adam(*args, **kwargs):             # <<<<<<<<<<<<<<
 *     return Optimizer(*args, **kwargs)
 * 
 */
  __pyx_tuple__62 = PyTuple_Pack(2, __pyx_n_s_args, __pyx_n_s_kwargs); if (unlikely(!__pyx_tuple__62)) __PYX_ERR(0, 343, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__62);
  __Pyx_GIVEREF(__pyx_tuple__62);
  __pyx_codeobj__35 = (PyObject*)__Pyx_PyCode_New(0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS|CO_VARARGS|CO_VARKEYWORDS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__62, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_Adam, 343, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__35)) __PYX_ERR(0, 343, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":346
 *     return Optimizer(*args, **kwargs)
 * 
 * def SGD(*args, **kwargs):             # <<<<<<<<<<<<<<
 *     kwargs.setdefault('beta1', 0.)
 *     kwargs.setdefault('beta2', 0.)
 */
  __pyx_tuple__63 = PyTuple_Pack(2, __pyx_n_s_args, __pyx_n_s_kwargs); if (unlikely(!__pyx_tuple__63)) __PYX_ERR(0, 346, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__63);
  __Pyx_GIVEREF(__pyx_tuple__63);
  __pyx_codeobj__36 = (PyObject*)__Pyx_PyCode_New(0, 0, 2, 0, CO_OPTIMIZED|CO_NEWLOCALS|CO_VARARGS|CO_VARKEYWORDS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__63, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_SGD, 346, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__36)) __PYX_ERR(0, 346, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":352
 * 
 * 
 * def linear_decay(rate, decay, nr_upd):             # <<<<<<<<<<<<<<
 *     return rate * 1./(1. + decay * nr_upd)
 * 
 */
  __pyx_tuple__64 = PyTuple_Pack(3, __pyx_n_s_rate, __pyx_n_s_decay, __pyx_n_s_nr_upd); if (unlikely(!__pyx_tuple__64)) __PYX_ERR(0, 352, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__64);
  __Pyx_GIVEREF(__pyx_tuple__64);
  __pyx_codeobj__37 = (PyObject*)__Pyx_PyCode_New(3, 0, 3, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__64, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_linear_decay, 352, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__37)) __PYX_ERR(0, 352, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":356
 * 
 * 
 * def anneal(rate, decay, decay_steps, nr_upd):             # <<<<<<<<<<<<<<
 *     if decay == 0.0:
 *         return rate
 */
  __pyx_tuple__65 = PyTuple_Pack(4, __pyx_n_s_rate, __pyx_n_s_decay, __pyx_n_s_decay_steps, __pyx_n_s_nr_upd); if (unlikely(!__pyx_tuple__65)) __PYX_ERR(0, 356, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_tuple__65);
  __Pyx_GIVEREF(__pyx_tuple__65);
  __pyx_codeobj__38 = (PyObject*)__Pyx_PyCode_New(4, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__65, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_optimizers_pyx, __pyx_n_s_anneal, 356, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__38)) __PYX_ERR(0, 356, __pyx_L1_error)
  __Pyx_RefNannyFinishContext();
  return 0;
  __pyx_L1_error:;
  __Pyx_RefNannyFinishContext();
  return -1;
}

static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) {
  __pyx_umethod_PyDict_Type_setdefault.type = (PyObject*)&PyDict_Type;
  if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error);
  __pyx_float_0_ = PyFloat_FromDouble(0.); if (unlikely(!__pyx_float_0_)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_1_ = PyFloat_FromDouble(1.); if (unlikely(!__pyx_float_1_)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_0_0 = PyFloat_FromDouble(0.0); if (unlikely(!__pyx_float_0_0)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_0_5 = PyFloat_FromDouble(0.5); if (unlikely(!__pyx_float_0_5)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_0_9 = PyFloat_FromDouble(0.9); if (unlikely(!__pyx_float_0_9)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_10_ = PyFloat_FromDouble(10.); if (unlikely(!__pyx_float_10_)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_0_90 = PyFloat_FromDouble(0.90); if (unlikely(!__pyx_float_0_90)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_1eneg_4 = PyFloat_FromDouble(1e-4); if (unlikely(!__pyx_float_1eneg_4)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_0_001 = PyFloat_FromDouble(0.001); if (unlikely(!__pyx_float_0_001)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_0_999 = PyFloat_FromDouble(0.999); if (unlikely(!__pyx_float_0_999)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_float_1eneg_08 = PyFloat_FromDouble(1e-08); if (unlikely(!__pyx_float_1eneg_08)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_2 = PyInt_FromLong(2); if (unlikely(!__pyx_int_2)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_4 = PyInt_FromLong(4); if (unlikely(!__pyx_int_4)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_int_10 = PyInt_FromLong(10); if (unlikely(!__pyx_int_10)) __PYX_ERR(0, 1, __pyx_L1_error)
  return 0;
  __pyx_L1_error:;
  return -1;
}

static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/
static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/
static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/
static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/
static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/
static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/
static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/

static int __Pyx_modinit_global_init_code(void) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0);
  /*--- Global init code ---*/
  __Pyx_RefNannyFinishContext();
  return 0;
}

static int __Pyx_modinit_variable_export_code(void) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0);
  /*--- Variable export code ---*/
  __Pyx_RefNannyFinishContext();
  return 0;
}

static int __Pyx_modinit_function_export_code(void) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0);
  /*--- Function export code ---*/
  __Pyx_RefNannyFinishContext();
  return 0;
}

static int __Pyx_modinit_type_init_code(void) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0);
  /*--- Type init code ---*/
  __Pyx_RefNannyFinishContext();
  return 0;
}

static int __Pyx_modinit_type_import_code(void) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0);
  /*--- Type import code ---*/
  __Pyx_RefNannyFinishContext();
  return 0;
}

static int __Pyx_modinit_variable_import_code(void) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0);
  /*--- Variable import code ---*/
  __Pyx_RefNannyFinishContext();
  return 0;
}

static int __Pyx_modinit_function_import_code(void) {
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0);
  /*--- Function import code ---*/
  __Pyx_RefNannyFinishContext();
  return 0;
}


#ifndef CYTHON_NO_PYINIT_EXPORT
#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
#elif PY_MAJOR_VERSION < 3
#ifdef __cplusplus
#define __Pyx_PyMODINIT_FUNC extern "C" void
#else
#define __Pyx_PyMODINIT_FUNC void
#endif
#else
#ifdef __cplusplus
#define __Pyx_PyMODINIT_FUNC extern "C" PyObject *
#else
#define __Pyx_PyMODINIT_FUNC PyObject *
#endif
#endif


#if PY_MAJOR_VERSION < 3
__Pyx_PyMODINIT_FUNC initoptimizers(void) CYTHON_SMALL_CODE; /*proto*/
__Pyx_PyMODINIT_FUNC initoptimizers(void)
#else
__Pyx_PyMODINIT_FUNC PyInit_optimizers(void) CYTHON_SMALL_CODE; /*proto*/
__Pyx_PyMODINIT_FUNC PyInit_optimizers(void)
#if CYTHON_PEP489_MULTI_PHASE_INIT
{
  return PyModuleDef_Init(&__pyx_moduledef);
}
static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) {
    #if PY_VERSION_HEX >= 0x030700A1
    static PY_INT64_T main_interpreter_id = -1;
    PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp);
    if (main_interpreter_id == -1) {
        main_interpreter_id = current_id;
        return (unlikely(current_id == -1)) ? -1 : 0;
    } else if (unlikely(main_interpreter_id != current_id))
    #else
    static PyInterpreterState *main_interpreter = NULL;
    PyInterpreterState *current_interpreter = PyThreadState_Get()->interp;
    if (!main_interpreter) {
        main_interpreter = current_interpreter;
    } else if (unlikely(main_interpreter != current_interpreter))
    #endif
    {
        PyErr_SetString(
            PyExc_ImportError,
            "Interpreter change detected - this module can only be loaded into one interpreter per process.");
        return -1;
    }
    return 0;
}
static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) {
    PyObject *value = PyObject_GetAttrString(spec, from_name);
    int result = 0;
    if (likely(value)) {
        if (allow_none || value != Py_None) {
            result = PyDict_SetItemString(moddict, to_name, value);
        }
        Py_DECREF(value);
    } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
        PyErr_Clear();
    } else {
        result = -1;
    }
    return result;
}
static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, CYTHON_UNUSED PyModuleDef *def) {
    PyObject *module = NULL, *moddict, *modname;
    if (__Pyx_check_single_interpreter())
        return NULL;
    if (__pyx_m)
        return __Pyx_NewRef(__pyx_m);
    modname = PyObject_GetAttrString(spec, "name");
    if (unlikely(!modname)) goto bad;
    module = PyModule_NewObject(modname);
    Py_DECREF(modname);
    if (unlikely(!module)) goto bad;
    moddict = PyModule_GetDict(module);
    if (unlikely(!moddict)) goto bad;
    if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad;
    if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad;
    if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad;
    if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad;
    return module;
bad:
    Py_XDECREF(module);
    return NULL;
}


static CYTHON_SMALL_CODE int __pyx_pymod_exec_optimizers(PyObject *__pyx_pyinit_module)
#endif
#endif
{
  __Pyx_TraceDeclarations
  PyObject *__pyx_t_1 = NULL;
  PyObject *__pyx_t_2 = NULL;
  PyObject *__pyx_t_3 = NULL;
  PyObject *__pyx_t_4 = NULL;
  PyObject *__pyx_t_5 = NULL;
  PyObject *__pyx_t_6 = NULL;
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannyDeclarations
  #if CYTHON_PEP489_MULTI_PHASE_INIT
  if (__pyx_m) {
    if (__pyx_m == __pyx_pyinit_module) return 0;
    PyErr_SetString(PyExc_RuntimeError, "Module 'optimizers' has already been imported. Re-initialisation is not supported.");
    return -1;
  }
  #elif PY_MAJOR_VERSION >= 3
  if (__pyx_m) return __Pyx_NewRef(__pyx_m);
  #endif
  #if CYTHON_REFNANNY
__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
if (!__Pyx_RefNanny) {
  PyErr_Clear();
  __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
  if (!__Pyx_RefNanny)
      Py_FatalError("failed to import 'refnanny' module");
}
#endif
  __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_optimizers(void)", 0);
  if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #ifdef __Pxy_PyFrame_Initialize_Offsets
  __Pxy_PyFrame_Initialize_Offsets();
  #endif
  __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error)
  __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error)
  #ifdef __Pyx_CyFunction_USED
  if (__pyx_CyFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #endif
  #ifdef __Pyx_FusedFunction_USED
  if (__pyx_FusedFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #endif
  #ifdef __Pyx_Coroutine_USED
  if (__pyx_Coroutine_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #endif
  #ifdef __Pyx_Generator_USED
  if (__pyx_Generator_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #endif
  #ifdef __Pyx_AsyncGen_USED
  if (__pyx_AsyncGen_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #endif
  #ifdef __Pyx_StopAsyncIteration_USED
  if (__pyx_StopAsyncIteration_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #endif
  /*--- Library function declarations ---*/
  /*--- Threads initialization code ---*/
  #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
  #ifdef WITH_THREAD /* Python build with threading support? */
  PyEval_InitThreads();
  #endif
  #endif
  /*--- Module creation code ---*/
  #if CYTHON_PEP489_MULTI_PHASE_INIT
  __pyx_m = __pyx_pyinit_module;
  Py_INCREF(__pyx_m);
  #else
  #if PY_MAJOR_VERSION < 3
  __pyx_m = Py_InitModule4("optimizers", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m);
  #else
  __pyx_m = PyModule_Create(&__pyx_moduledef);
  #endif
  if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error)
  #endif
  __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error)
  Py_INCREF(__pyx_d);
  __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error)
  Py_INCREF(__pyx_b);
  __pyx_cython_runtime = PyImport_AddModule((char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error)
  Py_INCREF(__pyx_cython_runtime);
  if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error);
  /*--- Initialize various global constants etc. ---*/
  if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
  if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #endif
  if (__pyx_module_is_main_thinc__neural__optimizers) {
    if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  }
  #if PY_MAJOR_VERSION >= 3
  {
    PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error)
    if (!PyDict_GetItemString(modules, "thinc.neural.optimizers")) {
      if (unlikely(PyDict_SetItemString(modules, "thinc.neural.optimizers", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
    }
  }
  #endif
  /*--- Builtin init code ---*/
  if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  /*--- Constants init code ---*/
  if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  /*--- Global type/function init code ---*/
  (void)__Pyx_modinit_global_init_code();
  (void)__Pyx_modinit_variable_export_code();
  (void)__Pyx_modinit_function_export_code();
  (void)__Pyx_modinit_type_init_code();
  (void)__Pyx_modinit_type_import_code();
  (void)__Pyx_modinit_variable_import_code();
  (void)__Pyx_modinit_function_import_code();
  /*--- Execution code ---*/
  #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED)
  if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  #endif
  __Pyx_TraceCall("__Pyx_PyMODINIT_FUNC PyInit_optimizers(void)", __pyx_f[0], 1, 0, __PYX_ERR(0, 1, __pyx_L1_error));

  /* "thinc/neural/optimizers.pyx":8
 * from libc.math cimport exp, sqrt
 * from libc.stdlib cimport calloc, malloc, free
 * import math             # <<<<<<<<<<<<<<
 * 
 * from collections import defaultdict
 */
  __pyx_t_1 = __Pyx_Import(__pyx_n_s_math, 0, -1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_math, __pyx_t_1) < 0) __PYX_ERR(0, 8, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":10
 * import math
 * 
 * from collections import defaultdict             # <<<<<<<<<<<<<<
 * import numpy
 * 
 */
  __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_INCREF(__pyx_n_s_defaultdict);
  __Pyx_GIVEREF(__pyx_n_s_defaultdict);
  PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_defaultdict);
  __pyx_t_2 = __Pyx_Import(__pyx_n_s_collections, __pyx_t_1, -1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_defaultdict); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_defaultdict, __pyx_t_1) < 0) __PYX_ERR(0, 10, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":11
 * 
 * from collections import defaultdict
 * import numpy             # <<<<<<<<<<<<<<
 * 
 * from ..typedefs cimport weight_t
 */
  __pyx_t_2 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 11, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_numpy, __pyx_t_2) < 0) __PYX_ERR(0, 11, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":14
 * 
 * from ..typedefs cimport weight_t
 * from .ops import NumpyOps, CupyOps, add_gradient_noise             # <<<<<<<<<<<<<<
 * from .util import get_array_module
 * from .._registry import registry
 */
  __pyx_t_2 = PyList_New(3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_INCREF(__pyx_n_s_NumpyOps);
  __Pyx_GIVEREF(__pyx_n_s_NumpyOps);
  PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_NumpyOps);
  __Pyx_INCREF(__pyx_n_s_CupyOps);
  __Pyx_GIVEREF(__pyx_n_s_CupyOps);
  PyList_SET_ITEM(__pyx_t_2, 1, __pyx_n_s_CupyOps);
  __Pyx_INCREF(__pyx_n_s_add_gradient_noise);
  __Pyx_GIVEREF(__pyx_n_s_add_gradient_noise);
  PyList_SET_ITEM(__pyx_t_2, 2, __pyx_n_s_add_gradient_noise);
  __pyx_t_1 = __Pyx_Import(__pyx_n_s_ops, __pyx_t_2, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_1, __pyx_n_s_NumpyOps); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_NumpyOps, __pyx_t_2) < 0) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_1, __pyx_n_s_CupyOps); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_CupyOps, __pyx_t_2) < 0) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_1, __pyx_n_s_add_gradient_noise); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_add_gradient_noise, __pyx_t_2) < 0) __PYX_ERR(0, 14, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":15
 * from ..typedefs cimport weight_t
 * from .ops import NumpyOps, CupyOps, add_gradient_noise
 * from .util import get_array_module             # <<<<<<<<<<<<<<
 * from .._registry import registry
 * 
 */
  __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_INCREF(__pyx_n_s_get_array_module);
  __Pyx_GIVEREF(__pyx_n_s_get_array_module);
  PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_get_array_module);
  __pyx_t_2 = __Pyx_Import(__pyx_n_s_util, __pyx_t_1, 1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 15, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_get_array_module); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 15, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_get_array_module, __pyx_t_1) < 0) __PYX_ERR(0, 15, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":16
 * from .ops import NumpyOps, CupyOps, add_gradient_noise
 * from .util import get_array_module
 * from .._registry import registry             # <<<<<<<<<<<<<<
 * 
 * 
 */
  __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 16, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_INCREF(__pyx_n_s_registry);
  __Pyx_GIVEREF(__pyx_n_s_registry);
  PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_registry);
  __pyx_t_1 = __Pyx_Import(__pyx_n_s_registry_2, __pyx_t_2, 2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 16, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_1, __pyx_n_s_registry); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 16, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_registry, __pyx_t_2) < 0) __PYX_ERR(0, 16, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":20
 * 
 * SGD_DEFAULTS = {
 *     "L2": 1e-4,             # <<<<<<<<<<<<<<
 *     "max_grad_norm": 10,
 *     "L2_is_weight_decay": False
 */
  __pyx_t_1 = __Pyx_PyDict_NewPresized(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 20, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_L2, __pyx_float_1eneg_4) < 0) __PYX_ERR(0, 20, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_max_grad_norm, __pyx_int_10) < 0) __PYX_ERR(0, 20, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":22
 *     "L2": 1e-4,
 *     "max_grad_norm": 10,
 *     "L2_is_weight_decay": False             # <<<<<<<<<<<<<<
 * }
 * 
 */
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_L2_is_weight_decay, Py_False) < 0) __PYX_ERR(0, 20, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_SGD_DEFAULTS, __pyx_t_1) < 0) __PYX_ERR(0, 19, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":27
 * 
 * ADAM_DEFAULTS = {
 *     "learn_rate": 0.001,             # <<<<<<<<<<<<<<
 *     "beta1": 0.9,
 *     "beta2": 0.999,
 */
  __pyx_t_1 = __Pyx_PyDict_NewPresized(8); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 27, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_learn_rate, __pyx_float_0_001) < 0) __PYX_ERR(0, 27, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_beta1, __pyx_float_0_9) < 0) __PYX_ERR(0, 27, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_beta2, __pyx_float_0_999) < 0) __PYX_ERR(0, 27, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_eps, __pyx_float_1eneg_08) < 0) __PYX_ERR(0, 27, __pyx_L1_error)

  /* "thinc/neural/optimizers.pyx":31
 *     "beta2": 0.999,
 *     "eps": 1e-08,
 *     "L2": SGD_DEFAULTS["L2"],             # <<<<<<<<<<<<<<
 *     "max_grad_norm": SGD_DEFAULTS["max_grad_norm"],
 *     "L2_is_weight_decay": SGD_DEFAULTS["L2_is_weight_decay"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_SGD_DEFAULTS); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_2, __pyx_n_s_L2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 31, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_L2, __pyx_t_3) < 0) __PYX_ERR(0, 27, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":32
 *     "eps": 1e-08,
 *     "L2": SGD_DEFAULTS["L2"],
 *     "max_grad_norm": SGD_DEFAULTS["max_grad_norm"],             # <<<<<<<<<<<<<<
 *     "L2_is_weight_decay": SGD_DEFAULTS["L2_is_weight_decay"],
 *     "schedules": None
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_SGD_DEFAULTS); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 32, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_2 = __Pyx_PyObject_Dict_GetItem(__pyx_t_3, __pyx_n_s_max_grad_norm); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 32, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_max_grad_norm, __pyx_t_2) < 0) __PYX_ERR(0, 27, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":33
 *     "L2": SGD_DEFAULTS["L2"],
 *     "max_grad_norm": SGD_DEFAULTS["max_grad_norm"],
 *     "L2_is_weight_decay": SGD_DEFAULTS["L2_is_weight_decay"],             # <<<<<<<<<<<<<<
 *     "schedules": None
 * }
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_SGD_DEFAULTS); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 33, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_2, __pyx_n_s_L2_is_weight_decay); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 33, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_L2_is_weight_decay, __pyx_t_3) < 0) __PYX_ERR(0, 27, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":34
 *     "max_grad_norm": SGD_DEFAULTS["max_grad_norm"],
 *     "L2_is_weight_decay": SGD_DEFAULTS["L2_is_weight_decay"],
 *     "schedules": None             # <<<<<<<<<<<<<<
 * }
 * 
 */
  if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_schedules, Py_None) < 0) __PYX_ERR(0, 27, __pyx_L1_error)
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_ADAM_DEFAULTS, __pyx_t_1) < 0) __PYX_ERR(0, 26, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":39
 * 
 * @registry.optimizers.register("RAdam.v1")
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 39, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_1, __pyx_n_s_learn_rate); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 39, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_k_ = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":40
 * @registry.optimizers.register("RAdam.v1")
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],             # <<<<<<<<<<<<<<
 *         beta1=ADAM_DEFAULTS["beta1"],
 *         beta2=ADAM_DEFAULTS["beta2"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 40, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_t_3, __pyx_n_s_L2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 40, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_k__2 = __pyx_t_1;
  __Pyx_GIVEREF(__pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":41
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],             # <<<<<<<<<<<<<<
 *         beta2=ADAM_DEFAULTS["beta2"],
 *         eps=ADAM_DEFAULTS["eps"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 41, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_1, __pyx_n_s_beta1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 41, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_k__3 = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":42
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 *         beta2=ADAM_DEFAULTS["beta2"],             # <<<<<<<<<<<<<<
 *         eps=ADAM_DEFAULTS["eps"],
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 42, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_t_3, __pyx_n_s_beta2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 42, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_k__4 = __pyx_t_1;
  __Pyx_GIVEREF(__pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":43
 *         beta1=ADAM_DEFAULTS["beta1"],
 *         beta2=ADAM_DEFAULTS["beta2"],
 *         eps=ADAM_DEFAULTS["eps"],             # <<<<<<<<<<<<<<
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 43, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_1, __pyx_n_s_eps); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 43, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_k__5 = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":44
 *         beta2=ADAM_DEFAULTS["beta2"],
 *         eps=ADAM_DEFAULTS["eps"],
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],             # <<<<<<<<<<<<<<
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],
 *         use_averages=True,
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 44, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_t_3, __pyx_n_s_max_grad_norm); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 44, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_k__6 = __pyx_t_1;
  __Pyx_GIVEREF(__pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":45
 *         eps=ADAM_DEFAULTS["eps"],
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],             # <<<<<<<<<<<<<<
 *         use_averages=True,
 *         schedules=None,
 */
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 45, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_1, __pyx_n_s_L2_is_weight_decay); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 45, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_k__7 = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":38
 * 
 * 
 * @registry.optimizers.register("RAdam.v1")             # <<<<<<<<<<<<<<
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_registry); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 38, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_optimizers); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_register); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 38, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__39, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 38, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":39
 * 
 * @registry.optimizers.register("RAdam.v1")
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */
  __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_5thinc_6neural_10optimizers_1create_RAdam, NULL, __pyx_n_s_thinc_neural_optimizers); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 39, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);

  /* "thinc/neural/optimizers.pyx":38
 * 
 * 
 * @registry.optimizers.register("RAdam.v1")             # <<<<<<<<<<<<<<
 * def create_RAdam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],
 */
  __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 38, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_create_RAdam, __pyx_t_2) < 0) __PYX_ERR(0, 39, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":68
 * 
 * @registry.optimizers.register("Adam.v1")
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 68, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_2, __pyx_n_s_learn_rate); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 68, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_k__9 = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":69
 * @registry.optimizers.register("Adam.v1")
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],             # <<<<<<<<<<<<<<
 *         beta1=ADAM_DEFAULTS["beta1"],
 *         beta2=ADAM_DEFAULTS["beta2"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 69, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_2 = __Pyx_PyObject_Dict_GetItem(__pyx_t_3, __pyx_n_s_L2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 69, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_k__10 = __pyx_t_2;
  __Pyx_GIVEREF(__pyx_t_2);
  __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":70
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],             # <<<<<<<<<<<<<<
 *         beta2=ADAM_DEFAULTS["beta2"],
 *         eps=ADAM_DEFAULTS["eps"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 70, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_2, __pyx_n_s_beta1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 70, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_k__11 = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":71
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 *         beta2=ADAM_DEFAULTS["beta2"],             # <<<<<<<<<<<<<<
 *         eps=ADAM_DEFAULTS["eps"],
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 71, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_2 = __Pyx_PyObject_Dict_GetItem(__pyx_t_3, __pyx_n_s_beta2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 71, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_k__12 = __pyx_t_2;
  __Pyx_GIVEREF(__pyx_t_2);
  __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":72
 *         beta1=ADAM_DEFAULTS["beta1"],
 *         beta2=ADAM_DEFAULTS["beta2"],
 *         eps=ADAM_DEFAULTS["eps"],             # <<<<<<<<<<<<<<
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 72, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_2, __pyx_n_s_eps); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 72, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_k__13 = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":73
 *         beta2=ADAM_DEFAULTS["beta2"],
 *         eps=ADAM_DEFAULTS["eps"],
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],             # <<<<<<<<<<<<<<
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],
 *         use_averages=True,
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 73, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_2 = __Pyx_PyObject_Dict_GetItem(__pyx_t_3, __pyx_n_s_max_grad_norm); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 73, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_k__14 = __pyx_t_2;
  __Pyx_GIVEREF(__pyx_t_2);
  __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":74
 *         eps=ADAM_DEFAULTS["eps"],
 *         max_grad_norm=ADAM_DEFAULTS["max_grad_norm"],
 *         L2_is_weight_decay=ADAM_DEFAULTS["L2_is_weight_decay"],             # <<<<<<<<<<<<<<
 *         use_averages=True,
 *         ops=None,
 */
  __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_ADAM_DEFAULTS); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 74, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_2, __pyx_n_s_L2_is_weight_decay); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 74, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_k__15 = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":67
 * 
 * 
 * @registry.optimizers.register("Adam.v1")             # <<<<<<<<<<<<<<
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_registry); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 67, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_optimizers); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 67, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_register); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 67, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__41, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 67, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":68
 * 
 * @registry.optimizers.register("Adam.v1")
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],             # <<<<<<<<<<<<<<
 *         L2=ADAM_DEFAULTS["L2"],
 *         beta1=ADAM_DEFAULTS["beta1"],
 */
  __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_5thinc_6neural_10optimizers_3create_Adam, NULL, __pyx_n_s_thinc_neural_optimizers); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 68, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);

  /* "thinc/neural/optimizers.pyx":67
 * 
 * 
 * @registry.optimizers.register("Adam.v1")             # <<<<<<<<<<<<<<
 * def create_Adam(learn_rate=ADAM_DEFAULTS["learn_rate"],
 *         L2=ADAM_DEFAULTS["L2"],
 */
  __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 67, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_create_Adam, __pyx_t_1) < 0) __PYX_ERR(0, 68, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":100
 * def create_SGD(learn_rate,
 *         ops=None,
 *         L2=SGD_DEFAULTS["L2"],             # <<<<<<<<<<<<<<
 *         max_grad_norm=SGD_DEFAULTS["max_grad_norm"],
 *         L2_is_weight_decay=SGD_DEFAULTS["L2_is_weight_decay"],
 */
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_SGD_DEFAULTS); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 100, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_1, __pyx_n_s_L2); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 100, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_k__17 = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":101
 *         ops=None,
 *         L2=SGD_DEFAULTS["L2"],
 *         max_grad_norm=SGD_DEFAULTS["max_grad_norm"],             # <<<<<<<<<<<<<<
 *         L2_is_weight_decay=SGD_DEFAULTS["L2_is_weight_decay"],
 *         use_averages=True,
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_SGD_DEFAULTS); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 101, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_Dict_GetItem(__pyx_t_3, __pyx_n_s_max_grad_norm); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 101, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_k__18 = __pyx_t_1;
  __Pyx_GIVEREF(__pyx_t_1);
  __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":102
 *         L2=SGD_DEFAULTS["L2"],
 *         max_grad_norm=SGD_DEFAULTS["max_grad_norm"],
 *         L2_is_weight_decay=SGD_DEFAULTS["L2_is_weight_decay"],             # <<<<<<<<<<<<<<
 *         use_averages=True,
 *         schedules=None
 */
  __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_SGD_DEFAULTS); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_t_3 = __Pyx_PyObject_Dict_GetItem(__pyx_t_1, __pyx_n_s_L2_is_weight_decay); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 102, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_k__19 = __pyx_t_3;
  __Pyx_GIVEREF(__pyx_t_3);
  __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":97
 * 
 * 
 * @registry.optimizers.register("SGD.v1")             # <<<<<<<<<<<<<<
 * def create_SGD(learn_rate,
 *         ops=None,
 */
  __Pyx_GetModuleGlobalName(__pyx_t_3, __pyx_n_s_registry); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 97, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_optimizers); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 97, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_register); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 97, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_tuple__43, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 97, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;

  /* "thinc/neural/optimizers.pyx":98
 * 
 * @registry.optimizers.register("SGD.v1")
 * def create_SGD(learn_rate,             # <<<<<<<<<<<<<<
 *         ops=None,
 *         L2=SGD_DEFAULTS["L2"],
 */
  __pyx_t_3 = PyCFunction_NewEx(&__pyx_mdef_5thinc_6neural_10optimizers_5create_SGD, NULL, __pyx_n_s_thinc_neural_optimizers); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 98, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);

  /* "thinc/neural/optimizers.pyx":97
 * 
 * 
 * @registry.optimizers.register("SGD.v1")             # <<<<<<<<<<<<<<
 * def create_SGD(learn_rate,
 *         ops=None,
 */
  __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 97, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_create_SGD, __pyx_t_2) < 0) __PYX_ERR(0, 98, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":112
 * 
 * 
 * class Optimizer(object):             # <<<<<<<<<<<<<<
 *     '''Do various flavours of stochastic gradient descent, with first and
 *     second order momentum.
 */
  __pyx_t_2 = __Pyx_CalculateMetaclass(NULL, __pyx_tuple__45); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 112, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  __pyx_t_3 = __Pyx_Py3MetaclassPrepare(__pyx_t_2, __pyx_tuple__45, __pyx_n_s_Optimizer, __pyx_n_s_Optimizer, (PyObject *) NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_kp_s_Do_various_flavours_of_stochasti); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 112, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_3);

  /* "thinc/neural/optimizers.pyx":124
 *     '''
 *     @classmethod
 *     def from_config(cls, config):             # <<<<<<<<<<<<<<
 *         return registry.make_from_config(config)
 * 
 */
  __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_1from_config, __Pyx_CYFUNCTION_CLASSMETHOD, __pyx_n_s_Optimizer_from_config, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__21)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 124, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);

  /* "thinc/neural/optimizers.pyx":123
 *     * b1=0.999, b2=0.9: Adam
 *     '''
 *     @classmethod             # <<<<<<<<<<<<<<
 *     def from_config(cls, config):
 *         return registry.make_from_config(config)
 */
  __pyx_t_4 = __Pyx_Method_ClassMethod(__pyx_t_1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 123, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_from_config, __pyx_t_4) < 0) __PYX_ERR(0, 124, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":127
 *         return registry.make_from_config(config)
 * 
 *     def __init__(self, ops, lr, L2=1e-4, beta1=0.90, beta2=0.999, eps=1e-08,             # <<<<<<<<<<<<<<
 *                  max_grad_norm=10., gradient_noise=0.0, nesterov=True,
 *                  L2_is_weight_decay=False, lookahead_k=0, lookahead_alpha=0.5,
 */
  __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_3__init__, 0, __pyx_n_s_Optimizer___init, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__22)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 127, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_4, __pyx_tuple__49);
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_init, __pyx_t_4) < 0) __PYX_ERR(0, 127, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":166
 *         self.b2_decay = 0.0
 * 
 *     def to_gpu(self):             # <<<<<<<<<<<<<<
 *         self.ops = CupyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 */
  __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_5to_gpu, 0, __pyx_n_s_Optimizer_to_gpu, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__23)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 166, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_to_gpu, __pyx_t_4) < 0) __PYX_ERR(0, 166, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":172
 *                 params[key] = self.ops.xp.asarray(value, dtype=value.dtype)
 * 
 *     def to_cpu(self):             # <<<<<<<<<<<<<<
 *         self.ops = NumpyOps()
 *         for params in (self.mom1, self.mom2, self.averages):
 */
  __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_7to_cpu, 0, __pyx_n_s_Optimizer_to_cpu, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__24)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 172, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_to_cpu, __pyx_t_4) < 0) __PYX_ERR(0, 172, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":179
 *                     params[key] = value.get()
 * 
 *     def step_schedules(self):             # <<<<<<<<<<<<<<
 *         for key, schedule in self.schedules.items():
 *             setattr(self, key, next(schedule))
 */
  __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_9step_schedules, 0, __pyx_n_s_Optimizer_step_schedules, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__25)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 179, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_step_schedules, __pyx_t_4) < 0) __PYX_ERR(0, 179, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":184
 * 
 *     @property
 *     def learn_rate(self):             # <<<<<<<<<<<<<<
 *         return self.alpha
 * 
 */
  __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_11learn_rate, 0, __pyx_n_s_Optimizer_learn_rate, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__26)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 184, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);

  /* "thinc/neural/optimizers.pyx":183
 *             setattr(self, key, next(schedule))
 * 
 *     @property             # <<<<<<<<<<<<<<
 *     def learn_rate(self):
 *         return self.alpha
 */
  __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_property, __pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 183, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_learn_rate, __pyx_t_1) < 0) __PYX_ERR(0, 184, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":187
 *         return self.alpha
 * 
 *     @learn_rate.setter             # <<<<<<<<<<<<<<
 *     def learn_rate(self, learn_rate):
 *         self.alpha = learn_rate
 */
  __pyx_t_4 = PyObject_GetItem(__pyx_t_3, __pyx_n_s_learn_rate);
  if (unlikely(!__pyx_t_4)) {
    PyErr_Clear();
    __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_learn_rate);
  }
  if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 187, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_setter); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 187, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_5);
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;

  /* "thinc/neural/optimizers.pyx":188
 * 
 *     @learn_rate.setter
 *     def learn_rate(self, learn_rate):             # <<<<<<<<<<<<<<
 *         self.alpha = learn_rate
 * 
 */
  __pyx_t_4 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_13learn_rate, 0, __pyx_n_s_Optimizer_learn_rate, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__27)); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 188, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_4);
  __pyx_t_6 = NULL;
  if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_5))) {
    __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5);
    if (likely(__pyx_t_6)) {
      PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5);
      __Pyx_INCREF(__pyx_t_6);
      __Pyx_INCREF(function);
      __Pyx_DECREF_SET(__pyx_t_5, function);
    }
  }
  __pyx_t_1 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_4);
  __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0;
  __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
  if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 187, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_learn_rate, __pyx_t_1) < 0) __PYX_ERR(0, 188, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":191
 *         self.alpha = learn_rate
 * 
 *     def lr(self, nr_upd):             # <<<<<<<<<<<<<<
 *         alpha = anneal(self.alpha, self.decay, self.decay_steps, nr_upd)
 *         if self.b1 == 0. or self.b2 == 0.:
 */
  __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_15lr, 0, __pyx_n_s_Optimizer_lr, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__28)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 191, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_lr, __pyx_t_1) < 0) __PYX_ERR(0, 191, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":199
 *         return alpha * numpy.sqrt(fix2) / fix1
 * 
 *     def __call__(self, weights, gradient, lr_scale=1., key=None):             # <<<<<<<<<<<<<<
 *         assert len(gradient) >= 1
 *         xp = get_array_module(weights)
 */
  __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_17__call__, 0, __pyx_n_s_Optimizer___call, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__29)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 199, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_1, __pyx_tuple__57);
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_call, __pyx_t_1) < 0) __PYX_ERR(0, 199, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":242
 *             self.ops.update_averages(self.averages[key], weights, nr_upd)
 * 
 *     def _radam(self, xp, weights, gradient, lr_scale, key, nr_upd):             # <<<<<<<<<<<<<<
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 */
  __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_19_radam, 0, __pyx_n_s_Optimizer__radam, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__31)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 242, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_radam, __pyx_t_1) < 0) __PYX_ERR(0, 242, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":294
 *         weights -= lr * update
 * 
 *     def _nesterov(self, xp, weights, gradient, lr_scale, key):             # <<<<<<<<<<<<<<
 *         # http://cs231n.github.io/neural-networks-3/
 *         # v_prev = v # back this up
 */
  __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_21_nesterov, 0, __pyx_n_s_Optimizer__nesterov, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__32)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 294, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_nesterov_2, __pyx_t_1) < 0) __PYX_ERR(0, 294, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":313
 *         weights += (1+self.b1) * momentum
 * 
 *     def _adam(self, xp, weights, gradient, lr_scale, key, nr_upd):             # <<<<<<<<<<<<<<
 *         if key not in self.mom1:
 *             self.mom1[key] = self.ops.allocate(weights.size)
 */
  __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_5thinc_6neural_10optimizers_9Optimizer_23_adam, 0, __pyx_n_s_Optimizer__adam, NULL, __pyx_n_s_thinc_neural_optimizers, __pyx_d, ((PyObject *)__pyx_codeobj__33)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 313, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (__Pyx_SetNameInClass(__pyx_t_3, __pyx_n_s_adam, __pyx_t_1) < 0) __PYX_ERR(0, 313, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /* "thinc/neural/optimizers.pyx":112
 * 
 * 
 * class Optimizer(object):             # <<<<<<<<<<<<<<
 *     '''Do various flavours of stochastic gradient descent, with first and
 *     second order momentum.
 */
  __pyx_t_1 = __Pyx_Py3ClassCreate(__pyx_t_2, __pyx_n_s_Optimizer, __pyx_tuple__45, __pyx_t_3, NULL, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 112, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_Optimizer, __pyx_t_1) < 0) __PYX_ERR(0, 112, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":329
 * 
 * 
 * def _make_ops(ops):             # <<<<<<<<<<<<<<
 *     if ops == "CupyOps":
 *         return CupyOps()
 */
  __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5thinc_6neural_10optimizers_7_make_ops, NULL, __pyx_n_s_thinc_neural_optimizers); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 329, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_make_ops, __pyx_t_2) < 0) __PYX_ERR(0, 329, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":343
 * # These are deprecated
 * 
 * def Adam(*args, **kwargs):             # <<<<<<<<<<<<<<
 *     return Optimizer(*args, **kwargs)
 * 
 */
  __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5thinc_6neural_10optimizers_9Adam, NULL, __pyx_n_s_thinc_neural_optimizers); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 343, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_Adam, __pyx_t_2) < 0) __PYX_ERR(0, 343, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":346
 *     return Optimizer(*args, **kwargs)
 * 
 * def SGD(*args, **kwargs):             # <<<<<<<<<<<<<<
 *     kwargs.setdefault('beta1', 0.)
 *     kwargs.setdefault('beta2', 0.)
 */
  __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5thinc_6neural_10optimizers_11SGD, NULL, __pyx_n_s_thinc_neural_optimizers); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 346, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_SGD, __pyx_t_2) < 0) __PYX_ERR(0, 346, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":352
 * 
 * 
 * def linear_decay(rate, decay, nr_upd):             # <<<<<<<<<<<<<<
 *     return rate * 1./(1. + decay * nr_upd)
 * 
 */
  __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5thinc_6neural_10optimizers_13linear_decay, NULL, __pyx_n_s_thinc_neural_optimizers); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 352, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_linear_decay, __pyx_t_2) < 0) __PYX_ERR(0, 352, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":356
 * 
 * 
 * def anneal(rate, decay, decay_steps, nr_upd):             # <<<<<<<<<<<<<<
 *     if decay == 0.0:
 *         return rate
 */
  __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5thinc_6neural_10optimizers_15anneal, NULL, __pyx_n_s_thinc_neural_optimizers); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 356, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_anneal, __pyx_t_2) < 0) __PYX_ERR(0, 356, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;

  /* "thinc/neural/optimizers.pyx":1
 * # cython: profile=True             # <<<<<<<<<<<<<<
 * # cython: cdivision=True
 * # cython: infer_types=True
 */
  __pyx_t_2 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_2);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
  __Pyx_TraceReturn(Py_None, 0);

  /*--- Wrapped vars code ---*/

  goto __pyx_L0;
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_XDECREF(__pyx_t_2);
  __Pyx_XDECREF(__pyx_t_3);
  __Pyx_XDECREF(__pyx_t_4);
  __Pyx_XDECREF(__pyx_t_5);
  __Pyx_XDECREF(__pyx_t_6);
  if (__pyx_m) {
    if (__pyx_d) {
      __Pyx_AddTraceback("init thinc.neural.optimizers", __pyx_clineno, __pyx_lineno, __pyx_filename);
    }
    Py_CLEAR(__pyx_m);
  } else if (!PyErr_Occurred()) {
    PyErr_SetString(PyExc_ImportError, "init thinc.neural.optimizers");
  }
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  #if CYTHON_PEP489_MULTI_PHASE_INIT
  return (__pyx_m != NULL) ? 0 : -1;
  #elif PY_MAJOR_VERSION >= 3
  return __pyx_m;
  #else
  return;
  #endif
}

/* --- Runtime support code --- */
/* Refnanny */
#if CYTHON_REFNANNY
static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
    PyObject *m = NULL, *p = NULL;
    void *r = NULL;
    m = PyImport_ImportModule(modname);
    if (!m) goto end;
    p = PyObject_GetAttrString(m, "RefNannyAPI");
    if (!p) goto end;
    r = PyLong_AsVoidPtr(p);
end:
    Py_XDECREF(p);
    Py_XDECREF(m);
    return (__Pyx_RefNannyAPIStruct *)r;
}
#endif

/* PyObjectGetAttrStr */
#if CYTHON_USE_TYPE_SLOTS
static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) {
    PyTypeObject* tp = Py_TYPE(obj);
    if (likely(tp->tp_getattro))
        return tp->tp_getattro(obj, attr_name);
#if PY_MAJOR_VERSION < 3
    if (likely(tp->tp_getattr))
        return tp->tp_getattr(obj, PyString_AS_STRING(attr_name));
#endif
    return PyObject_GetAttr(obj, attr_name);
}
#endif

/* GetBuiltinName */
static PyObject *__Pyx_GetBuiltinName(PyObject *name) {
    PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name);
    if (unlikely(!result)) {
        PyErr_Format(PyExc_NameError,
#if PY_MAJOR_VERSION >= 3
            "name '%U' is not defined", name);
#else
            "name '%.200s' is not defined", PyString_AS_STRING(name));
#endif
    }
    return result;
}

/* RaiseDoubleKeywords */
static void __Pyx_RaiseDoubleKeywordsError(
    const char* func_name,
    PyObject* kw_name)
{
    PyErr_Format(PyExc_TypeError,
        #if PY_MAJOR_VERSION >= 3
        "%s() got multiple values for keyword argument '%U'", func_name, kw_name);
        #else
        "%s() got multiple values for keyword argument '%s'", func_name,
        PyString_AsString(kw_name));
        #endif
}

/* ParseKeywords */
static int __Pyx_ParseOptionalKeywords(
    PyObject *kwds,
    PyObject **argnames[],
    PyObject *kwds2,
    PyObject *values[],
    Py_ssize_t num_pos_args,
    const char* function_name)
{
    PyObject *key = 0, *value = 0;
    Py_ssize_t pos = 0;
    PyObject*** name;
    PyObject*** first_kw_arg = argnames + num_pos_args;
    while (PyDict_Next(kwds, &pos, &key, &value)) {
        name = first_kw_arg;
        while (*name && (**name != key)) name++;
        if (*name) {
            values[name-argnames] = value;
            continue;
        }
        name = first_kw_arg;
        #if PY_MAJOR_VERSION < 3
        if (likely(PyString_Check(key))) {
            while (*name) {
                if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
                        && _PyString_Eq(**name, key)) {
                    values[name-argnames] = value;
                    break;
                }
                name++;
            }
            if (*name) continue;
            else {
                PyObject*** argname = argnames;
                while (argname != first_kw_arg) {
                    if ((**argname == key) || (
                            (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key))
                             && _PyString_Eq(**argname, key))) {
                        goto arg_passed_twice;
                    }
                    argname++;
                }
            }
        } else
        #endif
        if (likely(PyUnicode_Check(key))) {
            while (*name) {
                int cmp = (**name == key) ? 0 :
                #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
                    (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
                #endif
                    PyUnicode_Compare(**name, key);
                if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
                if (cmp == 0) {
                    values[name-argnames] = value;
                    break;
                }
                name++;
            }
            if (*name) continue;
            else {
                PyObject*** argname = argnames;
                while (argname != first_kw_arg) {
                    int cmp = (**argname == key) ? 0 :
                    #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
                        (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
                    #endif
                        PyUnicode_Compare(**argname, key);
                    if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
                    if (cmp == 0) goto arg_passed_twice;
                    argname++;
                }
            }
        } else
            goto invalid_keyword_type;
        if (kwds2) {
            if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad;
        } else {
            goto invalid_keyword;
        }
    }
    return 0;
arg_passed_twice:
    __Pyx_RaiseDoubleKeywordsError(function_name, key);
    goto bad;
invalid_keyword_type:
    PyErr_Format(PyExc_TypeError,
        "%.200s() keywords must be strings", function_name);
    goto bad;
invalid_keyword:
    PyErr_Format(PyExc_TypeError,
    #if PY_MAJOR_VERSION < 3
        "%.200s() got an unexpected keyword argument '%.200s'",
        function_name, PyString_AsString(key));
    #else
        "%s() got an unexpected keyword argument '%U'",
        function_name, key);
    #endif
bad:
    return -1;
}

/* RaiseArgTupleInvalid */
static void __Pyx_RaiseArgtupleInvalid(
    const char* func_name,
    int exact,
    Py_ssize_t num_min,
    Py_ssize_t num_max,
    Py_ssize_t num_found)
{
    Py_ssize_t num_expected;
    const char *more_or_less;
    if (num_found < num_min) {
        num_expected = num_min;
        more_or_less = "at least";
    } else {
        num_expected = num_max;
        more_or_less = "at most";
    }
    if (exact) {
        more_or_less = "exactly";
    }
    PyErr_Format(PyExc_TypeError,
                 "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)",
                 func_name, more_or_less, num_expected,
                 (num_expected == 1) ? "" : "s", num_found);
}

/* PyErrFetchRestore */
#if CYTHON_FAST_THREAD_STATE
static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) {
    PyObject *tmp_type, *tmp_value, *tmp_tb;
    tmp_type = tstate->curexc_type;
    tmp_value = tstate->curexc_value;
    tmp_tb = tstate->curexc_traceback;
    tstate->curexc_type = type;
    tstate->curexc_value = value;
    tstate->curexc_traceback = tb;
    Py_XDECREF(tmp_type);
    Py_XDECREF(tmp_value);
    Py_XDECREF(tmp_tb);
}
static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) {
    *type = tstate->curexc_type;
    *value = tstate->curexc_value;
    *tb = tstate->curexc_traceback;
    tstate->curexc_type = 0;
    tstate->curexc_value = 0;
    tstate->curexc_traceback = 0;
}
#endif

/* Profile */
#if CYTHON_PROFILE
static int __Pyx_TraceSetupAndCall(PyCodeObject** code,
                                   PyFrameObject** frame,
                                   PyThreadState* tstate,
                                   const char *funcname,
                                   const char *srcfile,
                                   int firstlineno) {
    PyObject *type, *value, *traceback;
    int retval;
    if (*frame == NULL || !CYTHON_PROFILE_REUSE_FRAME) {
        if (*code == NULL) {
            *code = __Pyx_createFrameCodeObject(funcname, srcfile, firstlineno);
            if (*code == NULL) return 0;
        }
        *frame = PyFrame_New(
            tstate,                          /*PyThreadState *tstate*/
            *code,                           /*PyCodeObject *code*/
            __pyx_d,                  /*PyObject *globals*/
            0                                /*PyObject *locals*/
        );
        if (*frame == NULL) return 0;
        if (CYTHON_TRACE && (*frame)->f_trace == NULL) {
            Py_INCREF(Py_None);
            (*frame)->f_trace = Py_None;
        }
#if PY_VERSION_HEX < 0x030400B1
    } else {
        (*frame)->f_tstate = tstate;
#endif
    }
      __Pyx_PyFrame_SetLineNumber(*frame, firstlineno);
    retval = 1;
    tstate->tracing++;
    tstate->use_tracing = 0;
    __Pyx_ErrFetchInState(tstate, &type, &value, &traceback);
    #if CYTHON_TRACE
    if (tstate->c_tracefunc)
        retval = tstate->c_tracefunc(tstate->c_traceobj, *frame, PyTrace_CALL, NULL) == 0;
    if (retval && tstate->c_profilefunc)
    #endif
        retval = tstate->c_profilefunc(tstate->c_profileobj, *frame, PyTrace_CALL, NULL) == 0;
    tstate->use_tracing = (tstate->c_profilefunc ||
                           (CYTHON_TRACE && tstate->c_tracefunc));
    tstate->tracing--;
    if (retval) {
        __Pyx_ErrRestoreInState(tstate, type, value, traceback);
        return tstate->use_tracing && retval;
    } else {
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(traceback);
        return -1;
    }
}
static PyCodeObject *__Pyx_createFrameCodeObject(const char *funcname, const char *srcfile, int firstlineno) {
    PyCodeObject *py_code = 0;
#if PY_MAJOR_VERSION >= 3
    py_code = PyCode_NewEmpty(srcfile, funcname, firstlineno);
    if (likely(py_code)) {
        py_code->co_flags |= CO_OPTIMIZED | CO_NEWLOCALS;
    }
#else
    PyObject *py_srcfile = 0;
    PyObject *py_funcname = 0;
    py_funcname = PyString_FromString(funcname);
    if (unlikely(!py_funcname)) goto bad;
    py_srcfile = PyString_FromString(srcfile);
    if (unlikely(!py_srcfile)) goto bad;
    py_code = PyCode_New(
        0,
        0,
        0,
        CO_OPTIMIZED | CO_NEWLOCALS,
        __pyx_empty_bytes,     /*PyObject *code,*/
        __pyx_empty_tuple,     /*PyObject *consts,*/
        __pyx_empty_tuple,     /*PyObject *names,*/
        __pyx_empty_tuple,     /*PyObject *varnames,*/
        __pyx_empty_tuple,     /*PyObject *freevars,*/
        __pyx_empty_tuple,     /*PyObject *cellvars,*/
        py_srcfile,       /*PyObject *filename,*/
        py_funcname,      /*PyObject *name,*/
        firstlineno,
        __pyx_empty_bytes      /*PyObject *lnotab*/
    );
bad:
    Py_XDECREF(py_srcfile);
    Py_XDECREF(py_funcname);
#endif
    return py_code;
}
#endif

/* PyDictVersioning */
#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS
static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) {
    PyObject *dict = Py_TYPE(obj)->tp_dict;
    return likely(dict) ? __PYX_GET_DICT_VERSION(dict) : 0;
}
static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) {
    PyObject **dictptr = NULL;
    Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset;
    if (offset) {
#if CYTHON_COMPILING_IN_CPYTHON
        dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj);
#else
        dictptr = _PyObject_GetDictPtr(obj);
#endif
    }
    return (dictptr && *dictptr) ? __PYX_GET_DICT_VERSION(*dictptr) : 0;
}
static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) {
    PyObject *dict = Py_TYPE(obj)->tp_dict;
    if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict)))
        return 0;
    return obj_dict_version == __Pyx_get_object_dict_version(obj);
}
#endif

/* GetModuleGlobalName */
#if CYTHON_USE_DICT_VERSIONS
static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value)
#else
static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name)
#endif
{
    PyObject *result;
#if !CYTHON_AVOID_BORROWED_REFS
#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1
    result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash);
    __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version)
    if (likely(result)) {
        return __Pyx_NewRef(result);
    } else if (unlikely(PyErr_Occurred())) {
        return NULL;
    }
#else
    result = PyDict_GetItem(__pyx_d, name);
    __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version)
    if (likely(result)) {
        return __Pyx_NewRef(result);
    }
#endif
#else
    result = PyObject_GetItem(__pyx_d, name);
    __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version)
    if (likely(result)) {
        return __Pyx_NewRef(result);
    }
    PyErr_Clear();
#endif
    return __Pyx_GetBuiltinName(name);
}

/* PyCFunctionFastCall */
#if CYTHON_FAST_PYCCALL
static CYTHON_INLINE PyObject * __Pyx_PyCFunction_FastCall(PyObject *func_obj, PyObject **args, Py_ssize_t nargs) {
    PyCFunctionObject *func = (PyCFunctionObject*)func_obj;
    PyCFunction meth = PyCFunction_GET_FUNCTION(func);
    PyObject *self = PyCFunction_GET_SELF(func);
    int flags = PyCFunction_GET_FLAGS(func);
    assert(PyCFunction_Check(func));
    assert(METH_FASTCALL == (flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS)));
    assert(nargs >= 0);
    assert(nargs == 0 || args != NULL);
    /* _PyCFunction_FastCallDict() must not be called with an exception set,
       because it may clear it (directly or indirectly) and so the
       caller loses its exception */
    assert(!PyErr_Occurred());
    if ((PY_VERSION_HEX < 0x030700A0) || unlikely(flags & METH_KEYWORDS)) {
        return (*((__Pyx_PyCFunctionFastWithKeywords)(void*)meth)) (self, args, nargs, NULL);
    } else {
        return (*((__Pyx_PyCFunctionFast)(void*)meth)) (self, args, nargs);
    }
}
#endif

/* PyFunctionFastCall */
#if CYTHON_FAST_PYCALL
static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na,
                                               PyObject *globals) {
    PyFrameObject *f;
    PyThreadState *tstate = __Pyx_PyThreadState_Current;
    PyObject **fastlocals;
    Py_ssize_t i;
    PyObject *result;
    assert(globals != NULL);
    /* XXX Perhaps we should create a specialized
       PyFrame_New() that doesn't take locals, but does
       take builtins without sanity checking them.
       */
    assert(tstate != NULL);
    f = PyFrame_New(tstate, co, globals, NULL);
    if (f == NULL) {
        return NULL;
    }
    fastlocals = __Pyx_PyFrame_GetLocalsplus(f);
    for (i = 0; i < na; i++) {
        Py_INCREF(*args);
        fastlocals[i] = *args++;
    }
    result = PyEval_EvalFrameEx(f,0);
    ++tstate->recursion_depth;
    Py_DECREF(f);
    --tstate->recursion_depth;
    return result;
}
#if 1 || PY_VERSION_HEX < 0x030600B1
static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) {
    PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
    PyObject *globals = PyFunction_GET_GLOBALS(func);
    PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
    PyObject *closure;
#if PY_MAJOR_VERSION >= 3
    PyObject *kwdefs;
#endif
    PyObject *kwtuple, **k;
    PyObject **d;
    Py_ssize_t nd;
    Py_ssize_t nk;
    PyObject *result;
    assert(kwargs == NULL || PyDict_Check(kwargs));
    nk = kwargs ? PyDict_Size(kwargs) : 0;
    if (Py_EnterRecursiveCall((char*)" while calling a Python object")) {
        return NULL;
    }
    if (
#if PY_MAJOR_VERSION >= 3
            co->co_kwonlyargcount == 0 &&
#endif
            likely(kwargs == NULL || nk == 0) &&
            co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) {
        if (argdefs == NULL && co->co_argcount == nargs) {
            result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals);
            goto done;
        }
        else if (nargs == 0 && argdefs != NULL
                 && co->co_argcount == Py_SIZE(argdefs)) {
            /* function called with no arguments, but all parameters have
               a default value: use default values as arguments .*/
            args = &PyTuple_GET_ITEM(argdefs, 0);
            result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals);
            goto done;
        }
    }
    if (kwargs != NULL) {
        Py_ssize_t pos, i;
        kwtuple = PyTuple_New(2 * nk);
        if (kwtuple == NULL) {
            result = NULL;
            goto done;
        }
        k = &PyTuple_GET_ITEM(kwtuple, 0);
        pos = i = 0;
        while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) {
            Py_INCREF(k[i]);
            Py_INCREF(k[i+1]);
            i += 2;
        }
        nk = i / 2;
    }
    else {
        kwtuple = NULL;
        k = NULL;
    }
    closure = PyFunction_GET_CLOSURE(func);
#if PY_MAJOR_VERSION >= 3
    kwdefs = PyFunction_GET_KW_DEFAULTS(func);
#endif
    if (argdefs != NULL) {
        d = &PyTuple_GET_ITEM(argdefs, 0);
        nd = Py_SIZE(argdefs);
    }
    else {
        d = NULL;
        nd = 0;
    }
#if PY_MAJOR_VERSION >= 3
    result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL,
                               args, (int)nargs,
                               k, (int)nk,
                               d, (int)nd, kwdefs, closure);
#else
    result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL,
                               args, (int)nargs,
                               k, (int)nk,
                               d, (int)nd, closure);
#endif
    Py_XDECREF(kwtuple);
done:
    Py_LeaveRecursiveCall();
    return result;
}
#endif
#endif

/* PyObjectCall */
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) {
    PyObject *result;
    ternaryfunc call = func->ob_type->tp_call;
    if (unlikely(!call))
        return PyObject_Call(func, arg, kw);
    if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
        return NULL;
    result = (*call)(func, arg, kw);
    Py_LeaveRecursiveCall();
    if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
        PyErr_SetString(
            PyExc_SystemError,
            "NULL result without error in PyObject_Call");
    }
    return result;
}
#endif

/* PyObjectCall2Args */
static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2) {
    PyObject *args, *result = NULL;
    #if CYTHON_FAST_PYCALL
    if (PyFunction_Check(function)) {
        PyObject *args[2] = {arg1, arg2};
        return __Pyx_PyFunction_FastCall(function, args, 2);
    }
    #endif
    #if CYTHON_FAST_PYCCALL
    if (__Pyx_PyFastCFunction_Check(function)) {
        PyObject *args[2] = {arg1, arg2};
        return __Pyx_PyCFunction_FastCall(function, args, 2);
    }
    #endif
    args = PyTuple_New(2);
    if (unlikely(!args)) goto done;
    Py_INCREF(arg1);
    PyTuple_SET_ITEM(args, 0, arg1);
    Py_INCREF(arg2);
    PyTuple_SET_ITEM(args, 1, arg2);
    Py_INCREF(function);
    result = __Pyx_PyObject_Call(function, args, NULL);
    Py_DECREF(args);
    Py_DECREF(function);
done:
    return result;
}

/* PyObjectCallMethO */
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) {
    PyObject *self, *result;
    PyCFunction cfunc;
    cfunc = PyCFunction_GET_FUNCTION(func);
    self = PyCFunction_GET_SELF(func);
    if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
        return NULL;
    result = cfunc(self, arg);
    Py_LeaveRecursiveCall();
    if (unlikely(!result) && unlikely(!PyErr_Occurred())) {
        PyErr_SetString(
            PyExc_SystemError,
            "NULL result without error in PyObject_Call");
    }
    return result;
}
#endif

/* PyObjectCallOneArg */
#if CYTHON_COMPILING_IN_CPYTHON
static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) {
    PyObject *result;
    PyObject *args = PyTuple_New(1);
    if (unlikely(!args)) return NULL;
    Py_INCREF(arg);
    PyTuple_SET_ITEM(args, 0, arg);
    result = __Pyx_PyObject_Call(func, args, NULL);
    Py_DECREF(args);
    return result;
}
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
#if CYTHON_FAST_PYCALL
    if (PyFunction_Check(func)) {
        return __Pyx_PyFunction_FastCall(func, &arg, 1);
    }
#endif
    if (likely(PyCFunction_Check(func))) {
        if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) {
            return __Pyx_PyObject_CallMethO(func, arg);
#if CYTHON_FAST_PYCCALL
        } else if (PyCFunction_GET_FLAGS(func) & METH_FASTCALL) {
            return __Pyx_PyCFunction_FastCall(func, &arg, 1);
#endif
        }
    }
    return __Pyx__PyObject_CallOneArg(func, arg);
}
#else
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
    PyObject *result;
    PyObject *args = PyTuple_Pack(1, arg);
    if (unlikely(!args)) return NULL;
    result = __Pyx_PyObject_Call(func, args, NULL);
    Py_DECREF(args);
    return result;
}
#endif

/* PyObjectSetAttrStr */
#if CYTHON_USE_TYPE_SLOTS
static CYTHON_INLINE int __Pyx_PyObject_SetAttrStr(PyObject* obj, PyObject* attr_name, PyObject* value) {
    PyTypeObject* tp = Py_TYPE(obj);
    if (likely(tp->tp_setattro))
        return tp->tp_setattro(obj, attr_name, value);
#if PY_MAJOR_VERSION < 3
    if (likely(tp->tp_setattr))
        return tp->tp_setattr(obj, PyString_AS_STRING(attr_name), value);
#endif
    return PyObject_SetAttr(obj, attr_name, value);
}
#endif

/* PyObjectCallNoArg */
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) {
#if CYTHON_FAST_PYCALL
    if (PyFunction_Check(func)) {
        return __Pyx_PyFunction_FastCall(func, NULL, 0);
    }
#endif
#ifdef __Pyx_CyFunction_USED
    if (likely(PyCFunction_Check(func) || __Pyx_CyFunction_Check(func)))
#else
    if (likely(PyCFunction_Check(func)))
#endif
    {
        if (likely(PyCFunction_GET_FLAGS(func) & METH_NOARGS)) {
            return __Pyx_PyObject_CallMethO(func, NULL);
        }
    }
    return __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL);
}
#endif

/* RaiseTooManyValuesToUnpack */
static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) {
    PyErr_Format(PyExc_ValueError,
                 "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected);
}

/* RaiseNeedMoreValuesToUnpack */
static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) {
    PyErr_Format(PyExc_ValueError,
                 "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack",
                 index, (index == 1) ? "" : "s");
}

/* IterFinish */
static CYTHON_INLINE int __Pyx_IterFinish(void) {
#if CYTHON_FAST_THREAD_STATE
    PyThreadState *tstate = __Pyx_PyThreadState_Current;
    PyObject* exc_type = tstate->curexc_type;
    if (unlikely(exc_type)) {
        if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) {
            PyObject *exc_value, *exc_tb;
            exc_value = tstate->curexc_value;
            exc_tb = tstate->curexc_traceback;
            tstate->curexc_type = 0;
            tstate->curexc_value = 0;
            tstate->curexc_traceback = 0;
            Py_DECREF(exc_type);
            Py_XDECREF(exc_value);
            Py_XDECREF(exc_tb);
            return 0;
        } else {
            return -1;
        }
    }
    return 0;
#else
    if (unlikely(PyErr_Occurred())) {
        if (likely(PyErr_ExceptionMatches(PyExc_StopIteration))) {
            PyErr_Clear();
            return 0;
        } else {
            return -1;
        }
    }
    return 0;
#endif
}

/* UnpackItemEndCheck */
static int __Pyx_IternextUnpackEndCheck(PyObject *retval, Py_ssize_t expected) {
    if (unlikely(retval)) {
        Py_DECREF(retval);
        __Pyx_RaiseTooManyValuesError(expected);
        return -1;
    } else {
        return __Pyx_IterFinish();
    }
    return 0;
}

/* GetAttr */
static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *o, PyObject *n) {
#if CYTHON_USE_TYPE_SLOTS
#if PY_MAJOR_VERSION >= 3
    if (likely(PyUnicode_Check(n)))
#else
    if (likely(PyString_Check(n)))
#endif
        return __Pyx_PyObject_GetAttrStr(o, n);
#endif
    return PyObject_GetAttr(o, n);
}

/* HasAttr */
static CYTHON_INLINE int __Pyx_HasAttr(PyObject *o, PyObject *n) {
    PyObject *r;
    if (unlikely(!__Pyx_PyBaseString_Check(n))) {
        PyErr_SetString(PyExc_TypeError,
                        "hasattr(): attribute name must be string");
        return -1;
    }
    r = __Pyx_GetAttr(o, n);
    if (unlikely(!r)) {
        PyErr_Clear();
        return 0;
    } else {
        Py_DECREF(r);
        return 1;
    }
}

/* IterNext */
static PyObject *__Pyx_PyIter_Next2Default(PyObject* defval) {
    PyObject* exc_type;
    __Pyx_PyThreadState_declare
    __Pyx_PyThreadState_assign
    exc_type = __Pyx_PyErr_Occurred();
    if (unlikely(exc_type)) {
        if (!defval || unlikely(!__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration)))
            return NULL;
        __Pyx_PyErr_Clear();
        Py_INCREF(defval);
        return defval;
    }
    if (defval) {
        Py_INCREF(defval);
        return defval;
    }
    __Pyx_PyErr_SetNone(PyExc_StopIteration);
    return NULL;
}
static void __Pyx_PyIter_Next_ErrorNoIterator(PyObject *iterator) {
    PyErr_Format(PyExc_TypeError,
        "%.200s object is not an iterator", Py_TYPE(iterator)->tp_name);
}
static CYTHON_INLINE PyObject *__Pyx_PyIter_Next2(PyObject* iterator, PyObject* defval) {
    PyObject* next;
    iternextfunc iternext = Py_TYPE(iterator)->tp_iternext;
    if (likely(iternext)) {
#if CYTHON_USE_TYPE_SLOTS
        next = iternext(iterator);
        if (likely(next))
            return next;
        #if PY_VERSION_HEX >= 0x02070000
        if (unlikely(iternext == &_PyObject_NextNotImplemented))
            return NULL;
        #endif
#else
        next = PyIter_Next(iterator);
        if (likely(next))
            return next;
#endif
    } else if (CYTHON_USE_TYPE_SLOTS || unlikely(!PyIter_Check(iterator))) {
        __Pyx_PyIter_Next_ErrorNoIterator(iterator);
        return NULL;
    }
#if !CYTHON_USE_TYPE_SLOTS
    else {
        next = PyIter_Next(iterator);
        if (likely(next))
            return next;
    }
#endif
    return __Pyx_PyIter_Next2Default(defval);
}

/* PyFloatBinop */
#if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyFloat_EqObjC(PyObject *op1, PyObject *op2, double floatval, int inplace, int zerodivision_check) {
    const double b = floatval;
    double a;
    (void)inplace;
    (void)zerodivision_check;
    if (op1 == op2) {
        Py_RETURN_TRUE;
    }
    if (likely(PyFloat_CheckExact(op1))) {
        a = PyFloat_AS_DOUBLE(op1);
        
    } else
    #if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(op1))) {
        a = (double) PyInt_AS_LONG(op1);
        
    } else
    #endif
    if (likely(PyLong_CheckExact(op1))) {
        #if CYTHON_USE_PYLONG_INTERNALS
        const digit* digits = ((PyLongObject*)op1)->ob_digit;
        const Py_ssize_t size = Py_SIZE(op1);
        switch (size) {
            case  0: a = 0.0; break;
            case -1: a = -(double) digits[0]; break;
            case  1: a = (double) digits[0]; break;
            case -2:
            case 2:
                if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT && ((8 * sizeof(unsigned long) < 53) || (1 * PyLong_SHIFT < 53))) {
                    a = (double) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                    if ((8 * sizeof(unsigned long) < 53) || (2 * PyLong_SHIFT < 53) || (a < (double) ((PY_LONG_LONG)1 << 53))) {
                        if (size == -2)
                            a = -a;
                        break;
                    }
                }
                CYTHON_FALLTHROUGH;
            case -3:
            case 3:
                if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT && ((8 * sizeof(unsigned long) < 53) || (2 * PyLong_SHIFT < 53))) {
                    a = (double) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                    if ((8 * sizeof(unsigned long) < 53) || (3 * PyLong_SHIFT < 53) || (a < (double) ((PY_LONG_LONG)1 << 53))) {
                        if (size == -3)
                            a = -a;
                        break;
                    }
                }
                CYTHON_FALLTHROUGH;
            case -4:
            case 4:
                if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT && ((8 * sizeof(unsigned long) < 53) || (3 * PyLong_SHIFT < 53))) {
                    a = (double) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                    if ((8 * sizeof(unsigned long) < 53) || (4 * PyLong_SHIFT < 53) || (a < (double) ((PY_LONG_LONG)1 << 53))) {
                        if (size == -4)
                            a = -a;
                        break;
                    }
                }
                CYTHON_FALLTHROUGH;
            default:
        #else
        {
        #endif
            return (
                PyFloat_Type.tp_richcompare(op2, op1, Py_EQ));
        }
    } else {
        return (
            PyObject_RichCompare(op1, op2, Py_EQ));
    }
        if (a == b) {
            Py_RETURN_TRUE;
        } else {
            Py_RETURN_FALSE;
        }
}
#endif

/* PyFloatBinop */
  #if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyFloat_SubtractCObj(PyObject *op1, PyObject *op2, double floatval, int inplace, int zerodivision_check) {
    const double a = floatval;
    double b, result;
    (void)inplace;
    (void)zerodivision_check;
    if (likely(PyFloat_CheckExact(op2))) {
        b = PyFloat_AS_DOUBLE(op2);
        
    } else
    #if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(op2))) {
        b = (double) PyInt_AS_LONG(op2);
        
    } else
    #endif
    if (likely(PyLong_CheckExact(op2))) {
        #if CYTHON_USE_PYLONG_INTERNALS
        const digit* digits = ((PyLongObject*)op2)->ob_digit;
        const Py_ssize_t size = Py_SIZE(op2);
        switch (size) {
            case  0: b = 0.0; break;
            case -1: b = -(double) digits[0]; break;
            case  1: b = (double) digits[0]; break;
            case -2:
            case 2:
                if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT && ((8 * sizeof(unsigned long) < 53) || (1 * PyLong_SHIFT < 53))) {
                    b = (double) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                    if ((8 * sizeof(unsigned long) < 53) || (2 * PyLong_SHIFT < 53) || (b < (double) ((PY_LONG_LONG)1 << 53))) {
                        if (size == -2)
                            b = -b;
                        break;
                    }
                }
                CYTHON_FALLTHROUGH;
            case -3:
            case 3:
                if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT && ((8 * sizeof(unsigned long) < 53) || (2 * PyLong_SHIFT < 53))) {
                    b = (double) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                    if ((8 * sizeof(unsigned long) < 53) || (3 * PyLong_SHIFT < 53) || (b < (double) ((PY_LONG_LONG)1 << 53))) {
                        if (size == -3)
                            b = -b;
                        break;
                    }
                }
                CYTHON_FALLTHROUGH;
            case -4:
            case 4:
                if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT && ((8 * sizeof(unsigned long) < 53) || (3 * PyLong_SHIFT < 53))) {
                    b = (double) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                    if ((8 * sizeof(unsigned long) < 53) || (4 * PyLong_SHIFT < 53) || (b < (double) ((PY_LONG_LONG)1 << 53))) {
                        if (size == -4)
                            b = -b;
                        break;
                    }
                }
                CYTHON_FALLTHROUGH;
            default:
        #else
        {
        #endif
            b = PyLong_AsDouble(op2);
            if (unlikely(b == -1.0 && PyErr_Occurred())) return NULL;
            
        }
    } else {
        return (inplace ? PyNumber_InPlaceSubtract : PyNumber_Subtract)(op1, op2);
    }
        
        PyFPE_START_PROTECT("subtract", return NULL)
        result = a - b;
        PyFPE_END_PROTECT(result)
        return PyFloat_FromDouble(result);
}
#endif

/* PyIntBinop */
    #if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, int inplace, int zerodivision_check) {
    (void)inplace;
    (void)zerodivision_check;
    #if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(op1))) {
        const long b = intval;
        long x;
        long a = PyInt_AS_LONG(op1);
            x = (long)((unsigned long)a + b);
            if (likely((x^a) >= 0 || (x^b) >= 0))
                return PyInt_FromLong(x);
            return PyLong_Type.tp_as_number->nb_add(op1, op2);
    }
    #endif
    #if CYTHON_USE_PYLONG_INTERNALS
    if (likely(PyLong_CheckExact(op1))) {
        const long b = intval;
        long a, x;
#ifdef HAVE_LONG_LONG
        const PY_LONG_LONG llb = intval;
        PY_LONG_LONG lla, llx;
#endif
        const digit* digits = ((PyLongObject*)op1)->ob_digit;
        const Py_ssize_t size = Py_SIZE(op1);
        if (likely(__Pyx_sst_abs(size) <= 1)) {
            a = likely(size) ? digits[0] : 0;
            if (size == -1) a = -a;
        } else {
            switch (size) {
                case -2:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        a = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
                        lla = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 2:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        a = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
                        lla = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case -3:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        a = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
                        lla = -(PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 3:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        a = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
                        lla = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case -4:
                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                        a = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
                        lla = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 4:
                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                        a = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
                        lla = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                default: return PyLong_Type.tp_as_number->nb_add(op1, op2);
            }
        }
                x = a + b;
            return PyLong_FromLong(x);
#ifdef HAVE_LONG_LONG
        long_long:
                llx = lla + llb;
            return PyLong_FromLongLong(llx);
#endif
        
        
    }
    #endif
    if (PyFloat_CheckExact(op1)) {
        const long b = intval;
        double a = PyFloat_AS_DOUBLE(op1);
            double result;
            PyFPE_START_PROTECT("add", return NULL)
            result = ((double)a) + (double)b;
            PyFPE_END_PROTECT(result)
            return PyFloat_FromDouble(result);
    }
    return (inplace ? PyNumber_InPlaceAdd : PyNumber_Add)(op1, op2);
}
#endif

/* GetItemInt */
    static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) {
    PyObject *r;
    if (!j) return NULL;
    r = PyObject_GetItem(o, j);
    Py_DECREF(j);
    return r;
}
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
                                                              CYTHON_NCP_UNUSED int wraparound,
                                                              CYTHON_NCP_UNUSED int boundscheck) {
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
    Py_ssize_t wrapped_i = i;
    if (wraparound & unlikely(i < 0)) {
        wrapped_i += PyList_GET_SIZE(o);
    }
    if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyList_GET_SIZE(o)))) {
        PyObject *r = PyList_GET_ITEM(o, wrapped_i);
        Py_INCREF(r);
        return r;
    }
    return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
#else
    return PySequence_GetItem(o, i);
#endif
}
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
                                                              CYTHON_NCP_UNUSED int wraparound,
                                                              CYTHON_NCP_UNUSED int boundscheck) {
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
    Py_ssize_t wrapped_i = i;
    if (wraparound & unlikely(i < 0)) {
        wrapped_i += PyTuple_GET_SIZE(o);
    }
    if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) {
        PyObject *r = PyTuple_GET_ITEM(o, wrapped_i);
        Py_INCREF(r);
        return r;
    }
    return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
#else
    return PySequence_GetItem(o, i);
#endif
}
static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list,
                                                     CYTHON_NCP_UNUSED int wraparound,
                                                     CYTHON_NCP_UNUSED int boundscheck) {
#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS
    if (is_list || PyList_CheckExact(o)) {
        Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o);
        if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) {
            PyObject *r = PyList_GET_ITEM(o, n);
            Py_INCREF(r);
            return r;
        }
    }
    else if (PyTuple_CheckExact(o)) {
        Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o);
        if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) {
            PyObject *r = PyTuple_GET_ITEM(o, n);
            Py_INCREF(r);
            return r;
        }
    } else {
        PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence;
        if (likely(m && m->sq_item)) {
            if (wraparound && unlikely(i < 0) && likely(m->sq_length)) {
                Py_ssize_t l = m->sq_length(o);
                if (likely(l >= 0)) {
                    i += l;
                } else {
                    if (!PyErr_ExceptionMatches(PyExc_OverflowError))
                        return NULL;
                    PyErr_Clear();
                }
            }
            return m->sq_item(o, i);
        }
    }
#else
    if (is_list || PySequence_Check(o)) {
        return PySequence_GetItem(o, i);
    }
#endif
    return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
}

/* ObjectGetItem */
    #if CYTHON_USE_TYPE_SLOTS
static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject* index) {
    PyObject *runerr;
    Py_ssize_t key_value;
    PySequenceMethods *m = Py_TYPE(obj)->tp_as_sequence;
    if (unlikely(!(m && m->sq_item))) {
        PyErr_Format(PyExc_TypeError, "'%.200s' object is not subscriptable", Py_TYPE(obj)->tp_name);
        return NULL;
    }
    key_value = __Pyx_PyIndex_AsSsize_t(index);
    if (likely(key_value != -1 || !(runerr = PyErr_Occurred()))) {
        return __Pyx_GetItemInt_Fast(obj, key_value, 0, 1, 1);
    }
    if (PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) {
        PyErr_Clear();
        PyErr_Format(PyExc_IndexError, "cannot fit '%.200s' into an index-sized integer", Py_TYPE(index)->tp_name);
    }
    return NULL;
}
static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject* key) {
    PyMappingMethods *m = Py_TYPE(obj)->tp_as_mapping;
    if (likely(m && m->mp_subscript)) {
        return m->mp_subscript(obj, key);
    }
    return __Pyx_PyObject_GetIndex(obj, key);
}
#endif

/* PyIntCompare */
    static CYTHON_INLINE PyObject* __Pyx_PyInt_NeObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, CYTHON_UNUSED long inplace) {
    if (op1 == op2) {
        Py_RETURN_FALSE;
    }
    #if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(op1))) {
        const long b = intval;
        long a = PyInt_AS_LONG(op1);
        if (a != b) Py_RETURN_TRUE; else Py_RETURN_FALSE;
    }
    #endif
    #if CYTHON_USE_PYLONG_INTERNALS
    if (likely(PyLong_CheckExact(op1))) {
        int unequal;
        unsigned long uintval;
        Py_ssize_t size = Py_SIZE(op1);
        const digit* digits = ((PyLongObject*)op1)->ob_digit;
        if (intval == 0) {
            if (size != 0) Py_RETURN_TRUE; else Py_RETURN_FALSE;
        } else if (intval < 0) {
            if (size >= 0)
                Py_RETURN_TRUE;
            intval = -intval;
            size = -size;
        } else {
            if (size <= 0)
                Py_RETURN_TRUE;
        }
        uintval = (unsigned long) intval;
#if PyLong_SHIFT * 4 < SIZEOF_LONG*8
        if (uintval >> (PyLong_SHIFT * 4)) {
            unequal = (size != 5) || (digits[0] != (uintval & (unsigned long) PyLong_MASK))
                 | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[2] != ((uintval >> (2 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[3] != ((uintval >> (3 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[4] != ((uintval >> (4 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK));
        } else
#endif
#if PyLong_SHIFT * 3 < SIZEOF_LONG*8
        if (uintval >> (PyLong_SHIFT * 3)) {
            unequal = (size != 4) || (digits[0] != (uintval & (unsigned long) PyLong_MASK))
                 | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[2] != ((uintval >> (2 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[3] != ((uintval >> (3 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK));
        } else
#endif
#if PyLong_SHIFT * 2 < SIZEOF_LONG*8
        if (uintval >> (PyLong_SHIFT * 2)) {
            unequal = (size != 3) || (digits[0] != (uintval & (unsigned long) PyLong_MASK))
                 | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[2] != ((uintval >> (2 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK));
        } else
#endif
#if PyLong_SHIFT * 1 < SIZEOF_LONG*8
        if (uintval >> (PyLong_SHIFT * 1)) {
            unequal = (size != 2) || (digits[0] != (uintval & (unsigned long) PyLong_MASK))
                 | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK));
        } else
#endif
            unequal = (size != 1) || (((unsigned long) digits[0]) != (uintval & (unsigned long) PyLong_MASK));
        if (unequal != 0) Py_RETURN_TRUE; else Py_RETURN_FALSE;
    }
    #endif
    if (PyFloat_CheckExact(op1)) {
        const long b = intval;
        double a = PyFloat_AS_DOUBLE(op1);
        if ((double)a != (double)b) Py_RETURN_TRUE; else Py_RETURN_FALSE;
    }
    return (
        PyObject_RichCompare(op1, op2, Py_NE));
}

/* RaiseException */
    #if PY_MAJOR_VERSION < 3
static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb,
                        CYTHON_UNUSED PyObject *cause) {
    __Pyx_PyThreadState_declare
    Py_XINCREF(type);
    if (!value || value == Py_None)
        value = NULL;
    else
        Py_INCREF(value);
    if (!tb || tb == Py_None)
        tb = NULL;
    else {
        Py_INCREF(tb);
        if (!PyTraceBack_Check(tb)) {
            PyErr_SetString(PyExc_TypeError,
                "raise: arg 3 must be a traceback or None");
            goto raise_error;
        }
    }
    if (PyType_Check(type)) {
#if CYTHON_COMPILING_IN_PYPY
        if (!value) {
            Py_INCREF(Py_None);
            value = Py_None;
        }
#endif
        PyErr_NormalizeException(&type, &value, &tb);
    } else {
        if (value) {
            PyErr_SetString(PyExc_TypeError,
                "instance exception may not have a separate value");
            goto raise_error;
        }
        value = type;
        type = (PyObject*) Py_TYPE(type);
        Py_INCREF(type);
        if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
            PyErr_SetString(PyExc_TypeError,
                "raise: exception class must be a subclass of BaseException");
            goto raise_error;
        }
    }
    __Pyx_PyThreadState_assign
    __Pyx_ErrRestore(type, value, tb);
    return;
raise_error:
    Py_XDECREF(value);
    Py_XDECREF(type);
    Py_XDECREF(tb);
    return;
}
#else
static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
    PyObject* owned_instance = NULL;
    if (tb == Py_None) {
        tb = 0;
    } else if (tb && !PyTraceBack_Check(tb)) {
        PyErr_SetString(PyExc_TypeError,
            "raise: arg 3 must be a traceback or None");
        goto bad;
    }
    if (value == Py_None)
        value = 0;
    if (PyExceptionInstance_Check(type)) {
        if (value) {
            PyErr_SetString(PyExc_TypeError,
                "instance exception may not have a separate value");
            goto bad;
        }
        value = type;
        type = (PyObject*) Py_TYPE(value);
    } else if (PyExceptionClass_Check(type)) {
        PyObject *instance_class = NULL;
        if (value && PyExceptionInstance_Check(value)) {
            instance_class = (PyObject*) Py_TYPE(value);
            if (instance_class != type) {
                int is_subclass = PyObject_IsSubclass(instance_class, type);
                if (!is_subclass) {
                    instance_class = NULL;
                } else if (unlikely(is_subclass == -1)) {
                    goto bad;
                } else {
                    type = instance_class;
                }
            }
        }
        if (!instance_class) {
            PyObject *args;
            if (!value)
                args = PyTuple_New(0);
            else if (PyTuple_Check(value)) {
                Py_INCREF(value);
                args = value;
            } else
                args = PyTuple_Pack(1, value);
            if (!args)
                goto bad;
            owned_instance = PyObject_Call(type, args, NULL);
            Py_DECREF(args);
            if (!owned_instance)
                goto bad;
            value = owned_instance;
            if (!PyExceptionInstance_Check(value)) {
                PyErr_Format(PyExc_TypeError,
                             "calling %R should have returned an instance of "
                             "BaseException, not %R",
                             type, Py_TYPE(value));
                goto bad;
            }
        }
    } else {
        PyErr_SetString(PyExc_TypeError,
            "raise: exception class must be a subclass of BaseException");
        goto bad;
    }
    if (cause) {
        PyObject *fixed_cause;
        if (cause == Py_None) {
            fixed_cause = NULL;
        } else if (PyExceptionClass_Check(cause)) {
            fixed_cause = PyObject_CallObject(cause, NULL);
            if (fixed_cause == NULL)
                goto bad;
        } else if (PyExceptionInstance_Check(cause)) {
            fixed_cause = cause;
            Py_INCREF(fixed_cause);
        } else {
            PyErr_SetString(PyExc_TypeError,
                            "exception causes must derive from "
                            "BaseException");
            goto bad;
        }
        PyException_SetCause(value, fixed_cause);
    }
    PyErr_SetObject(type, value);
    if (tb) {
#if CYTHON_COMPILING_IN_PYPY
        PyObject *tmp_type, *tmp_value, *tmp_tb;
        PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb);
        Py_INCREF(tb);
        PyErr_Restore(tmp_type, tmp_value, tb);
        Py_XDECREF(tmp_tb);
#else
        PyThreadState *tstate = __Pyx_PyThreadState_Current;
        PyObject* tmp_tb = tstate->curexc_traceback;
        if (tb != tmp_tb) {
            Py_INCREF(tb);
            tstate->curexc_traceback = tb;
            Py_XDECREF(tmp_tb);
        }
#endif
    }
bad:
    Py_XDECREF(owned_instance);
    return;
}
#endif

/* PyIntCompare */
    static CYTHON_INLINE PyObject* __Pyx_PyInt_EqObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, CYTHON_UNUSED long inplace) {
    if (op1 == op2) {
        Py_RETURN_TRUE;
    }
    #if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(op1))) {
        const long b = intval;
        long a = PyInt_AS_LONG(op1);
        if (a == b) Py_RETURN_TRUE; else Py_RETURN_FALSE;
    }
    #endif
    #if CYTHON_USE_PYLONG_INTERNALS
    if (likely(PyLong_CheckExact(op1))) {
        int unequal;
        unsigned long uintval;
        Py_ssize_t size = Py_SIZE(op1);
        const digit* digits = ((PyLongObject*)op1)->ob_digit;
        if (intval == 0) {
            if (size == 0) Py_RETURN_TRUE; else Py_RETURN_FALSE;
        } else if (intval < 0) {
            if (size >= 0)
                Py_RETURN_FALSE;
            intval = -intval;
            size = -size;
        } else {
            if (size <= 0)
                Py_RETURN_FALSE;
        }
        uintval = (unsigned long) intval;
#if PyLong_SHIFT * 4 < SIZEOF_LONG*8
        if (uintval >> (PyLong_SHIFT * 4)) {
            unequal = (size != 5) || (digits[0] != (uintval & (unsigned long) PyLong_MASK))
                 | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[2] != ((uintval >> (2 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[3] != ((uintval >> (3 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[4] != ((uintval >> (4 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK));
        } else
#endif
#if PyLong_SHIFT * 3 < SIZEOF_LONG*8
        if (uintval >> (PyLong_SHIFT * 3)) {
            unequal = (size != 4) || (digits[0] != (uintval & (unsigned long) PyLong_MASK))
                 | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[2] != ((uintval >> (2 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[3] != ((uintval >> (3 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK));
        } else
#endif
#if PyLong_SHIFT * 2 < SIZEOF_LONG*8
        if (uintval >> (PyLong_SHIFT * 2)) {
            unequal = (size != 3) || (digits[0] != (uintval & (unsigned long) PyLong_MASK))
                 | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK)) | (digits[2] != ((uintval >> (2 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK));
        } else
#endif
#if PyLong_SHIFT * 1 < SIZEOF_LONG*8
        if (uintval >> (PyLong_SHIFT * 1)) {
            unequal = (size != 2) || (digits[0] != (uintval & (unsigned long) PyLong_MASK))
                 | (digits[1] != ((uintval >> (1 * PyLong_SHIFT)) & (unsigned long) PyLong_MASK));
        } else
#endif
            unequal = (size != 1) || (((unsigned long) digits[0]) != (uintval & (unsigned long) PyLong_MASK));
        if (unequal == 0) Py_RETURN_TRUE; else Py_RETURN_FALSE;
    }
    #endif
    if (PyFloat_CheckExact(op1)) {
        const long b = intval;
        double a = PyFloat_AS_DOUBLE(op1);
        if ((double)a == (double)b) Py_RETURN_TRUE; else Py_RETURN_FALSE;
    }
    return (
        PyObject_RichCompare(op1, op2, Py_EQ));
}

/* SliceObject */
    static CYTHON_INLINE int __Pyx_PyObject_SetSlice(PyObject* obj, PyObject* value,
        Py_ssize_t cstart, Py_ssize_t cstop,
        PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice,
        int has_cstart, int has_cstop, CYTHON_UNUSED int wraparound) {
#if CYTHON_USE_TYPE_SLOTS
    PyMappingMethods* mp;
#if PY_MAJOR_VERSION < 3
    PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence;
    if (likely(ms && ms->sq_ass_slice)) {
        if (!has_cstart) {
            if (_py_start && (*_py_start != Py_None)) {
                cstart = __Pyx_PyIndex_AsSsize_t(*_py_start);
                if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad;
            } else
                cstart = 0;
        }
        if (!has_cstop) {
            if (_py_stop && (*_py_stop != Py_None)) {
                cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop);
                if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad;
            } else
                cstop = PY_SSIZE_T_MAX;
        }
        if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) {
            Py_ssize_t l = ms->sq_length(obj);
            if (likely(l >= 0)) {
                if (cstop < 0) {
                    cstop += l;
                    if (cstop < 0) cstop = 0;
                }
                if (cstart < 0) {
                    cstart += l;
                    if (cstart < 0) cstart = 0;
                }
            } else {
                if (!PyErr_ExceptionMatches(PyExc_OverflowError))
                    goto bad;
                PyErr_Clear();
            }
        }
        return ms->sq_ass_slice(obj, cstart, cstop, value);
    }
#endif
    mp = Py_TYPE(obj)->tp_as_mapping;
    if (likely(mp && mp->mp_ass_subscript))
#endif
    {
        int result;
        PyObject *py_slice, *py_start, *py_stop;
        if (_py_slice) {
            py_slice = *_py_slice;
        } else {
            PyObject* owned_start = NULL;
            PyObject* owned_stop = NULL;
            if (_py_start) {
                py_start = *_py_start;
            } else {
                if (has_cstart) {
                    owned_start = py_start = PyInt_FromSsize_t(cstart);
                    if (unlikely(!py_start)) goto bad;
                } else
                    py_start = Py_None;
            }
            if (_py_stop) {
                py_stop = *_py_stop;
            } else {
                if (has_cstop) {
                    owned_stop = py_stop = PyInt_FromSsize_t(cstop);
                    if (unlikely(!py_stop)) {
                        Py_XDECREF(owned_start);
                        goto bad;
                    }
                } else
                    py_stop = Py_None;
            }
            py_slice = PySlice_New(py_start, py_stop, Py_None);
            Py_XDECREF(owned_start);
            Py_XDECREF(owned_stop);
            if (unlikely(!py_slice)) goto bad;
        }
#if CYTHON_USE_TYPE_SLOTS
        result = mp->mp_ass_subscript(obj, py_slice, value);
#else
        result = value ? PyObject_SetItem(obj, py_slice, value) : PyObject_DelItem(obj, py_slice);
#endif
        if (!_py_slice) {
            Py_DECREF(py_slice);
        }
        return result;
    }
    PyErr_Format(PyExc_TypeError,
        "'%.200s' object does not support slice %.10s",
        Py_TYPE(obj)->tp_name, value ? "assignment" : "deletion");
bad:
    return -1;
}

/* PyIntBinop */
    #if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyInt_SubtractObjC(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, int inplace, int zerodivision_check) {
    (void)inplace;
    (void)zerodivision_check;
    #if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(op1))) {
        const long b = intval;
        long x;
        long a = PyInt_AS_LONG(op1);
            x = (long)((unsigned long)a - b);
            if (likely((x^a) >= 0 || (x^~b) >= 0))
                return PyInt_FromLong(x);
            return PyLong_Type.tp_as_number->nb_subtract(op1, op2);
    }
    #endif
    #if CYTHON_USE_PYLONG_INTERNALS
    if (likely(PyLong_CheckExact(op1))) {
        const long b = intval;
        long a, x;
#ifdef HAVE_LONG_LONG
        const PY_LONG_LONG llb = intval;
        PY_LONG_LONG lla, llx;
#endif
        const digit* digits = ((PyLongObject*)op1)->ob_digit;
        const Py_ssize_t size = Py_SIZE(op1);
        if (likely(__Pyx_sst_abs(size) <= 1)) {
            a = likely(size) ? digits[0] : 0;
            if (size == -1) a = -a;
        } else {
            switch (size) {
                case -2:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        a = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
                        lla = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 2:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        a = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
                        lla = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case -3:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        a = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
                        lla = -(PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 3:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        a = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
                        lla = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case -4:
                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                        a = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
                        lla = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 4:
                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                        a = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
                        lla = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                default: return PyLong_Type.tp_as_number->nb_subtract(op1, op2);
            }
        }
                x = a - b;
            return PyLong_FromLong(x);
#ifdef HAVE_LONG_LONG
        long_long:
                llx = lla - llb;
            return PyLong_FromLongLong(llx);
#endif
        
        
    }
    #endif
    if (PyFloat_CheckExact(op1)) {
        const long b = intval;
        double a = PyFloat_AS_DOUBLE(op1);
            double result;
            PyFPE_START_PROTECT("subtract", return NULL)
            result = ((double)a) - (double)b;
            PyFPE_END_PROTECT(result)
            return PyFloat_FromDouble(result);
    }
    return (inplace ? PyNumber_InPlaceSubtract : PyNumber_Subtract)(op1, op2);
}
#endif

/* PyIntBinop */
    #if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyInt_SubtractCObj(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, int inplace, int zerodivision_check) {
    (void)inplace;
    (void)zerodivision_check;
    #if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(op2))) {
        const long a = intval;
        long x;
        long b = PyInt_AS_LONG(op2);
            x = (long)((unsigned long)a - b);
            if (likely((x^a) >= 0 || (x^~b) >= 0))
                return PyInt_FromLong(x);
            return PyLong_Type.tp_as_number->nb_subtract(op1, op2);
    }
    #endif
    #if CYTHON_USE_PYLONG_INTERNALS
    if (likely(PyLong_CheckExact(op2))) {
        const long a = intval;
        long b, x;
#ifdef HAVE_LONG_LONG
        const PY_LONG_LONG lla = intval;
        PY_LONG_LONG llb, llx;
#endif
        const digit* digits = ((PyLongObject*)op2)->ob_digit;
        const Py_ssize_t size = Py_SIZE(op2);
        if (likely(__Pyx_sst_abs(size) <= 1)) {
            b = likely(size) ? digits[0] : 0;
            if (size == -1) b = -b;
        } else {
            switch (size) {
                case -2:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        b = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
                        llb = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 2:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        b = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
                        llb = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case -3:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        b = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
                        llb = -(PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 3:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        b = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
                        llb = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case -4:
                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                        b = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
                        llb = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 4:
                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                        b = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
                        llb = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                default: return PyLong_Type.tp_as_number->nb_subtract(op1, op2);
            }
        }
                x = a - b;
            return PyLong_FromLong(x);
#ifdef HAVE_LONG_LONG
        long_long:
                llx = lla - llb;
            return PyLong_FromLongLong(llx);
#endif
        
        
    }
    #endif
    if (PyFloat_CheckExact(op2)) {
        const long a = intval;
        double b = PyFloat_AS_DOUBLE(op2);
            double result;
            PyFPE_START_PROTECT("subtract", return NULL)
            result = ((double)a) - (double)b;
            PyFPE_END_PROTECT(result)
            return PyFloat_FromDouble(result);
    }
    return (inplace ? PyNumber_InPlaceSubtract : PyNumber_Subtract)(op1, op2);
}
#endif

/* PyIntBinop */
    #if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyInt_AddCObj(PyObject *op1, PyObject *op2, CYTHON_UNUSED long intval, int inplace, int zerodivision_check) {
    (void)inplace;
    (void)zerodivision_check;
    #if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(op2))) {
        const long a = intval;
        long x;
        long b = PyInt_AS_LONG(op2);
            x = (long)((unsigned long)a + b);
            if (likely((x^a) >= 0 || (x^b) >= 0))
                return PyInt_FromLong(x);
            return PyLong_Type.tp_as_number->nb_add(op1, op2);
    }
    #endif
    #if CYTHON_USE_PYLONG_INTERNALS
    if (likely(PyLong_CheckExact(op2))) {
        const long a = intval;
        long b, x;
#ifdef HAVE_LONG_LONG
        const PY_LONG_LONG lla = intval;
        PY_LONG_LONG llb, llx;
#endif
        const digit* digits = ((PyLongObject*)op2)->ob_digit;
        const Py_ssize_t size = Py_SIZE(op2);
        if (likely(__Pyx_sst_abs(size) <= 1)) {
            b = likely(size) ? digits[0] : 0;
            if (size == -1) b = -b;
        } else {
            switch (size) {
                case -2:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        b = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
                        llb = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 2:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        b = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) {
                        llb = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case -3:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        b = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
                        llb = -(PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 3:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        b = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) {
                        llb = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case -4:
                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                        b = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
                        llb = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                case 4:
                    if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                        b = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                        break;
#ifdef HAVE_LONG_LONG
                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) {
                        llb = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0]));
                        goto long_long;
#endif
                    }
                    CYTHON_FALLTHROUGH;
                default: return PyLong_Type.tp_as_number->nb_add(op1, op2);
            }
        }
                x = a + b;
            return PyLong_FromLong(x);
#ifdef HAVE_LONG_LONG
        long_long:
                llx = lla + llb;
            return PyLong_FromLongLong(llx);
#endif
        
        
    }
    #endif
    if (PyFloat_CheckExact(op2)) {
        const long a = intval;
        double b = PyFloat_AS_DOUBLE(op2);
            double result;
            PyFPE_START_PROTECT("add", return NULL)
            result = ((double)a) + (double)b;
            PyFPE_END_PROTECT(result)
            return PyFloat_FromDouble(result);
    }
    return (inplace ? PyNumber_InPlaceAdd : PyNumber_Add)(op1, op2);
}
#endif

/* BytesEquals */
    static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) {
#if CYTHON_COMPILING_IN_PYPY
    return PyObject_RichCompareBool(s1, s2, equals);
#else
    if (s1 == s2) {
        return (equals == Py_EQ);
    } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) {
        const char *ps1, *ps2;
        Py_ssize_t length = PyBytes_GET_SIZE(s1);
        if (length != PyBytes_GET_SIZE(s2))
            return (equals == Py_NE);
        ps1 = PyBytes_AS_STRING(s1);
        ps2 = PyBytes_AS_STRING(s2);
        if (ps1[0] != ps2[0]) {
            return (equals == Py_NE);
        } else if (length == 1) {
            return (equals == Py_EQ);
        } else {
            int result;
#if CYTHON_USE_UNICODE_INTERNALS
            Py_hash_t hash1, hash2;
            hash1 = ((PyBytesObject*)s1)->ob_shash;
            hash2 = ((PyBytesObject*)s2)->ob_shash;
            if (hash1 != hash2 && hash1 != -1 && hash2 != -1) {
                return (equals == Py_NE);
            }
#endif
            result = memcmp(ps1, ps2, (size_t)length);
            return (equals == Py_EQ) ? (result == 0) : (result != 0);
        }
    } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) {
        return (equals == Py_NE);
    } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) {
        return (equals == Py_NE);
    } else {
        int result;
        PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
        if (!py_result)
            return -1;
        result = __Pyx_PyObject_IsTrue(py_result);
        Py_DECREF(py_result);
        return result;
    }
#endif
}

/* UnicodeEquals */
    static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) {
#if CYTHON_COMPILING_IN_PYPY
    return PyObject_RichCompareBool(s1, s2, equals);
#else
#if PY_MAJOR_VERSION < 3
    PyObject* owned_ref = NULL;
#endif
    int s1_is_unicode, s2_is_unicode;
    if (s1 == s2) {
        goto return_eq;
    }
    s1_is_unicode = PyUnicode_CheckExact(s1);
    s2_is_unicode = PyUnicode_CheckExact(s2);
#if PY_MAJOR_VERSION < 3
    if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) {
        owned_ref = PyUnicode_FromObject(s2);
        if (unlikely(!owned_ref))
            return -1;
        s2 = owned_ref;
        s2_is_unicode = 1;
    } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) {
        owned_ref = PyUnicode_FromObject(s1);
        if (unlikely(!owned_ref))
            return -1;
        s1 = owned_ref;
        s1_is_unicode = 1;
    } else if (((!s2_is_unicode) & (!s1_is_unicode))) {
        return __Pyx_PyBytes_Equals(s1, s2, equals);
    }
#endif
    if (s1_is_unicode & s2_is_unicode) {
        Py_ssize_t length;
        int kind;
        void *data1, *data2;
        if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0))
            return -1;
        length = __Pyx_PyUnicode_GET_LENGTH(s1);
        if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) {
            goto return_ne;
        }
#if CYTHON_USE_UNICODE_INTERNALS
        {
            Py_hash_t hash1, hash2;
        #if CYTHON_PEP393_ENABLED
            hash1 = ((PyASCIIObject*)s1)->hash;
            hash2 = ((PyASCIIObject*)s2)->hash;
        #else
            hash1 = ((PyUnicodeObject*)s1)->hash;
            hash2 = ((PyUnicodeObject*)s2)->hash;
        #endif
            if (hash1 != hash2 && hash1 != -1 && hash2 != -1) {
                goto return_ne;
            }
        }
#endif
        kind = __Pyx_PyUnicode_KIND(s1);
        if (kind != __Pyx_PyUnicode_KIND(s2)) {
            goto return_ne;
        }
        data1 = __Pyx_PyUnicode_DATA(s1);
        data2 = __Pyx_PyUnicode_DATA(s2);
        if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) {
            goto return_ne;
        } else if (length == 1) {
            goto return_eq;
        } else {
            int result = memcmp(data1, data2, (size_t)(length * kind));
            #if PY_MAJOR_VERSION < 3
            Py_XDECREF(owned_ref);
            #endif
            return (equals == Py_EQ) ? (result == 0) : (result != 0);
        }
    } else if ((s1 == Py_None) & s2_is_unicode) {
        goto return_ne;
    } else if ((s2 == Py_None) & s1_is_unicode) {
        goto return_ne;
    } else {
        int result;
        PyObject* py_result = PyObject_RichCompare(s1, s2, equals);
        #if PY_MAJOR_VERSION < 3
        Py_XDECREF(owned_ref);
        #endif
        if (!py_result)
            return -1;
        result = __Pyx_PyObject_IsTrue(py_result);
        Py_DECREF(py_result);
        return result;
    }
return_eq:
    #if PY_MAJOR_VERSION < 3
    Py_XDECREF(owned_ref);
    #endif
    return (equals == Py_EQ);
return_ne:
    #if PY_MAJOR_VERSION < 3
    Py_XDECREF(owned_ref);
    #endif
    return (equals == Py_NE);
#endif
}

/* Import */
    static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
    PyObject *empty_list = 0;
    PyObject *module = 0;
    PyObject *global_dict = 0;
    PyObject *empty_dict = 0;
    PyObject *list;
    #if PY_MAJOR_VERSION < 3
    PyObject *py_import;
    py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import);
    if (!py_import)
        goto bad;
    #endif
    if (from_list)
        list = from_list;
    else {
        empty_list = PyList_New(0);
        if (!empty_list)
            goto bad;
        list = empty_list;
    }
    global_dict = PyModule_GetDict(__pyx_m);
    if (!global_dict)
        goto bad;
    empty_dict = PyDict_New();
    if (!empty_dict)
        goto bad;
    {
        #if PY_MAJOR_VERSION >= 3
        if (level == -1) {
            if ((1) && (strchr(__Pyx_MODULE_NAME, '.'))) {
                module = PyImport_ImportModuleLevelObject(
                    name, global_dict, empty_dict, list, 1);
                if (!module) {
                    if (!PyErr_ExceptionMatches(PyExc_ImportError))
                        goto bad;
                    PyErr_Clear();
                }
            }
            level = 0;
        }
        #endif
        if (!module) {
            #if PY_MAJOR_VERSION < 3
            PyObject *py_level = PyInt_FromLong(level);
            if (!py_level)
                goto bad;
            module = PyObject_CallFunctionObjArgs(py_import,
                name, global_dict, empty_dict, list, py_level, (PyObject *)NULL);
            Py_DECREF(py_level);
            #else
            module = PyImport_ImportModuleLevelObject(
                name, global_dict, empty_dict, list, level);
            #endif
        }
    }
bad:
    #if PY_MAJOR_VERSION < 3
    Py_XDECREF(py_import);
    #endif
    Py_XDECREF(empty_list);
    Py_XDECREF(empty_dict);
    return module;
}

/* ImportFrom */
    static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
    PyObject* value = __Pyx_PyObject_GetAttrStr(module, name);
    if (unlikely(!value) && PyErr_ExceptionMatches(PyExc_AttributeError)) {
        PyErr_Format(PyExc_ImportError,
        #if PY_MAJOR_VERSION < 3
            "cannot import name %.230s", PyString_AS_STRING(name));
        #else
            "cannot import name %S", name);
        #endif
    }
    return value;
}

/* KeywordStringCheck */
    static int __Pyx_CheckKeywordStrings(
    PyObject *kwdict,
    const char* function_name,
    int kw_allowed)
{
    PyObject* key = 0;
    Py_ssize_t pos = 0;
#if CYTHON_COMPILING_IN_PYPY
    if (!kw_allowed && PyDict_Next(kwdict, &pos, &key, 0))
        goto invalid_keyword;
    return 1;
#else
    while (PyDict_Next(kwdict, &pos, &key, 0)) {
        #if PY_MAJOR_VERSION < 3
        if (unlikely(!PyString_Check(key)))
        #endif
            if (unlikely(!PyUnicode_Check(key)))
                goto invalid_keyword_type;
    }
    if ((!kw_allowed) && unlikely(key))
        goto invalid_keyword;
    return 1;
invalid_keyword_type:
    PyErr_Format(PyExc_TypeError,
        "%.200s() keywords must be strings", function_name);
    return 0;
#endif
invalid_keyword:
    PyErr_Format(PyExc_TypeError,
    #if PY_MAJOR_VERSION < 3
        "%.200s() got an unexpected keyword argument '%.200s'",
        function_name, PyString_AsString(key));
    #else
        "%s() got an unexpected keyword argument '%U'",
        function_name, key);
    #endif
    return 0;
}

/* UnpackUnboundCMethod */
    static int __Pyx_TryUnpackUnboundCMethod(__Pyx_CachedCFunction* target) {
    PyObject *method;
    method = __Pyx_PyObject_GetAttrStr(target->type, *target->method_name);
    if (unlikely(!method))
        return -1;
    target->method = method;
#if CYTHON_COMPILING_IN_CPYTHON
    #if PY_MAJOR_VERSION >= 3
    if (likely(__Pyx_TypeCheck(method, &PyMethodDescr_Type)))
    #endif
    {
        PyMethodDescrObject *descr = (PyMethodDescrObject*) method;
        target->func = descr->d_method->ml_meth;
        target->flag = descr->d_method->ml_flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_STACKLESS);
    }
#endif
    return 0;
}

/* CallUnboundCMethod2 */
    #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030600B1
static CYTHON_INLINE PyObject *__Pyx_CallUnboundCMethod2(__Pyx_CachedCFunction *cfunc, PyObject *self, PyObject *arg1, PyObject *arg2) {
    if (likely(cfunc->func)) {
        PyObject *args[2] = {arg1, arg2};
        if (cfunc->flag == METH_FASTCALL) {
            #if PY_VERSION_HEX >= 0x030700A0
            return (*(__Pyx_PyCFunctionFast)(void*)(PyCFunction)cfunc->func)(self, args, 2);
            #else
            return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, 2, NULL);
            #endif
        }
        #if PY_VERSION_HEX >= 0x030700A0
        if (cfunc->flag == (METH_FASTCALL | METH_KEYWORDS))
            return (*(__Pyx_PyCFunctionFastWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, 2, NULL);
        #endif
    }
    return __Pyx__CallUnboundCMethod2(cfunc, self, arg1, arg2);
}
#endif
static PyObject* __Pyx__CallUnboundCMethod2(__Pyx_CachedCFunction* cfunc, PyObject* self, PyObject* arg1, PyObject* arg2){
    PyObject *args, *result = NULL;
    if (unlikely(!cfunc->func && !cfunc->method) && unlikely(__Pyx_TryUnpackUnboundCMethod(cfunc) < 0)) return NULL;
#if CYTHON_COMPILING_IN_CPYTHON
    if (cfunc->func && (cfunc->flag & METH_VARARGS)) {
        args = PyTuple_New(2);
        if (unlikely(!args)) goto bad;
        Py_INCREF(arg1);
        PyTuple_SET_ITEM(args, 0, arg1);
        Py_INCREF(arg2);
        PyTuple_SET_ITEM(args, 1, arg2);
        if (cfunc->flag & METH_KEYWORDS)
            result = (*(PyCFunctionWithKeywords)(void*)(PyCFunction)cfunc->func)(self, args, NULL);
        else
            result = (*cfunc->func)(self, args);
    } else {
        args = PyTuple_New(3);
        if (unlikely(!args)) goto bad;
        Py_INCREF(self);
        PyTuple_SET_ITEM(args, 0, self);
        Py_INCREF(arg1);
        PyTuple_SET_ITEM(args, 1, arg1);
        Py_INCREF(arg2);
        PyTuple_SET_ITEM(args, 2, arg2);
        result = __Pyx_PyObject_Call(cfunc->method, args, NULL);
    }
#else
    args = PyTuple_Pack(3, self, arg1, arg2);
    if (unlikely(!args)) goto bad;
    result = __Pyx_PyObject_Call(cfunc->method, args, NULL);
#endif
bad:
    Py_XDECREF(args);
    return result;
}

/* dict_setdefault */
    static CYTHON_INLINE PyObject *__Pyx_PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *default_value,
                                                       CYTHON_UNUSED int is_safe_type) {
    PyObject* value;
#if PY_VERSION_HEX >= 0x030400A0
    if ((1)) {
        value = PyDict_SetDefault(d, key, default_value);
        if (unlikely(!value)) return NULL;
        Py_INCREF(value);
#else
    if (is_safe_type == 1 || (is_safe_type == -1 &&
#if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY
            (PyUnicode_CheckExact(key) || PyString_CheckExact(key) || PyLong_CheckExact(key)))) {
        value = PyDict_GetItemWithError(d, key);
        if (unlikely(!value)) {
            if (unlikely(PyErr_Occurred()))
                return NULL;
            if (unlikely(PyDict_SetItem(d, key, default_value) == -1))
                return NULL;
            value = default_value;
        }
        Py_INCREF(value);
#else
            (PyString_CheckExact(key) || PyUnicode_CheckExact(key) || PyInt_CheckExact(key) || PyLong_CheckExact(key)))) {
        value = PyDict_GetItem(d, key);
        if (unlikely(!value)) {
            if (unlikely(PyDict_SetItem(d, key, default_value) == -1))
                return NULL;
            value = default_value;
        }
        Py_INCREF(value);
#endif
#endif
    } else {
        value = __Pyx_CallUnboundCMethod2(&__pyx_umethod_PyDict_Type_setdefault, d, key, default_value);
    }
    return value;
}

/* PyFloatBinop */
        #if !CYTHON_COMPILING_IN_PYPY
static PyObject* __Pyx_PyFloat_AddCObj(PyObject *op1, PyObject *op2, double floatval, int inplace, int zerodivision_check) {
    const double a = floatval;
    double b, result;
    (void)inplace;
    (void)zerodivision_check;
    if (likely(PyFloat_CheckExact(op2))) {
        b = PyFloat_AS_DOUBLE(op2);
        
    } else
    #if PY_MAJOR_VERSION < 3
    if (likely(PyInt_CheckExact(op2))) {
        b = (double) PyInt_AS_LONG(op2);
        
    } else
    #endif
    if (likely(PyLong_CheckExact(op2))) {
        #if CYTHON_USE_PYLONG_INTERNALS
        const digit* digits = ((PyLongObject*)op2)->ob_digit;
        const Py_ssize_t size = Py_SIZE(op2);
        switch (size) {
            case  0: b = 0.0; break;
            case -1: b = -(double) digits[0]; break;
            case  1: b = (double) digits[0]; break;
            case -2:
            case 2:
                if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT && ((8 * sizeof(unsigned long) < 53) || (1 * PyLong_SHIFT < 53))) {
                    b = (double) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                    if ((8 * sizeof(unsigned long) < 53) || (2 * PyLong_SHIFT < 53) || (b < (double) ((PY_LONG_LONG)1 << 53))) {
                        if (size == -2)
                            b = -b;
                        break;
                    }
                }
                CYTHON_FALLTHROUGH;
            case -3:
            case 3:
                if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT && ((8 * sizeof(unsigned long) < 53) || (2 * PyLong_SHIFT < 53))) {
                    b = (double) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                    if ((8 * sizeof(unsigned long) < 53) || (3 * PyLong_SHIFT < 53) || (b < (double) ((PY_LONG_LONG)1 << 53))) {
                        if (size == -3)
                            b = -b;
                        break;
                    }
                }
                CYTHON_FALLTHROUGH;
            case -4:
            case 4:
                if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT && ((8 * sizeof(unsigned long) < 53) || (3 * PyLong_SHIFT < 53))) {
                    b = (double) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]));
                    if ((8 * sizeof(unsigned long) < 53) || (4 * PyLong_SHIFT < 53) || (b < (double) ((PY_LONG_LONG)1 << 53))) {
                        if (size == -4)
                            b = -b;
                        break;
                    }
                }
                CYTHON_FALLTHROUGH;
            default:
        #else
        {
        #endif
            b = PyLong_AsDouble(op2);
            if (unlikely(b == -1.0 && PyErr_Occurred())) return NULL;
            
        }
    } else {
        return (inplace ? PyNumber_InPlaceAdd : PyNumber_Add)(op1, op2);
    }
        
        PyFPE_START_PROTECT("add", return NULL)
        result = a + b;
        PyFPE_END_PROTECT(result)
        return PyFloat_FromDouble(result);
}
#endif

/* DictGetItem */
          #if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY
static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) {
    PyObject *value;
    value = PyDict_GetItemWithError(d, key);
    if (unlikely(!value)) {
        if (!PyErr_Occurred()) {
            if (unlikely(PyTuple_Check(key))) {
                PyObject* args = PyTuple_Pack(1, key);
                if (likely(args)) {
                    PyErr_SetObject(PyExc_KeyError, args);
                    Py_DECREF(args);
                }
            } else {
                PyErr_SetObject(PyExc_KeyError, key);
            }
        }
        return NULL;
    }
    Py_INCREF(value);
    return value;
}
#endif

/* CalculateMetaclass */
          static PyObject *__Pyx_CalculateMetaclass(PyTypeObject *metaclass, PyObject *bases) {
    Py_ssize_t i, nbases = PyTuple_GET_SIZE(bases);
    for (i=0; i < nbases; i++) {
        PyTypeObject *tmptype;
        PyObject *tmp = PyTuple_GET_ITEM(bases, i);
        tmptype = Py_TYPE(tmp);
#if PY_MAJOR_VERSION < 3
        if (tmptype == &PyClass_Type)
            continue;
#endif
        if (!metaclass) {
            metaclass = tmptype;
            continue;
        }
        if (PyType_IsSubtype(metaclass, tmptype))
            continue;
        if (PyType_IsSubtype(tmptype, metaclass)) {
            metaclass = tmptype;
            continue;
        }
        PyErr_SetString(PyExc_TypeError,
                        "metaclass conflict: "
                        "the metaclass of a derived class "
                        "must be a (non-strict) subclass "
                        "of the metaclasses of all its bases");
        return NULL;
    }
    if (!metaclass) {
#if PY_MAJOR_VERSION < 3
        metaclass = &PyClass_Type;
#else
        metaclass = &PyType_Type;
#endif
    }
    Py_INCREF((PyObject*) metaclass);
    return (PyObject*) metaclass;
}

/* ClassMethod */
          static PyObject* __Pyx_Method_ClassMethod(PyObject *method) {
#if CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM <= 0x05080000
    if (PyObject_TypeCheck(method, &PyWrapperDescr_Type)) {
        return PyClassMethod_New(method);
    }
#else
#if CYTHON_COMPILING_IN_PYSTON || CYTHON_COMPILING_IN_PYPY
    if (PyMethodDescr_Check(method))
#else
    static PyTypeObject *methoddescr_type = NULL;
    if (methoddescr_type == NULL) {
       PyObject *meth = PyObject_GetAttrString((PyObject*)&PyList_Type, "append");
       if (!meth) return NULL;
       methoddescr_type = Py_TYPE(meth);
       Py_DECREF(meth);
    }
    if (__Pyx_TypeCheck(method, methoddescr_type))
#endif
    {
        PyMethodDescrObject *descr = (PyMethodDescrObject *)method;
        #if PY_VERSION_HEX < 0x03020000
        PyTypeObject *d_type = descr->d_type;
        #else
        PyTypeObject *d_type = descr->d_common.d_type;
        #endif
        return PyDescr_NewClassMethod(d_type, descr->d_method);
    }
#endif
    else if (PyMethod_Check(method)) {
        return PyClassMethod_New(PyMethod_GET_FUNCTION(method));
    }
    else if (PyCFunction_Check(method)) {
        return PyClassMethod_New(method);
    }
#ifdef __Pyx_CyFunction_USED
    else if (__Pyx_CyFunction_Check(method)) {
        return PyClassMethod_New(method);
    }
#endif
    PyErr_SetString(PyExc_TypeError,
                   "Class-level classmethod() can only be called on "
                   "a method_descriptor or instance method.");
    return NULL;
}

/* FetchCommonType */
          static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) {
    PyObject* fake_module;
    PyTypeObject* cached_type = NULL;
    fake_module = PyImport_AddModule((char*) "_cython_" CYTHON_ABI);
    if (!fake_module) return NULL;
    Py_INCREF(fake_module);
    cached_type = (PyTypeObject*) PyObject_GetAttrString(fake_module, type->tp_name);
    if (cached_type) {
        if (!PyType_Check((PyObject*)cached_type)) {
            PyErr_Format(PyExc_TypeError,
                "Shared Cython type %.200s is not a type object",
                type->tp_name);
            goto bad;
        }
        if (cached_type->tp_basicsize != type->tp_basicsize) {
            PyErr_Format(PyExc_TypeError,
                "Shared Cython type %.200s has the wrong size, try recompiling",
                type->tp_name);
            goto bad;
        }
    } else {
        if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad;
        PyErr_Clear();
        if (PyType_Ready(type) < 0) goto bad;
        if (PyObject_SetAttrString(fake_module, type->tp_name, (PyObject*) type) < 0)
            goto bad;
        Py_INCREF(type);
        cached_type = type;
    }
done:
    Py_DECREF(fake_module);
    return cached_type;
bad:
    Py_XDECREF(cached_type);
    cached_type = NULL;
    goto done;
}

/* CythonFunctionShared */
          #include <structmember.h>
static PyObject *
__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *closure)
{
    if (unlikely(op->func_doc == NULL)) {
        if (op->func.m_ml->ml_doc) {
#if PY_MAJOR_VERSION >= 3
            op->func_doc = PyUnicode_FromString(op->func.m_ml->ml_doc);
#else
            op->func_doc = PyString_FromString(op->func.m_ml->ml_doc);
#endif
            if (unlikely(op->func_doc == NULL))
                return NULL;
        } else {
            Py_INCREF(Py_None);
            return Py_None;
        }
    }
    Py_INCREF(op->func_doc);
    return op->func_doc;
}
static int
__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, CYTHON_UNUSED void *context)
{
    PyObject *tmp = op->func_doc;
    if (value == NULL) {
        value = Py_None;
    }
    Py_INCREF(value);
    op->func_doc = value;
    Py_XDECREF(tmp);
    return 0;
}
static PyObject *
__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
{
    if (unlikely(op->func_name == NULL)) {
#if PY_MAJOR_VERSION >= 3
        op->func_name = PyUnicode_InternFromString(op->func.m_ml->ml_name);
#else
        op->func_name = PyString_InternFromString(op->func.m_ml->ml_name);
#endif
        if (unlikely(op->func_name == NULL))
            return NULL;
    }
    Py_INCREF(op->func_name);
    return op->func_name;
}
static int
__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, CYTHON_UNUSED void *context)
{
    PyObject *tmp;
#if PY_MAJOR_VERSION >= 3
    if (unlikely(value == NULL || !PyUnicode_Check(value)))
#else
    if (unlikely(value == NULL || !PyString_Check(value)))
#endif
    {
        PyErr_SetString(PyExc_TypeError,
                        "__name__ must be set to a string object");
        return -1;
    }
    tmp = op->func_name;
    Py_INCREF(value);
    op->func_name = value;
    Py_XDECREF(tmp);
    return 0;
}
static PyObject *
__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
{
    Py_INCREF(op->func_qualname);
    return op->func_qualname;
}
static int
__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, CYTHON_UNUSED void *context)
{
    PyObject *tmp;
#if PY_MAJOR_VERSION >= 3
    if (unlikely(value == NULL || !PyUnicode_Check(value)))
#else
    if (unlikely(value == NULL || !PyString_Check(value)))
#endif
    {
        PyErr_SetString(PyExc_TypeError,
                        "__qualname__ must be set to a string object");
        return -1;
    }
    tmp = op->func_qualname;
    Py_INCREF(value);
    op->func_qualname = value;
    Py_XDECREF(tmp);
    return 0;
}
static PyObject *
__Pyx_CyFunction_get_self(__pyx_CyFunctionObject *m, CYTHON_UNUSED void *closure)
{
    PyObject *self;
    self = m->func_closure;
    if (self == NULL)
        self = Py_None;
    Py_INCREF(self);
    return self;
}
static PyObject *
__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
{
    if (unlikely(op->func_dict == NULL)) {
        op->func_dict = PyDict_New();
        if (unlikely(op->func_dict == NULL))
            return NULL;
    }
    Py_INCREF(op->func_dict);
    return op->func_dict;
}
static int
__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, CYTHON_UNUSED void *context)
{
    PyObject *tmp;
    if (unlikely(value == NULL)) {
        PyErr_SetString(PyExc_TypeError,
               "function's dictionary may not be deleted");
        return -1;
    }
    if (unlikely(!PyDict_Check(value))) {
        PyErr_SetString(PyExc_TypeError,
               "setting function's dictionary to a non-dict");
        return -1;
    }
    tmp = op->func_dict;
    Py_INCREF(value);
    op->func_dict = value;
    Py_XDECREF(tmp);
    return 0;
}
static PyObject *
__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
{
    Py_INCREF(op->func_globals);
    return op->func_globals;
}
static PyObject *
__Pyx_CyFunction_get_closure(CYTHON_UNUSED __pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
{
    Py_INCREF(Py_None);
    return Py_None;
}
static PyObject *
__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context)
{
    PyObject* result = (op->func_code) ? op->func_code : Py_None;
    Py_INCREF(result);
    return result;
}
static int
__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) {
    int result = 0;
    PyObject *res = op->defaults_getter((PyObject *) op);
    if (unlikely(!res))
        return -1;
    #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
    op->defaults_tuple = PyTuple_GET_ITEM(res, 0);
    Py_INCREF(op->defaults_tuple);
    op->defaults_kwdict = PyTuple_GET_ITEM(res, 1);
    Py_INCREF(op->defaults_kwdict);
    #else
    op->defaults_tuple = PySequence_ITEM(res, 0);
    if (unlikely(!op->defaults_tuple)) result = -1;
    else {
        op->defaults_kwdict = PySequence_ITEM(res, 1);
        if (unlikely(!op->defaults_kwdict)) result = -1;
    }
    #endif
    Py_DECREF(res);
    return result;
}
static int
__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, CYTHON_UNUSED void *context) {
    PyObject* tmp;
    if (!value) {
        value = Py_None;
    } else if (value != Py_None && !PyTuple_Check(value)) {
        PyErr_SetString(PyExc_TypeError,
                        "__defaults__ must be set to a tuple object");
        return -1;
    }
    Py_INCREF(value);
    tmp = op->defaults_tuple;
    op->defaults_tuple = value;
    Py_XDECREF(tmp);
    return 0;
}
static PyObject *
__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context) {
    PyObject* result = op->defaults_tuple;
    if (unlikely(!result)) {
        if (op->defaults_getter) {
            if (__Pyx_CyFunction_init_defaults(op) < 0) return NULL;
            result = op->defaults_tuple;
        } else {
            result = Py_None;
        }
    }
    Py_INCREF(result);
    return result;
}
static int
__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, CYTHON_UNUSED void *context) {
    PyObject* tmp;
    if (!value) {
        value = Py_None;
    } else if (value != Py_None && !PyDict_Check(value)) {
        PyErr_SetString(PyExc_TypeError,
                        "__kwdefaults__ must be set to a dict object");
        return -1;
    }
    Py_INCREF(value);
    tmp = op->defaults_kwdict;
    op->defaults_kwdict = value;
    Py_XDECREF(tmp);
    return 0;
}
static PyObject *
__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context) {
    PyObject* result = op->defaults_kwdict;
    if (unlikely(!result)) {
        if (op->defaults_getter) {
            if (__Pyx_CyFunction_init_defaults(op) < 0) return NULL;
            result = op->defaults_kwdict;
        } else {
            result = Py_None;
        }
    }
    Py_INCREF(result);
    return result;
}
static int
__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, CYTHON_UNUSED void *context) {
    PyObject* tmp;
    if (!value || value == Py_None) {
        value = NULL;
    } else if (!PyDict_Check(value)) {
        PyErr_SetString(PyExc_TypeError,
                        "__annotations__ must be set to a dict object");
        return -1;
    }
    Py_XINCREF(value);
    tmp = op->func_annotations;
    op->func_annotations = value;
    Py_XDECREF(tmp);
    return 0;
}
static PyObject *
__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *context) {
    PyObject* result = op->func_annotations;
    if (unlikely(!result)) {
        result = PyDict_New();
        if (unlikely(!result)) return NULL;
        op->func_annotations = result;
    }
    Py_INCREF(result);
    return result;
}
static PyGetSetDef __pyx_CyFunction_getsets[] = {
    {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
    {(char *) "__doc__",  (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
    {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
    {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
    {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0},
    {(char *) "__self__", (getter)__Pyx_CyFunction_get_self, 0, 0, 0},
    {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
    {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
    {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
    {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
    {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
    {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
    {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
    {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
    {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0},
    {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0},
    {(char *) "__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0},
    {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0},
    {0, 0, 0, 0, 0}
};
static PyMemberDef __pyx_CyFunction_members[] = {
    {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), PY_WRITE_RESTRICTED, 0},
    {0, 0, 0,  0, 0}
};
static PyObject *
__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, CYTHON_UNUSED PyObject *args)
{
#if PY_MAJOR_VERSION >= 3
    return PyUnicode_FromString(m->func.m_ml->ml_name);
#else
    return PyString_FromString(m->func.m_ml->ml_name);
#endif
}
static PyMethodDef __pyx_CyFunction_methods[] = {
    {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0},
    {0, 0, 0, 0}
};
#if PY_VERSION_HEX < 0x030500A0
#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist)
#else
#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func.m_weakreflist)
#endif
static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname,
                                       PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
    if (unlikely(op == NULL))
        return NULL;
    op->flags = flags;
    __Pyx_CyFunction_weakreflist(op) = NULL;
    op->func.m_ml = ml;
    op->func.m_self = (PyObject *) op;
    Py_XINCREF(closure);
    op->func_closure = closure;
    Py_XINCREF(module);
    op->func.m_module = module;
    op->func_dict = NULL;
    op->func_name = NULL;
    Py_INCREF(qualname);
    op->func_qualname = qualname;
    op->func_doc = NULL;
    op->func_classobj = NULL;
    op->func_globals = globals;
    Py_INCREF(op->func_globals);
    Py_XINCREF(code);
    op->func_code = code;
    op->defaults_pyobjects = 0;
    op->defaults_size = 0;
    op->defaults = NULL;
    op->defaults_tuple = NULL;
    op->defaults_kwdict = NULL;
    op->defaults_getter = NULL;
    op->func_annotations = NULL;
    return (PyObject *) op;
}
static int
__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m)
{
    Py_CLEAR(m->func_closure);
    Py_CLEAR(m->func.m_module);
    Py_CLEAR(m->func_dict);
    Py_CLEAR(m->func_name);
    Py_CLEAR(m->func_qualname);
    Py_CLEAR(m->func_doc);
    Py_CLEAR(m->func_globals);
    Py_CLEAR(m->func_code);
    Py_CLEAR(m->func_classobj);
    Py_CLEAR(m->defaults_tuple);
    Py_CLEAR(m->defaults_kwdict);
    Py_CLEAR(m->func_annotations);
    if (m->defaults) {
        PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
        int i;
        for (i = 0; i < m->defaults_pyobjects; i++)
            Py_XDECREF(pydefaults[i]);
        PyObject_Free(m->defaults);
        m->defaults = NULL;
    }
    return 0;
}
static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m)
{
    if (__Pyx_CyFunction_weakreflist(m) != NULL)
        PyObject_ClearWeakRefs((PyObject *) m);
    __Pyx_CyFunction_clear(m);
    PyObject_GC_Del(m);
}
static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m)
{
    PyObject_GC_UnTrack(m);
    __Pyx__CyFunction_dealloc(m);
}
static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg)
{
    Py_VISIT(m->func_closure);
    Py_VISIT(m->func.m_module);
    Py_VISIT(m->func_dict);
    Py_VISIT(m->func_name);
    Py_VISIT(m->func_qualname);
    Py_VISIT(m->func_doc);
    Py_VISIT(m->func_globals);
    Py_VISIT(m->func_code);
    Py_VISIT(m->func_classobj);
    Py_VISIT(m->defaults_tuple);
    Py_VISIT(m->defaults_kwdict);
    if (m->defaults) {
        PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
        int i;
        for (i = 0; i < m->defaults_pyobjects; i++)
            Py_VISIT(pydefaults[i]);
    }
    return 0;
}
static PyObject *__Pyx_CyFunction_descr_get(PyObject *func, PyObject *obj, PyObject *type)
{
    __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
    if (m->flags & __Pyx_CYFUNCTION_STATICMETHOD) {
        Py_INCREF(func);
        return func;
    }
    if (m->flags & __Pyx_CYFUNCTION_CLASSMETHOD) {
        if (type == NULL)
            type = (PyObject *)(Py_TYPE(obj));
        return __Pyx_PyMethod_New(func, type, (PyObject *)(Py_TYPE(type)));
    }
    if (obj == Py_None)
        obj = NULL;
    return __Pyx_PyMethod_New(func, obj, type);
}
static PyObject*
__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op)
{
#if PY_MAJOR_VERSION >= 3
    return PyUnicode_FromFormat("<cyfunction %U at %p>",
                                op->func_qualname, (void *)op);
#else
    return PyString_FromFormat("<cyfunction %s at %p>",
                               PyString_AsString(op->func_qualname), (void *)op);
#endif
}
static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) {
    PyCFunctionObject* f = (PyCFunctionObject*)func;
    PyCFunction meth = f->m_ml->ml_meth;
    Py_ssize_t size;
    switch (f->m_ml->ml_flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) {
    case METH_VARARGS:
        if (likely(kw == NULL || PyDict_Size(kw) == 0))
            return (*meth)(self, arg);
        break;
    case METH_VARARGS | METH_KEYWORDS:
        return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw);
    case METH_NOARGS:
        if (likely(kw == NULL || PyDict_Size(kw) == 0)) {
            size = PyTuple_GET_SIZE(arg);
            if (likely(size == 0))
                return (*meth)(self, NULL);
            PyErr_Format(PyExc_TypeError,
                "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)",
                f->m_ml->ml_name, size);
            return NULL;
        }
        break;
    case METH_O:
        if (likely(kw == NULL || PyDict_Size(kw) == 0)) {
            size = PyTuple_GET_SIZE(arg);
            if (likely(size == 1)) {
                PyObject *result, *arg0;
                #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS
                arg0 = PyTuple_GET_ITEM(arg, 0);
                #else
                arg0 = PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL;
                #endif
                result = (*meth)(self, arg0);
                #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS)
                Py_DECREF(arg0);
                #endif
                return result;
            }
            PyErr_Format(PyExc_TypeError,
                "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)",
                f->m_ml->ml_name, size);
            return NULL;
        }
        break;
    default:
        PyErr_SetString(PyExc_SystemError, "Bad call flags in "
                        "__Pyx_CyFunction_Call. METH_OLDARGS is no "
                        "longer supported!");
        return NULL;
    }
    PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments",
                 f->m_ml->ml_name);
    return NULL;
}
static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) {
    return __Pyx_CyFunction_CallMethod(func, ((PyCFunctionObject*)func)->m_self, arg, kw);
}
static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) {
    PyObject *result;
    __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func;
    if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) {
        Py_ssize_t argc;
        PyObject *new_args;
        PyObject *self;
        argc = PyTuple_GET_SIZE(args);
        new_args = PyTuple_GetSlice(args, 1, argc);
        if (unlikely(!new_args))
            return NULL;
        self = PyTuple_GetItem(args, 0);
        if (unlikely(!self)) {
            Py_DECREF(new_args);
            return NULL;
        }
        result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw);
        Py_DECREF(new_args);
    } else {
        result = __Pyx_CyFunction_Call(func, args, kw);
    }
    return result;
}
static PyTypeObject __pyx_CyFunctionType_type = {
    PyVarObject_HEAD_INIT(0, 0)
    "cython_function_or_method",
    sizeof(__pyx_CyFunctionObject),
    0,
    (destructor) __Pyx_CyFunction_dealloc,
    0,
    0,
    0,
#if PY_MAJOR_VERSION < 3
    0,
#else
    0,
#endif
    (reprfunc) __Pyx_CyFunction_repr,
    0,
    0,
    0,
    0,
    __Pyx_CyFunction_CallAsMethod,
    0,
    0,
    0,
    0,
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
    0,
    (traverseproc) __Pyx_CyFunction_traverse,
    (inquiry) __Pyx_CyFunction_clear,
    0,
#if PY_VERSION_HEX < 0x030500A0
    offsetof(__pyx_CyFunctionObject, func_weakreflist),
#else
    offsetof(PyCFunctionObject, m_weakreflist),
#endif
    0,
    0,
    __pyx_CyFunction_methods,
    __pyx_CyFunction_members,
    __pyx_CyFunction_getsets,
    0,
    0,
    __Pyx_CyFunction_descr_get,
    0,
    offsetof(__pyx_CyFunctionObject, func_dict),
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
    0,
#if PY_VERSION_HEX >= 0x030400a1
    0,
#endif
#if PY_VERSION_HEX >= 0x030800b1
    0,
#endif
#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
    0,
#endif
};
static int __pyx_CyFunction_init(void) {
    __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type);
    if (unlikely(__pyx_CyFunctionType == NULL)) {
        return -1;
    }
    return 0;
}
static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) {
    __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
    m->defaults = PyObject_Malloc(size);
    if (unlikely(!m->defaults))
        return PyErr_NoMemory();
    memset(m->defaults, 0, size);
    m->defaults_pyobjects = pyobjects;
    m->defaults_size = size;
    return m->defaults;
}
static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) {
    __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
    m->defaults_tuple = tuple;
    Py_INCREF(tuple);
}
static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) {
    __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
    m->defaults_kwdict = dict;
    Py_INCREF(dict);
}
static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) {
    __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
    m->func_annotations = dict;
    Py_INCREF(dict);
}

/* CythonFunction */
          static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname,
                                      PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
    PyObject *op = __Pyx_CyFunction_Init(
        PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType),
        ml, flags, qualname, closure, module, globals, code
    );
    if (likely(op)) {
        PyObject_GC_Track(op);
    }
    return op;
}

/* Py3ClassCreate */
          static PyObject *__Pyx_Py3MetaclassPrepare(PyObject *metaclass, PyObject *bases, PyObject *name,
                                           PyObject *qualname, PyObject *mkw, PyObject *modname, PyObject *doc) {
    PyObject *ns;
    if (metaclass) {
        PyObject *prep = __Pyx_PyObject_GetAttrStr(metaclass, __pyx_n_s_prepare);
        if (prep) {
            PyObject *pargs = PyTuple_Pack(2, name, bases);
            if (unlikely(!pargs)) {
                Py_DECREF(prep);
                return NULL;
            }
            ns = PyObject_Call(prep, pargs, mkw);
            Py_DECREF(prep);
            Py_DECREF(pargs);
        } else {
            if (unlikely(!PyErr_ExceptionMatches(PyExc_AttributeError)))
                return NULL;
            PyErr_Clear();
            ns = PyDict_New();
        }
    } else {
        ns = PyDict_New();
    }
    if (unlikely(!ns))
        return NULL;
    if (unlikely(PyObject_SetItem(ns, __pyx_n_s_module, modname) < 0)) goto bad;
    if (unlikely(PyObject_SetItem(ns, __pyx_n_s_qualname, qualname) < 0)) goto bad;
    if (unlikely(doc && PyObject_SetItem(ns, __pyx_n_s_doc, doc) < 0)) goto bad;
    return ns;
bad:
    Py_DECREF(ns);
    return NULL;
}
static PyObject *__Pyx_Py3ClassCreate(PyObject *metaclass, PyObject *name, PyObject *bases,
                                      PyObject *dict, PyObject *mkw,
                                      int calculate_metaclass, int allow_py2_metaclass) {
    PyObject *result, *margs;
    PyObject *owned_metaclass = NULL;
    if (allow_py2_metaclass) {
        owned_metaclass = PyObject_GetItem(dict, __pyx_n_s_metaclass);
        if (owned_metaclass) {
            metaclass = owned_metaclass;
        } else if (likely(PyErr_ExceptionMatches(PyExc_KeyError))) {
            PyErr_Clear();
        } else {
            return NULL;
        }
    }
    if (calculate_metaclass && (!metaclass || PyType_Check(metaclass))) {
        metaclass = __Pyx_CalculateMetaclass((PyTypeObject*) metaclass, bases);
        Py_XDECREF(owned_metaclass);
        if (unlikely(!metaclass))
            return NULL;
        owned_metaclass = metaclass;
    }
    margs = PyTuple_Pack(3, name, bases, dict);
    if (unlikely(!margs)) {
        result = NULL;
    } else {
        result = PyObject_Call(metaclass, margs, mkw);
        Py_DECREF(margs);
    }
    Py_XDECREF(owned_metaclass);
    return result;
}

/* CLineInTraceback */
          #ifndef CYTHON_CLINE_IN_TRACEBACK
static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) {
    PyObject *use_cline;
    PyObject *ptype, *pvalue, *ptraceback;
#if CYTHON_COMPILING_IN_CPYTHON
    PyObject **cython_runtime_dict;
#endif
    if (unlikely(!__pyx_cython_runtime)) {
        return c_line;
    }
    __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback);
#if CYTHON_COMPILING_IN_CPYTHON
    cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime);
    if (likely(cython_runtime_dict)) {
        __PYX_PY_DICT_LOOKUP_IF_MODIFIED(
            use_cline, *cython_runtime_dict,
            __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback))
    } else
#endif
    {
      PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback);
      if (use_cline_obj) {
        use_cline = PyObject_Not(use_cline_obj) ? Py_False : Py_True;
        Py_DECREF(use_cline_obj);
      } else {
        PyErr_Clear();
        use_cline = NULL;
      }
    }
    if (!use_cline) {
        c_line = 0;
        PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False);
    }
    else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) {
        c_line = 0;
    }
    __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback);
    return c_line;
}
#endif

/* CodeObjectCache */
          static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
    int start = 0, mid = 0, end = count - 1;
    if (end >= 0 && code_line > entries[end].code_line) {
        return count;
    }
    while (start < end) {
        mid = start + (end - start) / 2;
        if (code_line < entries[mid].code_line) {
            end = mid;
        } else if (code_line > entries[mid].code_line) {
             start = mid + 1;
        } else {
            return mid;
        }
    }
    if (code_line <= entries[mid].code_line) {
        return mid;
    } else {
        return mid + 1;
    }
}
static PyCodeObject *__pyx_find_code_object(int code_line) {
    PyCodeObject* code_object;
    int pos;
    if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
        return NULL;
    }
    pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
    if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) {
        return NULL;
    }
    code_object = __pyx_code_cache.entries[pos].code_object;
    Py_INCREF(code_object);
    return code_object;
}
static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
    int pos, i;
    __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
    if (unlikely(!code_line)) {
        return;
    }
    if (unlikely(!entries)) {
        entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
        if (likely(entries)) {
            __pyx_code_cache.entries = entries;
            __pyx_code_cache.max_count = 64;
            __pyx_code_cache.count = 1;
            entries[0].code_line = code_line;
            entries[0].code_object = code_object;
            Py_INCREF(code_object);
        }
        return;
    }
    pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
    if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
        PyCodeObject* tmp = entries[pos].code_object;
        entries[pos].code_object = code_object;
        Py_DECREF(tmp);
        return;
    }
    if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
        int new_max = __pyx_code_cache.max_count + 64;
        entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
            __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry));
        if (unlikely(!entries)) {
            return;
        }
        __pyx_code_cache.entries = entries;
        __pyx_code_cache.max_count = new_max;
    }
    for (i=__pyx_code_cache.count; i>pos; i--) {
        entries[i] = entries[i-1];
    }
    entries[pos].code_line = code_line;
    entries[pos].code_object = code_object;
    __pyx_code_cache.count++;
    Py_INCREF(code_object);
}

/* AddTraceback */
          #include "compile.h"
#include "frameobject.h"
#include "traceback.h"
static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
            const char *funcname, int c_line,
            int py_line, const char *filename) {
    PyCodeObject *py_code = 0;
    PyObject *py_srcfile = 0;
    PyObject *py_funcname = 0;
    #if PY_MAJOR_VERSION < 3
    py_srcfile = PyString_FromString(filename);
    #else
    py_srcfile = PyUnicode_FromString(filename);
    #endif
    if (!py_srcfile) goto bad;
    if (c_line) {
        #if PY_MAJOR_VERSION < 3
        py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
        #else
        py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
        #endif
    }
    else {
        #if PY_MAJOR_VERSION < 3
        py_funcname = PyString_FromString(funcname);
        #else
        py_funcname = PyUnicode_FromString(funcname);
        #endif
    }
    if (!py_funcname) goto bad;
    py_code = __Pyx_PyCode_New(
        0,
        0,
        0,
        0,
        0,
        __pyx_empty_bytes, /*PyObject *code,*/
        __pyx_empty_tuple, /*PyObject *consts,*/
        __pyx_empty_tuple, /*PyObject *names,*/
        __pyx_empty_tuple, /*PyObject *varnames,*/
        __pyx_empty_tuple, /*PyObject *freevars,*/
        __pyx_empty_tuple, /*PyObject *cellvars,*/
        py_srcfile,   /*PyObject *filename,*/
        py_funcname,  /*PyObject *name,*/
        py_line,
        __pyx_empty_bytes  /*PyObject *lnotab*/
    );
    Py_DECREF(py_srcfile);
    Py_DECREF(py_funcname);
    return py_code;
bad:
    Py_XDECREF(py_srcfile);
    Py_XDECREF(py_funcname);
    return NULL;
}
static void __Pyx_AddTraceback(const char *funcname, int c_line,
                               int py_line, const char *filename) {
    PyCodeObject *py_code = 0;
    PyFrameObject *py_frame = 0;
    PyThreadState *tstate = __Pyx_PyThreadState_Current;
    if (c_line) {
        c_line = __Pyx_CLineForTraceback(tstate, c_line);
    }
    py_code = __pyx_find_code_object(c_line ? -c_line : py_line);
    if (!py_code) {
        py_code = __Pyx_CreateCodeObjectForTraceback(
            funcname, c_line, py_line, filename);
        if (!py_code) goto bad;
        __pyx_insert_code_object(c_line ? -c_line : py_line, py_code);
    }
    py_frame = PyFrame_New(
        tstate,            /*PyThreadState *tstate,*/
        py_code,           /*PyCodeObject *code,*/
        __pyx_d,    /*PyObject *globals,*/
        0                  /*PyObject *locals*/
    );
    if (!py_frame) goto bad;
    __Pyx_PyFrame_SetLineNumber(py_frame, py_line);
    PyTraceBack_Here(py_frame);
bad:
    Py_XDECREF(py_code);
    Py_XDECREF(py_frame);
}

/* CIntToPy */
          static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
    const long neg_one = (long) ((long) 0 - (long) 1), const_zero = (long) 0;
    const int is_unsigned = neg_one > const_zero;
    if (is_unsigned) {
        if (sizeof(long) < sizeof(long)) {
            return PyInt_FromLong((long) value);
        } else if (sizeof(long) <= sizeof(unsigned long)) {
            return PyLong_FromUnsignedLong((unsigned long) value);
#ifdef HAVE_LONG_LONG
        } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) {
            return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value);
#endif
        }
    } else {
        if (sizeof(long) <= sizeof(long)) {
            return PyInt_FromLong((long) value);
#ifdef HAVE_LONG_LONG
        } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) {
            return PyLong_FromLongLong((PY_LONG_LONG) value);
#endif
        }
    }
    {
        int one = 1; int little = (int)*(unsigned char *)&one;
        unsigned char *bytes = (unsigned char *)&value;
        return _PyLong_FromByteArray(bytes, sizeof(long),
                                     little, !is_unsigned);
    }
}

/* CIntFromPyVerify */
          #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\
    __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0)
#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\
    __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1)
#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\
    {\
        func_type value = func_value;\
        if (sizeof(target_type) < sizeof(func_type)) {\
            if (unlikely(value != (func_type) (target_type) value)) {\
                func_type zero = 0;\
                if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\
                    return (target_type) -1;\
                if (is_unsigned && unlikely(value < zero))\
                    goto raise_neg_overflow;\
                else\
                    goto raise_overflow;\
            }\
        }\
        return (target_type) value;\
    }

/* CIntFromPy */
          static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) {
    const long neg_one = (long) ((long) 0 - (long) 1), const_zero = (long) 0;
    const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
    if (likely(PyInt_Check(x))) {
        if (sizeof(long) < sizeof(long)) {
            __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x))
        } else {
            long val = PyInt_AS_LONG(x);
            if (is_unsigned && unlikely(val < 0)) {
                goto raise_neg_overflow;
            }
            return (long) val;
        }
    } else
#endif
    if (likely(PyLong_Check(x))) {
        if (is_unsigned) {
#if CYTHON_USE_PYLONG_INTERNALS
            const digit* digits = ((PyLongObject*)x)->ob_digit;
            switch (Py_SIZE(x)) {
                case  0: return (long) 0;
                case  1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0])
                case 2:
                    if (8 * sizeof(long) > 1 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) {
                            return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
                        }
                    }
                    break;
                case 3:
                    if (8 * sizeof(long) > 2 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) {
                            return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
                        }
                    }
                    break;
                case 4:
                    if (8 * sizeof(long) > 3 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) {
                            return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
                        }
                    }
                    break;
            }
#endif
#if CYTHON_COMPILING_IN_CPYTHON
            if (unlikely(Py_SIZE(x) < 0)) {
                goto raise_neg_overflow;
            }
#else
            {
                int result = PyObject_RichCompareBool(x, Py_False, Py_LT);
                if (unlikely(result < 0))
                    return (long) -1;
                if (unlikely(result == 1))
                    goto raise_neg_overflow;
            }
#endif
            if (sizeof(long) <= sizeof(unsigned long)) {
                __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x))
#ifdef HAVE_LONG_LONG
            } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) {
                __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
#endif
            }
        } else {
#if CYTHON_USE_PYLONG_INTERNALS
            const digit* digits = ((PyLongObject*)x)->ob_digit;
            switch (Py_SIZE(x)) {
                case  0: return (long) 0;
                case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, (sdigit) (-(sdigit)digits[0]))
                case  1: __PYX_VERIFY_RETURN_INT(long,  digit, +digits[0])
                case -2:
                    if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                            return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case 2:
                    if (8 * sizeof(long) > 1 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                            return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case -3:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                            return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case 3:
                    if (8 * sizeof(long) > 2 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                            return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case -4:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                            return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case 4:
                    if (8 * sizeof(long) > 3 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                            return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
            }
#endif
            if (sizeof(long) <= sizeof(long)) {
                __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x))
#ifdef HAVE_LONG_LONG
            } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) {
                __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x))
#endif
            }
        }
        {
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
            PyErr_SetString(PyExc_RuntimeError,
                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
            long val;
            PyObject *v = __Pyx_PyNumber_IntOrLong(x);
 #if PY_MAJOR_VERSION < 3
            if (likely(v) && !PyLong_Check(v)) {
                PyObject *tmp = v;
                v = PyNumber_Long(tmp);
                Py_DECREF(tmp);
            }
 #endif
            if (likely(v)) {
                int one = 1; int is_little = (int)*(unsigned char *)&one;
                unsigned char *bytes = (unsigned char *)&val;
                int ret = _PyLong_AsByteArray((PyLongObject *)v,
                                              bytes, sizeof(val),
                                              is_little, !is_unsigned);
                Py_DECREF(v);
                if (likely(!ret))
                    return val;
            }
#endif
            return (long) -1;
        }
    } else {
        long val;
        PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);
        if (!tmp) return (long) -1;
        val = __Pyx_PyInt_As_long(tmp);
        Py_DECREF(tmp);
        return val;
    }
raise_overflow:
    PyErr_SetString(PyExc_OverflowError,
        "value too large to convert to long");
    return (long) -1;
raise_neg_overflow:
    PyErr_SetString(PyExc_OverflowError,
        "can't convert negative value to long");
    return (long) -1;
}

/* CIntFromPy */
          static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) {
    const int neg_one = (int) ((int) 0 - (int) 1), const_zero = (int) 0;
    const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
    if (likely(PyInt_Check(x))) {
        if (sizeof(int) < sizeof(long)) {
            __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x))
        } else {
            long val = PyInt_AS_LONG(x);
            if (is_unsigned && unlikely(val < 0)) {
                goto raise_neg_overflow;
            }
            return (int) val;
        }
    } else
#endif
    if (likely(PyLong_Check(x))) {
        if (is_unsigned) {
#if CYTHON_USE_PYLONG_INTERNALS
            const digit* digits = ((PyLongObject*)x)->ob_digit;
            switch (Py_SIZE(x)) {
                case  0: return (int) 0;
                case  1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0])
                case 2:
                    if (8 * sizeof(int) > 1 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) {
                            return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
                        }
                    }
                    break;
                case 3:
                    if (8 * sizeof(int) > 2 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) {
                            return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
                        }
                    }
                    break;
                case 4:
                    if (8 * sizeof(int) > 3 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) {
                            return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
                        }
                    }
                    break;
            }
#endif
#if CYTHON_COMPILING_IN_CPYTHON
            if (unlikely(Py_SIZE(x) < 0)) {
                goto raise_neg_overflow;
            }
#else
            {
                int result = PyObject_RichCompareBool(x, Py_False, Py_LT);
                if (unlikely(result < 0))
                    return (int) -1;
                if (unlikely(result == 1))
                    goto raise_neg_overflow;
            }
#endif
            if (sizeof(int) <= sizeof(unsigned long)) {
                __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x))
#ifdef HAVE_LONG_LONG
            } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) {
                __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
#endif
            }
        } else {
#if CYTHON_USE_PYLONG_INTERNALS
            const digit* digits = ((PyLongObject*)x)->ob_digit;
            switch (Py_SIZE(x)) {
                case  0: return (int) 0;
                case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, (sdigit) (-(sdigit)digits[0]))
                case  1: __PYX_VERIFY_RETURN_INT(int,  digit, +digits[0])
                case -2:
                    if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) {
                            return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
                        }
                    }
                    break;
                case 2:
                    if (8 * sizeof(int) > 1 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) {
                            return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
                        }
                    }
                    break;
                case -3:
                    if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) {
                            return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
                        }
                    }
                    break;
                case 3:
                    if (8 * sizeof(int) > 2 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) {
                            return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
                        }
                    }
                    break;
                case -4:
                    if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) {
                            return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
                        }
                    }
                    break;
                case 4:
                    if (8 * sizeof(int) > 3 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) {
                            return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
                        }
                    }
                    break;
            }
#endif
            if (sizeof(int) <= sizeof(long)) {
                __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x))
#ifdef HAVE_LONG_LONG
            } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) {
                __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x))
#endif
            }
        }
        {
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
            PyErr_SetString(PyExc_RuntimeError,
                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
            int val;
            PyObject *v = __Pyx_PyNumber_IntOrLong(x);
 #if PY_MAJOR_VERSION < 3
            if (likely(v) && !PyLong_Check(v)) {
                PyObject *tmp = v;
                v = PyNumber_Long(tmp);
                Py_DECREF(tmp);
            }
 #endif
            if (likely(v)) {
                int one = 1; int is_little = (int)*(unsigned char *)&one;
                unsigned char *bytes = (unsigned char *)&val;
                int ret = _PyLong_AsByteArray((PyLongObject *)v,
                                              bytes, sizeof(val),
                                              is_little, !is_unsigned);
                Py_DECREF(v);
                if (likely(!ret))
                    return val;
            }
#endif
            return (int) -1;
        }
    } else {
        int val;
        PyObject *tmp = __Pyx_PyNumber_IntOrLong(x);
        if (!tmp) return (int) -1;
        val = __Pyx_PyInt_As_int(tmp);
        Py_DECREF(tmp);
        return val;
    }
raise_overflow:
    PyErr_SetString(PyExc_OverflowError,
        "value too large to convert to int");
    return (int) -1;
raise_neg_overflow:
    PyErr_SetString(PyExc_OverflowError,
        "can't convert negative value to int");
    return (int) -1;
}

/* FastTypeChecks */
          #if CYTHON_COMPILING_IN_CPYTHON
static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) {
    while (a) {
        a = a->tp_base;
        if (a == b)
            return 1;
    }
    return b == &PyBaseObject_Type;
}
static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) {
    PyObject *mro;
    if (a == b) return 1;
    mro = a->tp_mro;
    if (likely(mro)) {
        Py_ssize_t i, n;
        n = PyTuple_GET_SIZE(mro);
        for (i = 0; i < n; i++) {
            if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b)
                return 1;
        }
        return 0;
    }
    return __Pyx_InBases(a, b);
}
#if PY_MAJOR_VERSION == 2
static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) {
    PyObject *exception, *value, *tb;
    int res;
    __Pyx_PyThreadState_declare
    __Pyx_PyThreadState_assign
    __Pyx_ErrFetch(&exception, &value, &tb);
    res = exc_type1 ? PyObject_IsSubclass(err, exc_type1) : 0;
    if (unlikely(res == -1)) {
        PyErr_WriteUnraisable(err);
        res = 0;
    }
    if (!res) {
        res = PyObject_IsSubclass(err, exc_type2);
        if (unlikely(res == -1)) {
            PyErr_WriteUnraisable(err);
            res = 0;
        }
    }
    __Pyx_ErrRestore(exception, value, tb);
    return res;
}
#else
static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) {
    int res = exc_type1 ? __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type1) : 0;
    if (!res) {
        res = __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2);
    }
    return res;
}
#endif
static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) {
    Py_ssize_t i, n;
    assert(PyExceptionClass_Check(exc_type));
    n = PyTuple_GET_SIZE(tuple);
#if PY_MAJOR_VERSION >= 3
    for (i=0; i<n; i++) {
        if (exc_type == PyTuple_GET_ITEM(tuple, i)) return 1;
    }
#endif
    for (i=0; i<n; i++) {
        PyObject *t = PyTuple_GET_ITEM(tuple, i);
        #if PY_MAJOR_VERSION < 3
        if (likely(exc_type == t)) return 1;
        #endif
        if (likely(PyExceptionClass_Check(t))) {
            if (__Pyx_inner_PyErr_GivenExceptionMatches2(exc_type, NULL, t)) return 1;
        } else {
        }
    }
    return 0;
}
static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject* exc_type) {
    if (likely(err == exc_type)) return 1;
    if (likely(PyExceptionClass_Check(err))) {
        if (likely(PyExceptionClass_Check(exc_type))) {
            return __Pyx_inner_PyErr_GivenExceptionMatches2(err, NULL, exc_type);
        } else if (likely(PyTuple_Check(exc_type))) {
            return __Pyx_PyErr_GivenExceptionMatchesTuple(err, exc_type);
        } else {
        }
    }
    return PyErr_GivenExceptionMatches(err, exc_type);
}
static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *exc_type1, PyObject *exc_type2) {
    assert(PyExceptionClass_Check(exc_type1));
    assert(PyExceptionClass_Check(exc_type2));
    if (likely(err == exc_type1 || err == exc_type2)) return 1;
    if (likely(PyExceptionClass_Check(err))) {
        return __Pyx_inner_PyErr_GivenExceptionMatches2(err, exc_type1, exc_type2);
    }
    return (PyErr_GivenExceptionMatches(err, exc_type1) || PyErr_GivenExceptionMatches(err, exc_type2));
}
#endif

/* CheckBinaryVersion */
          static int __Pyx_check_binary_version(void) {
    char ctversion[4], rtversion[4];
    PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
    PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion());
    if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) {
        char message[200];
        PyOS_snprintf(message, sizeof(message),
                      "compiletime version %s of module '%.100s' "
                      "does not match runtime version %s",
                      ctversion, __Pyx_MODULE_NAME, rtversion);
        return PyErr_WarnEx(NULL, message, 1);
    }
    return 0;
}

/* InitStrings */
          static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
    while (t->p) {
        #if PY_MAJOR_VERSION < 3
        if (t->is_unicode) {
            *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
        } else if (t->intern) {
            *t->p = PyString_InternFromString(t->s);
        } else {
            *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
        }
        #else
        if (t->is_unicode | t->is_str) {
            if (t->intern) {
                *t->p = PyUnicode_InternFromString(t->s);
            } else if (t->encoding) {
                *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
            } else {
                *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
            }
        } else {
            *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
        }
        #endif
        if (!*t->p)
            return -1;
        if (PyObject_Hash(*t->p) == -1)
            return -1;
        ++t;
    }
    return 0;
}

static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
    return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str));
}
static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) {
    Py_ssize_t ignore;
    return __Pyx_PyObject_AsStringAndSize(o, &ignore);
}
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
#if !CYTHON_PEP393_ENABLED
static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
    char* defenc_c;
    PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
    if (!defenc) return NULL;
    defenc_c = PyBytes_AS_STRING(defenc);
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
    {
        char* end = defenc_c + PyBytes_GET_SIZE(defenc);
        char* c;
        for (c = defenc_c; c < end; c++) {
            if ((unsigned char) (*c) >= 128) {
                PyUnicode_AsASCIIString(o);
                return NULL;
            }
        }
    }
#endif
    *length = PyBytes_GET_SIZE(defenc);
    return defenc_c;
}
#else
static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
    if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL;
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
    if (likely(PyUnicode_IS_ASCII(o))) {
        *length = PyUnicode_GET_LENGTH(o);
        return PyUnicode_AsUTF8(o);
    } else {
        PyUnicode_AsASCIIString(o);
        return NULL;
    }
#else
    return PyUnicode_AsUTF8AndSize(o, length);
#endif
}
#endif
#endif
static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
    if (
#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
            __Pyx_sys_getdefaultencoding_not_ascii &&
#endif
            PyUnicode_Check(o)) {
        return __Pyx_PyUnicode_AsStringAndSize(o, length);
    } else
#endif
#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE))
    if (PyByteArray_Check(o)) {
        *length = PyByteArray_GET_SIZE(o);
        return PyByteArray_AS_STRING(o);
    } else
#endif
    {
        char* result;
        int r = PyBytes_AsStringAndSize(o, &result, length);
        if (unlikely(r < 0)) {
            return NULL;
        } else {
            return result;
        }
    }
}
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
   int is_true = x == Py_True;
   if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
   else return PyObject_IsTrue(x);
}
static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) {
    int retval;
    if (unlikely(!x)) return -1;
    retval = __Pyx_PyObject_IsTrue(x);
    Py_DECREF(x);
    return retval;
}
static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) {
#if PY_MAJOR_VERSION >= 3
    if (PyLong_Check(result)) {
        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
                "__int__ returned non-int (type %.200s).  "
                "The ability to return an instance of a strict subclass of int "
                "is deprecated, and may be removed in a future version of Python.",
                Py_TYPE(result)->tp_name)) {
            Py_DECREF(result);
            return NULL;
        }
        return result;
    }
#endif
    PyErr_Format(PyExc_TypeError,
                 "__%.4s__ returned non-%.4s (type %.200s)",
                 type_name, type_name, Py_TYPE(result)->tp_name);
    Py_DECREF(result);
    return NULL;
}
static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) {
#if CYTHON_USE_TYPE_SLOTS
  PyNumberMethods *m;
#endif
  const char *name = NULL;
  PyObject *res = NULL;
#if PY_MAJOR_VERSION < 3
  if (likely(PyInt_Check(x) || PyLong_Check(x)))
#else
  if (likely(PyLong_Check(x)))
#endif
    return __Pyx_NewRef(x);
#if CYTHON_USE_TYPE_SLOTS
  m = Py_TYPE(x)->tp_as_number;
  #if PY_MAJOR_VERSION < 3
  if (m && m->nb_int) {
    name = "int";
    res = m->nb_int(x);
  }
  else if (m && m->nb_long) {
    name = "long";
    res = m->nb_long(x);
  }
  #else
  if (likely(m && m->nb_int)) {
    name = "int";
    res = m->nb_int(x);
  }
  #endif
#else
  if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) {
    res = PyNumber_Int(x);
  }
#endif
  if (likely(res)) {
#if PY_MAJOR_VERSION < 3
    if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) {
#else
    if (unlikely(!PyLong_CheckExact(res))) {
#endif
        return __Pyx_PyNumber_IntOrLongWrongResultType(res, name);
    }
  }
  else if (!PyErr_Occurred()) {
    PyErr_SetString(PyExc_TypeError,
                    "an integer is required");
  }
  return res;
}
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
  Py_ssize_t ival;
  PyObject *x;
#if PY_MAJOR_VERSION < 3
  if (likely(PyInt_CheckExact(b))) {
    if (sizeof(Py_ssize_t) >= sizeof(long))
        return PyInt_AS_LONG(b);
    else
        return PyInt_AsSsize_t(b);
  }
#endif
  if (likely(PyLong_CheckExact(b))) {
    #if CYTHON_USE_PYLONG_INTERNALS
    const digit* digits = ((PyLongObject*)b)->ob_digit;
    const Py_ssize_t size = Py_SIZE(b);
    if (likely(__Pyx_sst_abs(size) <= 1)) {
        ival = likely(size) ? digits[0] : 0;
        if (size == -1) ival = -ival;
        return ival;
    } else {
      switch (size) {
         case 2:
           if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) {
             return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
           }
           break;
         case -2:
           if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) {
             return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
           }
           break;
         case 3:
           if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) {
             return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
           }
           break;
         case -3:
           if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) {
             return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
           }
           break;
         case 4:
           if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) {
             return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
           }
           break;
         case -4:
           if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) {
             return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
           }
           break;
      }
    }
    #endif
    return PyLong_AsSsize_t(b);
  }
  x = PyNumber_Index(b);
  if (!x) return -1;
  ival = PyInt_AsSsize_t(x);
  Py_DECREF(x);
  return ival;
}
static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) {
  return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False);
}
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
    return PyInt_FromSize_t(ival);
}


#endif /* Py_PYTHON_H */