hikettei · hikettei · Jun 3, 2024 · May 29, 2024 · May 30, 2024 · May 30, 2024
diff --git a/cl-waffe2.asd b/cl-waffe2.asd
@@ -42,7 +42,6 @@
 	       :closer-mop
 	       :trivial-garbage
 	       :cl-waffe2/simd-extension
-
 	       :cl-environments
 	       :numpy-file-format
 	       :jonathan)
@@ -158,20 +157,7 @@
 	       (:file "package")
 
 
-	       (:file "backends/lisp/wf2model")
-	       (:file "backends/JITCPUTensor/package")
-	       (:file "backends/JITCPUTensor/tensor")
-	       (:file "backends/JITCPUTensor/compiler")
-	       (:file "backends/JITCPUTensor/blueprint")	       
-	       (:file "backends/JITCPUTensor/ir")
-	       (:file "backends/JITCPUTensor/on-finalizing")
-	       (:file "backends/JITCPUTensor/dtype")
-	       (:file "backends/JITCPUTensor/foreign-function")
-
-
-	       (:file "backends/JITCPUTensor/impls/arithmetic")
-	       (:file "backends/JITCPUTensor/impls/math")
-
+	       (:file "backends/lisp/wf2model")	       
 	       (:file "optimizers/defoptimizer")
 
 	       (:file "array-converter")
@@ -251,9 +237,6 @@
 	       (:file "backends/cpu/t/arithmetic")
 
 	       (:file "backends/lisp/t/package")
-
-	       (:file "backends/JITCPUTensor/t/package")
-	       (:file "backends/JITCPUTensor/t/jit")
 
 	       (:file "nn/t/package")
 	       (:file "nn/t/conv")

diff --git a/docs/apis/reference.lisp b/docs/apis/reference.lisp
@@ -330,35 +330,3 @@ To get further performance on CPU, SIMD Extension must be installed on your devi
 		  ,@body)))
     (with-op-doc (find-class 'CPUTensor) 't)))
 
-(with-page *cpu-jit-tensor-backend* "[backend] :cl-waffe2/backends.jit.cpu"
-  (insert "
-Those backends without JIT, relies on `do-compiled-loop` or `call-with-view` to calculate only part of matrices: complicated offsets and permution (i.e: `!view` and `!permute`). However, under certain circumstances this can be difficult to parallelise by simply calling a CFFI function; and this backend is intended to solve the problem.
-
-This package provides a `JITCPUTensor` backend which works by jit-compiling whole code to vectorized C, and is only for the purpose of optimising memory layout, so currently only four arithmetic operations and copying are implemented. (OpFusion is remained to be implemented; but it is definitely possible to fuse several ops)
-
-Optimise the memory layout by enclosing the code you want to optimise the layout in:
-
-```lisp
-(with-cpu-jit (CPUTensor LispTensor)
-    ;; body
-    )
-```
-
-Tips: Use the `proceed-bench` function to know the bottleneck; if `MoveTensorNode` combined with `PermuteNode` is slow compared to other nodes, the memory layout is remained to be optimized for example.
-")
-
-  (macrolet ((with-op-doc (name type &body body)
-	       `(progn
-		  (placedoc ,name ,type)
-		  ,@body)))
-    (with-op-doc '*default-c-compiler* 'variable)
-    (with-op-doc '*compiler-flags* 'variable)
-    (with-op-doc '*viz-compiled-code* 'variable)
-
-    (with-op-doc (find-class 'JITCPUTensor) 't)
-
-    (with-op-doc #'cpujit-set-config 'function)
-    (with-op-doc (macro-function 'with-cpu-jit) 'function)
-
-    ))
-
diff --git a/docs/cl-waffe2-docs/docs/cpu-jit-tensor-backend.md b/docs/cl-waffe2-docs/docs/cpu-jit-tensor-backend.md
diff --git a/docs/cl-waffe2-docs/docs/distributions.md b/docs/cl-waffe2-docs/docs/distributions.md
@@ -16,11 +16,11 @@ That is, arguments passed to the `make-tensor` function can also be passed direc
 (normal `(10 10) 0.0 1.0 :requires-grad t)
 
 {LISPTENSOR[float] :shape (10 10)  
-  ((0.09241722   -1.8139324   0.33998385   ~ -1.7448716   -0.11915433  1.2616262)                    
-   (0.034676224  0.31745166   0.8633344    ~ -0.14157301  -0.9596394   -0.52192944)   
+  ((0.2188695    -0.6865164   -0.7096969   ~ 0.5221619    -1.0098034   0.8181761)                    
+   (-0.32395983  -1.7448716   -0.11915433  ~ 1.0900652    -1.8854977   -0.1889971)   
                  ...
-   (-0.018849093 0.10730301   0.7192831    ~ 0.7583118    1.3229972    -1.2871348)
-   (-0.69942784  0.88236964   -0.6999107   ~ -0.3676781   -2.0036936   0.67751735))
+   (-0.7269606   -0.031115573 0.14158817   ~ -1.3815284   0.1739938    1.9703826)
+   (-0.096768215 0.7583118    1.3229972    ~ 0.8591301    -0.56368643  1.4221152))
   :facet :exist
   :requires-grad T
   :backward NIL}
@@ -146,9 +146,9 @@ Note: My implementation is unstable, being occurs floating-overflow constantly..
 (beta `(3 3) 5.0 1.0)
 
 {LISPTENSOR[float] :shape (3 3)  
-  ((0.76956904 0.8549012  0.4545031)
-   (0.914095   0.54855245 0.93831366)
-   (0.7510813  0.45294034 0.9096131))
+  ((0.7730661  0.7142378  0.84765464)
+   (0.9686189  0.76956904 0.8549012)
+   (0.4545031  0.914095   0.54855245))
   :facet :exist
   :requires-grad NIL
   :backward NIL}
@@ -170,9 +170,9 @@ p - Takes 1 with probability p and 0 with probalibity (1-p).
 (bernoulli `(3 3) 0.3)
 
 {LISPTENSOR[float] :shape (3 3)  
-  ((0.0 1.0 0.0)
-   (0.0 0.0 0.0)
-   (0.0 0.0 0.0))
+  ((0.0 0.0 0.0)
+   (1.0 0.0 0.0)
+   (1.0 1.0 0.0))
   :facet :exist
   :requires-grad NIL
   :backward NIL}
@@ -198,9 +198,9 @@ df - degree of freedom.
 (chisquare `(3 3) 1.0)
 
 {LISPTENSOR[float] :shape (3 3)  
-  ((0.4950697    1.3176446    0.97631454)
-   (0.17928894   0.7067763    0.06370751)
-   (0.0054754415 0.1824935    1.639507))
+  ((1.2326496   0.8483967   0.011596402)
+   (0.4021547   0.027511619 0.08436891)
+   (0.5057191   0.011141564 0.017833697))
   :facet :exist
   :requires-grad NIL
   :backward NIL}
@@ -227,9 +227,9 @@ The function exponential is a family of initializer functions, and samples the e
 (exponential `(3 3))
 
 {LISPTENSOR[float] :shape (3 3)  
-  ((1.1214623   2.3461883   0.6938687)
-   (0.08668403  0.46339378  0.18236026)
-   (0.074848704 2.148749    1.5031147))
+  ((0.47960132 5.64849    0.29346126)
+   (0.2325543  0.06230082 0.83188677)
+   (3.1640477  1.1214623  2.3461883))
   :facet :exist
   :requires-grad NIL
   :backward NIL}
@@ -252,9 +252,9 @@ The function gamma is a family of initializer functions, and samples matrices fr
 (gamma `(3 3) 1.0)
 
 {LISPTENSOR[float] :shape (3 3)  
-  ((0.20066561  1.0735147   1.0844046)
-   (0.115611516 2.299866    0.2878098)
-   (3.3350327   0.7665806   0.8527828))
+  ((0.66520137  0.52452326  1.8498352)
+   (2.4886518   1.0235898   1.0265534)
+   (2.005943    1.0844046   0.115611516))
   :facet :exist
   :requires-grad NIL
   :backward NIL}
@@ -309,9 +309,9 @@ Input:
 (uniform-random `(3 3) 2 4)
 
 {LISPTENSOR[float] :shape (3 3)  
-  ((2.3659306 2.2203517 2.9010468)
-   (2.0084918 2.673732  2.288423)
-   (2.6262066 3.993234  3.0970056))
+  ((3.5922756 3.226882  2.4872847)
+   (2.952711  3.6584299 3.6804876)
+   (2.5902324 3.003483  3.9570756))
   :facet :exist
   :requires-grad NIL
   :backward NIL}
@@ -339,9 +339,9 @@ The function randn is a family of initializer functions, and samples the gaussia
 (randn `(3 3))
 
 {LISPTENSOR[float] :shape (3 3)  
-  ((1.6019585    -0.715621    -0.015199781)
-   (-0.3570089   0.028855132  -0.67624414)
-   (0.003321576  0.093405314  0.43108767))
+  ((-0.79764336 0.3408451   0.31103113)
+   (-1.7451742  0.6599348   0.05832184)
+   (-0.44232148 -0.81750864 -0.496161))
   :facet :exist
   :requires-grad NIL
   :backward NIL}

diff --git a/docs/cl-waffe2-docs/docs/generic-tensor.md b/docs/cl-waffe2-docs/docs/generic-tensor.md
@@ -450,7 +450,7 @@ Compiles the given computation node starting from `toplevel`. The docstring of `
 > (setq out (!add (make-input `(a 10) :X) (make-input `(a 10) :Y)))
 ```
 ```
-{LISPTENSOR[float] :shape (A 10) :id TID1560 
+{LISPTENSOR[float] :shape (A 10) :id TID937 
   :vec-state [maybe-not-computed]
     <<Not allocated: size=(A 10)>>
   :facet :input
@@ -648,7 +648,7 @@ In order to parse the state_dict key, the function `parse-state-dict-key` is ava
 > (make-state-dict (build (call (LinearLayer 10 10) (randn `(10 10)))))
 ```
 ```
-#S(STATE-DICT :TABLE #<HASH-TABLE :TEST EQUAL :COUNT 2 {7008E2C253}>
+#S(STATE-DICT :TABLE #<HASH-TABLE :TEST EQUAL :COUNT 2 {70066EC283}>
  table-key-to-value:
     param:linearlayer.0.bias    -> LISPTENSOR{FLOAT}(10)
     param:linearlayer.0.weights -> LISPTENSOR{FLOAT}(10 10)