diff --git a/firmware/rnn.c b/firmware/rnn.c
index 810774dd4779c10ebf90954476386d5283834199..908342a38c8d15b7ee38ff7b92dd67cb5585f52d 100644
--- a/firmware/rnn.c
+++ b/firmware/rnn.c
@@ -267,12 +267,13 @@ int run_training_single_epoch(
     int error;
     int last_val;
     uint16_t train_mask;
-    uint16_t y = xp[xi-1];;
+    uint16_t y = xp[xi-1];
 
     /*
      * Revert the LSTM states
      */
     reset_network();
+
     /*
      * Put all the prior tokens of the time series
      * into the LSTM for reaching the state we wish to train on
@@ -289,6 +290,7 @@ int run_training_single_epoch(
     if(last_val==y) {
         return 0;
     }
+
     /*
      * Determine the training mask by finding out which
      * neurons misfired or didn't fire although they should have
diff --git a/src/cpptb/ecp5_minifpga.cpp b/src/cpptb/ecp5_minifpga.cpp
index 76bacae559fccbebf178beb2bb989d1eb85d0188..ff40f8bae018caca7c72244011c1e7d84d56febb 100644
--- a/src/cpptb/ecp5_minifpga.cpp
+++ b/src/cpptb/ecp5_minifpga.cpp
@@ -18,7 +18,7 @@
 
 #define ACK 06
 #define SYN 22
-//#define TRACE 1
+#define TRACE 1
 
 #ifndef TRACE
 #define TRACE 0
diff --git a/src/py/fac_tools.py b/src/py/fac_tools.py
index 624eb9d1673aa78e5cec32f026ea631ff98d6e46..9430af325f06abb7dd21635926361640e9cd4624 100644
--- a/src/py/fac_tools.py
+++ b/src/py/fac_tools.py
@@ -35,7 +35,7 @@ def run_command(server, cmd):
     cmd = cmd.encode("ascii")
     server.send(cmd)
     response = decode(server.recv(BUFSIZE), 'ascii')
-    print("commands: ",cmd," response: ",response)
+    #print("commands: ",cmd," response: ",response)
     sys.stdout.flush()
     return response
 
@@ -100,8 +100,11 @@ def extract_single_value(server, layer, wtype):
 
 def dump_neural_network(server):
     weights_and_biases = {}
+    print("Reading ENCODER")
     weights_and_biases['ENCODER'] = extract_values(server,"ENCODER")
+    print("Reading HIDDEN")
     weights_and_biases['HIDDEN'] = extract_values(server,"HIDDEN")
+    print("Reading DECODER")
     weights_and_biases['DECODER'] = extract_values(server,"DECODER")
     return weights_and_biases
 
@@ -129,8 +132,11 @@ def write_layer(server, layer_name, layer):
             nidx=nidx+1
 
 def load_weights_and_biases(server,weights_and_biases):
+    print("Loading ENCODER")
     write_layer(server, "ENCODER", weights_and_biases['ENCODER'])
+    print("Loading HIDDEN")
     write_layer(server, "HIDDEN", weights_and_biases['HIDDEN'])
+    print("Loading DECODER")
     write_layer(server, "DECODER", weights_and_biases['DECODER'])
 
 def init_weights_and_biases(server):
@@ -140,16 +146,54 @@ def init_weights_and_biases(server):
     run_command(server,"BIAS")
 
 def train_token_series(server, tokens, epochs):
+    arr = []
     run_command(server,"TRAIN")
     run_command(server,"TOKENS")
     for tok in tokens:
         run_command(server,str(tok))
     run_command(server,"DONE")
+    run_command(server,"TRAIN")
     run_command(server,"RUN_SINGLE_EPOCH")
     for i in range(0,epochs):
         print("Training epoch: ",i)
         ret = run_command(server,str(i))
+        arr.append(int(ret))
         print("Error: ", ret)
         if "0" == ret:
             break
+    return arr
+
+
+# The decay rate is a hyperparameter.
+# Because delta W (aka alpha) is calculated
+# from the initial learning rate α0
+# as shown below, fractional decay rates don't
+# make sense anyway. Usually the decay rate is set to 1:
+# α = (1 / (1 + decayRate × epochNumber)) * α0
+def set_decay_rate(server, rate):
+    run_command(server,"TRAIN")
+    run_command(server,"DECAY_RATE")
+    run_command(server,str(rate))
+
+
+# PicoRV does not have a floating point unit.
+# All weights are stored as signed 32 bit integers;
+# this means we have to do the math here and multiply
+# the maximum integer value by our desired learning rate.
+
+INTMAX=2147483647
+def set_learning_rate(server, learning_rate):
+    lr=int(learning_rate*INTMAX)
+    run_command(server,"TRAIN")
+    run_command(server,"LEARNING_RATE")
+    run_command(server,str(lr))
+
+def predict_next_token(server, token):
+    run_command(server,"PREDICT")
+    ret = run_command(server,str(token))
+    return int(ret)
+
+def reset_states(server):
+    run_command(server,"RESET")
+
 
diff --git a/src/py/tty3.py b/src/py/tty3.py
index ab8091d15d3c1c121ff0704a9589649e9b2a9782..a81a08403dce55912482ba8c7396de4235ea153f 100644
--- a/src/py/tty3.py
+++ b/src/py/tty3.py
@@ -6,61 +6,94 @@
 from fac_tools import load_weights_and_biases
 from fac_tools import dump_neural_network
 from fac_tools import init_weights_and_biases
 from fac_tools import train_token_series
+from fac_tools import set_decay_rate
+from fac_tools import set_learning_rate
+from fac_tools import predict_next_token
+from fac_tools import reset_states
 
-from transformers import AutoTokenizer
+#from transformers import AutoTokenizer
 
-tokenizer = AutoTokenizer.from_pretrained("gpt2")
-prompt = "Taxation is theft"
-input_ids = tokenizer(prompt, return_tensors="pt").input_ids
-tokens = input_ids[0].numpy()
+import matplotlib.pyplot as plt
+import numpy as np
+
+#tokenizer = AutoTokenizer.from_pretrained("gpt2")
+#prompt = "Taxation is theft"
+#input_ids = tokenizer(prompt, return_tensors="pt").input_ids
+#tokens = input_ids[0].numpy()
+tokens = [27017, 341, 318, 12402] # Taxation is theft
 
 print(tokens)
 
 server = get_fac_wrapper("telnet")
 
-INTMAX=2147483647
-lr=0.95
-decay_rate=1
-
 run_command(server,"HELLO")
 # Initialize the values
 init_weights_and_biases(server)
 
-# PicoRV does not have a floating point unit
-# All weights are stored as signed 32 bit integers
-# this means we have to do the math here and multiply
-# the maximum integer value by our desired learning rate
-lr=lr*INTMAX
-
-run_command(server,"TRAIN")
-run_command(server,"LEARNING_RATE")
-run_command(server,str(int(lr)))
-
-run_command(server,"TRAIN")
-run_command(server,"DECAY_RATE")
 # The decay rate is a hyperparameter.
 # Because delta W (aka alpha) is calculated
 # from the initial learning rate α0
 # as shown below, fractional decay rates don't
 # make sense anyway. Usually the decay rate is set to 1:
 # α = (1 / (1 + decayRate × epochNumber)) * α0
-run_command(server,str(decay_rate))
+set_decay_rate(server, 1)
 
 max_epochs=1000
 
-# Priming phase!
+
+set_decay_rate(server, 1)
+
+errors=[]
+
 # Upload and train token pairs first
-train_token_series(server, tokens[0:2], max_epochs)
-run_command(server,"DONE")
-train_token_series(server, tokens[1:3], max_epochs)
-run_command(server,"DONE")
-train_token_series(server, tokens[2:4], max_epochs)
-run_command(server,"DONE")
-
-# Upload token series
-train_token_series(server, tokens[0:3], max_epochs)
-run_command(server,"DONE")
-train_token_series(server, tokens[0:4], max_epochs)
-run_command(server,"DONE")
+
+for i in range(10):
+    reset_states(server)
+
+    set_learning_rate(server, 0.95)
+    errors += train_token_series(server, tokens[0:2], max_epochs)
+    run_command(server,"DONE")
+
+'''
+    reset_states(server)
+
+    set_learning_rate(server, 0.95)
+    errors += train_token_series(server, tokens[1:3], max_epochs)
+    run_command(server,"DONE")
+
+    reset_states(server)
+
+    set_learning_rate(server, 0.95)
+    errors += train_token_series(server, tokens[2:4], max_epochs)
+    run_command(server,"DONE")
+
+    set_learning_rate(server, 0.80)
+    errors += train_token_series(server, tokens[0:3], max_epochs)
+    run_command(server,"DONE")
+
+    set_learning_rate(server, 0.80)
+    errors += train_token_series(server, tokens[0:4], max_epochs)
+    run_command(server,"DONE")
'''
+
+
+reset_states(server)
+
+print("Input Token: ", hex(tokens[0]))
+tok = predict_next_token(server,tokens[0])
+print("Output Token: ", hex(tok))
+print("Expected Token: ", hex(tokens[1]))
+
+reset_states(server)
+
+print("Input Token: ", hex(tokens[1]))
+tok = predict_next_token(server,tokens[1])
+print("Output Token: ", hex(tok))
+print("Expected Token: ", hex(tokens[2]))
+
+xaxis = np.array(range(len(errors)))
+yaxis = np.array(errors)
+plt.plot(xaxis, yaxis)
+plt.show()
 # Store the weights and biases
 weights_and_biases = dump_neural_network(server)
@@ -72,6 +105,6 @@ run_command(server,"TERMINATE")
 server.close()
 
 print("Writing out JSON")
-with open("result/weights_and_biases_trained.json", "w") as f:
+with open("test_files/weights_and_biases_trained.json", "w") as f:
     f.write(j)
     f.close()
diff --git a/test_files/weights_and_biases_trained.json b/test_files/weights_and_biases_trained.json
index c1b0724dd939af20b4c705616741b35244dbf48f..8ca33aab69bfa77db6c3012a4fd52123aa2ced6f 100644
--- a/test_files/weights_and_biases_trained.json
+++ b/test_files/weights_and_biases_trained.json
@@ -3,187 +3,178 @@
         {
             "BIAS": 1,
             "WEIGHTS": [
-                690473296,
-                -1494986556
+                -1034732310,
+                -79821513
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -1694761729,
-                -315753447
+                1381756115,
+                -1522003209
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -126767594,
-                -1641191379
+                -1038745860,
+                -912475217
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -487379097,
-                -22175691
+                1318432283,
+                -1645263189
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                690473296,
-                -1494986556
+                -1034732310,
+                -79821513
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -1694761729,
-                -315753447
+                1381756115,
+                -1522003209
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -126767594,
-                -1641191379
+                -1038745860,
+                -912475217
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -487379097,
-                -22175691
+                1318432283,
+                -1645263189
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                690473296,
-                -1494986556
+                -1034732310,
+                -79821513
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -1694761729,
-                -315753447
+                1381756115,
+                -1522003209
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -126767594,
-                -1641191379
+                -1038745860,
+                -912475217
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -487379097,
-                -22175691
+                1318432283,
+                -1645263189
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                690473296,
-                -1494986556
+                -1034732310,
+                -79821513
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -1694761729,
-                -315753447
+                1381756115,
+                -1522003209
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -126767594,
-                -1641191379
+                -1038745860,
+                -912475217
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -487379097,
-                -22175691
+                1318432283,
+                -1645263189
             ]
         }
     ],
     "ENCODER": [
         {
-            "BIAS": 1,
-            "WEIGHTS": [
-                -1086882246,
-                890065689,
-                -658436233,
-                -1827809058,
-                -2094016881,
-                994122152,
-                -1486927413,
-                1046162757,
-                269476072,
-                -1235243704,
-                102278420,
-                282878551,
-                1689755965,
-                2062872118,
-                1132458446,
-                1743139792,
-                1315553822,
-                1610531142,
-                -1376266016
-            ]
-        },
-        {
-            "BIAS": 1,
-            "WEIGHTS": [
-                -669666177,
-                1911731843,
-                -397663223,
-                2074169613,
-                -1040292774,
-                418280326,
-                1987303962,
-                -1976815945,
-                456374444,
-                1173744692,
-                942648900,
-                -1178699798,
-                1482248008,
-                -1152024688,
-                132396759,
-                999663228,
-                -493460814,
-                1522311734,
-                -780619073
-            ]
-        },
-        {
-            "BIAS": 1,
-            "WEIGHTS": [
-                1679272388,
-                1871577013,
-                1791353142,
-                -1423064393,
-                1285057065,
-                522016831,
-                -2018470129,
-                1669540664,
-                -1130412574,
-                -1134005882,
-                -2139609887,
-                1367470096,
-                1607211244,
-                -292804344,
-                -1013691980,
-                -2037205983,
-                -81756222,
-                780729914,
-                -400218627
+            "BIAS": 341,
+            "WEIGHTS": [
+                63119415,
+                -1860953127,
+                1146220817,
+                -62698976,
+                -2045032645,
+                977250893,
+                -820875908,
+                -885174628,
+                -1813389698,
+                -1566850845,
+                -1018519090,
+                -59686292,
+                696136573,
+                1773846901,
+                34924138,
+                -1052424470
+            ]
+        },
+        {
+            "BIAS": 1,
+            "WEIGHTS": [
+                2085120548,
+                1294235581,
+                -804460127,
+                -1043503504,
+                -2146568414,
+                1020546817,
+                815712474,
+                1077016178,
+                1788558396,
+                646281034,
+                345631526,
+                173104885,
+                866307519,
+                780986042,
+                -50951644,
+                1556856787
+            ]
+        },
+        {
+            "BIAS": 1,
+            "WEIGHTS": [
+                849095779,
+                932215177,
+                1045113396,
+                -2116211009,
+                -485669004,
+                -92608312,
+                -1876355924,
+                -2124988502,
+                1012634633,
+                -222106975,
+                673084868,
+                669198773,
+                1484656970,
+                1879132618,
+                -687476123,
+                -579203936
             ]
         }
     ],
@@ -191,41 +182,41 @@
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -1770528584,
-                269103260,
-                1574045900,
-                -1108884550,
-                -550259312
+                1699754452,
+                -1148835161,
+                -1376990472,
+                -639733897,
+                -628294308
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                1770876661,
-                -963528172,
-                337955023,
-                186590671,
-                1191602015
+                853200057,
+                905984470,
+                -588533757,
+                369555484,
+                -533554235
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -1176110208,
-                359088383,
-                -24511787,
-                696042995,
-                -1183076447
+                -2025814933,
+                1006153275,
+                839868893,
+                1533654251,
+                -377986340
             ]
         },
         {
             "BIAS": 1,
             "WEIGHTS": [
-                -1913876046,
-                423571563,
-                1115446965,
-                1581204454,
-                -685626971
+                181100543,
+                1899086286,
+                1758429692,
+                -664336638,
+                -541206596
             ]
         }
     ]
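
Note on the fixed-point learning-rate handling above (not part of the patch):
the comments in fac_tools.py explain that PicoRV has no FPU, so the host
scales a float learning rate into the signed 32-bit integer domain, and that
the decayed rate follows α = (1 / (1 + decayRate × epochNumber)) * α0. The
minimal sketch below walks through that arithmetic. INTMAX mirrors the
constant in fac_tools.py; effective_alpha() is a hypothetical helper, not a
function in this repository, and integer truncation is assumed to match what
the firmware does.

INTMAX = 2147483647  # maximum signed 32-bit integer, as in fac_tools.py

def to_fixed_point(learning_rate):
    # Scale a float in [0, 1] into the integer weight domain -- the same
    # conversion set_learning_rate() performs before sending the value.
    return int(learning_rate * INTMAX)

def effective_alpha(alpha0, decay_rate, epoch):
    # alpha = (1 / (1 + decayRate * epochNumber)) * alpha0, evaluated
    # with integer division as an FPU-less core would (an assumption).
    return alpha0 // (1 + decay_rate * epoch)

alpha0 = to_fixed_point(0.95)         # roughly 0.95 * INTMAX
print(effective_alpha(alpha0, 1, 0))  # epoch 0: the full rate
print(effective_alpha(alpha0, 1, 1))  # epoch 1: half the rate
print(effective_alpha(alpha0, 1, 9))  # epoch 9: a tenth of the rate

With decayRate = 1 the rate falls off as 1/(1+epoch), which is why anything
fractional for the decay rate is pointless here: it would immediately be
flattened by the integer math on the device side.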