path: root/firmware
diff options
authorDaniel Silverstone <>2019-05-12 11:37:11 +0100
committerDaniel Silverstone <>2019-05-12 11:37:11 +0100
commit0b7b5f8b820ceb8227780530b3aea437f629a49a (patch)
tree2e70878ac3af6876e894db0c6a8c8d24dfee7d4d /firmware
parent2e344fb0764c6c22472907b97df0ebbac23b5e4d (diff)
Various tweaks to the firmware, and some design docs
Diffstat (limited to 'firmware')
7 files changed, 323 insertions, 3 deletions
diff --git a/firmware/.gdbinit b/firmware/.gdbinit
new file mode 100644
index 0000000..ab9e03c
--- /dev/null
+++ b/firmware/.gdbinit
@@ -0,0 +1,3 @@
+target remote localhost:3333
+monitor reset halt
diff --git a/firmware/.vscode/settings.json b/firmware/.vscode/settings.json
new file mode 100644
index 0000000..0242e18
--- /dev/null
+++ b/firmware/.vscode/settings.json
@@ -0,0 +1,3 @@
+ "rust.all_targets": false
+} \ No newline at end of file
diff --git a/firmware/Cargo.lock b/firmware/Cargo.lock
index 0c9fc15..260685c 100644
--- a/firmware/Cargo.lock
+++ b/firmware/Cargo.lock
@@ -129,6 +129,8 @@ dependencies = [
"cortex-m 0.6.0 (registry+",
"cortex-m-rt 0.6.8 (registry+",
"cortex-m-rtfm 0.4.3 (registry+",
+ "nb 0.1.2 (registry+",
+ "panic-halt 0.2.0 (registry+",
"panic-semihosting 0.5.2 (registry+",
"stm32f103xx-usb 0.1.0 (git+",
"stm32f1xx-hal 0.3.0 (registry+",
@@ -196,6 +198,11 @@ dependencies = [
+name = "panic-halt"
+version = "0.2.0"
+source = "registry+"
name = "panic-semihosting"
version = "0.5.2"
source = "registry+"
@@ -374,6 +381,7 @@ dependencies = [
"checksum nb 0.1.2 (registry+" = "b1411551beb3c11dedfb0a90a0fa256b47d28b9ec2cdff34c25a2fa59e45dbdc"
"checksum owned-singleton 0.1.0 (registry+" = "21aa378869c97c7db706a4c576cf0ce8258dc7e0e1ad25a97ee5aba1fd4eed83"
"checksum owned-singleton-macros 0.1.0 (registry+" = "8d5c2ac071b95017bf70a5b607037fb0ef4abfa663fdf6cac8fbf36af9756ee3"
+"checksum panic-halt 0.2.0 (registry+" = "de96540e0ebde571dc55c73d60ef407c653844e6f9a1e2fdbd40c07b9252d812"
"checksum panic-semihosting 0.5.2 (registry+" = "97cfb37c1d3b5f0cc18bf14485018cccd13bdd24f7b5bfd456c1d8760afef824"
"checksum proc-macro2 0.4.30 (registry+" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
"checksum quote 0.6.12 (registry+" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db"
diff --git a/firmware/Cargo.toml b/firmware/Cargo.toml
index 33e2766..0d6ce9e 100644
--- a/firmware/Cargo.toml
+++ b/firmware/Cargo.toml
@@ -13,4 +13,19 @@ usb-device = { git = "", rev = "e58e30f3
stm32f103xx-usb = { git = "", rev = "93b4d47a21a8fe616a8e044bbd7628b530df8095" }
cortex-m-rt = "0.6.8"
cortex-m-rtfm = "0.4.3"
-panic-semihosting = "0.5.2"
+nb = "0.1.2"
+panic-semihosting = { version = "0.5.2", optional = true }
+panic-halt = { version = "0.2.0", optional = true }
+default = ["semihosting", "keil"]
+semihosting = ["panic-semihosting"]
+halt = ["panic-halt"]
+keil = []
diff --git a/firmware/ b/firmware/
new file mode 100644
index 0000000..c6ede8b
--- /dev/null
+++ b/firmware/
@@ -0,0 +1,45 @@
+# Entropy Key v3 Hardware notes
+## Prototype rev.A hardware setup
+### LEDs
+Always open-drain
+PB3 is red, TIM2_CH2
+PB6 is yellow, TIM4_CH1
+PB7 is blue, TIM4_CH2
+<> may be of use here
+### Analog
+3.3v_MID is PA0, ADC12_IN0
+5V_MID is PA1, ADC12_IN1
+HT_MID is PA2, ADC12_IN2
+### Misc
+USB presence is PA8 and is PP active high
+HT_EN is PB0
+### Generators
+GEN_CLK_O is PA6 which is TIM3_CH1
+GEN_CLK inputs on PA5 and PB13 which are SPI1_SCK and SPI2_SCK respectively
+GEN_0_OUT and GEN_1_OUT input on PA7 and PB15 which are SPI1_MOSI and SPI2_MOSI respectively
+It's worth noting that SPI2 shares some pins with some JTAG lines, so the JTAG
+will have to be disabled (leaving SWD active) in order that it can operate at all.
+## Keil board differences
+USB presence is PD2 and is OD, active low
+LEDs are:
+PB8, TIM4_CH3
+PB9, TIM4_CH4
+PB10, TIM2_CH3 AF REMAP = 0b10
+PB11, TIM2_CH4 AF REMAP = 0b10
diff --git a/firmware/ b/firmware/
new file mode 100644
index 0000000..d9bbfe2
--- /dev/null
+++ b/firmware/
@@ -0,0 +1,167 @@
+# Entropykey v3 Firmware internals
+This document describes some of the EntropyKey v3's firmware internals,
+with the intention of aiding design and debugging.
+The firmware is written in Rust. It uses mvirkkunen's USB stack for STM32
+and it is an RTFM application.
+The flow of data from the device to the host is over a CDC Serial link for
+now though this is an internal implementation detail and is not exposed to the
+rest of the firmware to allow for replacement with neater RAW apis later.
+As such, all handling of the "protocol" is kept within the USB layer at this time.
+Data flow within the device is somewhat more complex as there are a number of
+stages that data has to be processed in. Firstly the two generators are hooked
+up to SPI devices, and as such, we have to process the data from them SPI word
+by SPI word. Then we have a number of software mixing stages which are semi
+asynchronous from one another, and then an output framing to flow into the USB
+layer as described above.
+## SPI setup
+The SPI setup is critical to the performance of the entropykey. The SPI devices
+are configured at the hardware level to be slaves, with their clock driven from
+TIM3_CH1 allowing for varying the capture bitrate with moderate ease. The
+devices should be set with software NSS permanently active, 16 bit framing,
+read only (MOSI only enabled, BIDIMODE=0, RXONLY=1)
+The clock polarity and phase is not important, other than that we wish both
+devices to be configured the same to reduce chances of not spotting correlation.
+Ditto MSB/LSB first is irrelevant but must be the same between the devices.
+For efficiency, we tie DMA channels to the SPI devices so that the data is read
+out of the SPI devices using DMA rather than using software interrupt. This
+allows for us to take fewer interrupts while handling the data flow.
+### DMA setup for SPI
+Our device only has DMA1 since it's not high density.
+We use channel 2 for SPI1_RX and channel 4 for SPI2_RX
+As we're using 16 bit framing, we do a 16bit Peripheral-to-Memory transfer
+setup, arranging the transfers in groups of 64 halfwords, thus consuming 256
+bytes of memory for the two DMA buffers in use.
+We trigger from the half-transfer and full-transfer interrupts to consume the
+first or second halves of the buffers respectively, and the channels are
+arranged in circular mode so that once set up they simply generate IRQs and
+are otherwise uninteresting to us.
+We can control the flow of data out of the SPI devices by varying the rate of
+the clock coming from the TIM3_CH1 channel. It's worth noting that we ought
+to match up CPOL/CPHA and the idle state of that channel so that if we disable
+the PWM on that channel, then the SPI blocks aren't unhappy.
+Because the two channels are lock-stepped and channel 2 has higher priority than
+channel 4, if we only take the interrupt from channel 4 we can be confident that
+channel 2's content has already been filled to the same level. As such we take
+one interrupt which tells us that the two buffers have reached the same half-fill
+(first or second half) and we can process them both without needing to wait
+for a second interrupt.
+## Data flow from the SPI DMA buffers
+On IRQ there will be 64 bytes of generated data on each of two channels. To
+proceed three entropy estimations have to occur. We must estimate the entropy
+in each of the buffers independently, and also in a virtual buffer formed by
+xoring the data together. The running average of those estimations provides us
+with an idea of whether or not the generators are tending toward an extreme
+or correlating with each other.
+If the total number of shannons estimated by this process is less than the hash
+size of the mixing function then we designate that block as failed and skip the
+following stages, instead waiting for the next interrupt.
+We then mix the full 128 bytes of data from the two generators into our mixing
+function, crediting it with the entropy estimates generated from the first
+stage processing. The maximum amount of entropy which could be credited is
+therefore 1024 shannons during this stage of processing. Providing that the
+total is greater than the hash size, we're OK and we will claim the hash size
+of shannons as we move on.
+## Flowing data into the FIPS checks
+Once the data leaves the hashing function attached to the SPI DMA buffers, we
+have to gather it together into a FIPS 140-2 sized buffer for validation. This
+process requires that we acquire 20,000 bits of data which if we have a 128 bit
+hashing function will effectively be represented by a 2512 byte buffer (157 hashes)
+of which we will use about 156 and a quarter (20,000 bits is 2500 bytes, i.e. 156.25 hashes).
+Ideally we'll fit a pair of these buffers into RAM, though we accept that it's
+possible we won't be able to. Initial estimates suggest we'll manage it. The
+USB stack consumes about 600B and all the above maybe 1 or 2 KiB, so that ought
+to leave plenty of room for a pair of FIPS buffers (one accumulating, one being
+processed / sent out).
+When a FIPS buffer is filled, it has to be validated by means of the three FIPS
+140-2 checks. These are known as the monobit, poker, and runs checks. FIPS
+defines them with floating point and a number of other hassles, so we have our own
+integer-only check customised for our target.
+Whether the buffer passes or fails the FIPS tests is irrelevant, it is sent to
+the client.
+## Analogue checks
+We're going to be monitoring the 5v, 3.3v, HT line, and the current temperature
+of the µC. These are done on a single ADC in a round-robin fashion, stepping to the
+next input every time we complete a conversion. To do the best job we can, we run the ADCs in
+their slowest possible mode. We also use SCAN mode and a DMA channel to ensure
+that we take interrupts as infrequently as possible. The channels scanned
+are always the VREF_INT, the TEMP_INT, and the three voltages above.
+We set the sampling time to the longest possible, and reduce ADCCLK to as slow
+as makes sense. Our goal is to have as few interrupts as we can do, while still
+usefully monitoring the lines. We have to use ADC1 for this because ADC2 only
+has DMA by means of slaving to ADC1 anyway and also VREF_INT and TEMP_INT are
+only muxed into ADC1's inputs.
+The slowest we can set the ADC clock to is PCLK2/8 which is therefore 9MHz since
+we're running APB2 at full speed for DMA etc reasons on the SPI.
+The recommended sample time for the temperature sensor is 17.1µs and at 9MHz
+each cycle is one ninth of a microsecond. As such we should set the sample
+time selection bits to 0b111 which is 239.5 cycles (slowest) thereby allowing
+for full precision. Once the DMA complete interrupt is taken, we can post
+updated values for the measured entries to the statistics process. Conversion
+is started *ONLY* whenever a FIPS buffer completes.
+### DMA for ADCs
+The DMA for ADC1 comes in only on DMA1 Channel 1. As such that needs to be set
+to a very low priority so that requests for DMA don't overrule SPI completions.
+The DMA has to be 16 bit in/out and must be for all five channels.
+## Statistics and what they mean
+If the input mixers go "bad" then the system throughput is essentially halted.
+Here 'bad' means that the input entropy estimators are dropping too low too often.
+For the FIPS checks to signal a failure there has to be a significant number
+of failed blocks within the test period. A reasonable approach is to permit
+no more than 6 or 8 failed blocks per 4096 tested blocks. If we reach that
+limit then we also lock the system out.
+With respect to the analogue values, we're looking for outliers. For example
+if the 3.3v line goes too low or high, or the 5v line drops, the HT line looks
+iffy, etc. Temperature values are also gently poked at to ensure that we're not
+being margined by temperature. Any extreme value for too long will result in
+the system locking out.
+A locked out system is achieved by simply quiescing the timer which drives the
+SPI block.
+## Managing the throughput of the device
+After measurement to determine the theoretical maximum throughput of the device
+we can choose a slightly slower bitrate which we set as the maximum supported
+timer clock rate. After that we can reduce the timer rate if we detect that the
+output buffers are consistently having data thrown away, thereby reducing the
+power load. If we're often blocked because there's no data available to send
+and there's room to speed up the timer, we can do so.
diff --git a/firmware/src/ b/firmware/src/
index 15877c6..3e8393f 100644
--- a/firmware/src/
+++ b/firmware/src/
@@ -4,17 +4,96 @@
//! Entropykey v3 firmware
+#[cfg(feature = "semihosting")]
extern crate panic_semihosting;
+#[cfg(feature = "halt")]
+extern crate panic_halt;
use rtfm::app;
-use stm32f1xx_hal::prelude::*;
+use stm32f1xx_hal::{
+ gpio::gpiob::{PB8, PB9},
+ gpio::{Alternate, Output, PushPull, State},
+ prelude::*,
+ pwm::{Pins, Pwm, C3},
+ stm32::{TIM3 as TIM3_, TIM4},
+ timer::{Event, Timer},
+struct OneLED(PB8<Alternate<PushPull>>);
+impl Pins<TIM4> for OneLED {
+ const REMAP: u8 = 0b00;
+ const C1: bool = false;
+ const C2: bool = false;
+ const C3: bool = true;
+ const C4: bool = false;
+ type Channels = (Pwm<TIM4, C3>,);
#[app(device = stm32f1xx_hal::stm32)]
const APP: () = {
+ static mut PWM: Pwm<TIM4, C3> = ();
+ static mut TIMER: Timer<TIM3_> = ();
+ static mut CYCLE: bool = false;
+ static mut COUNT: usize = 0;
+ static mut LED: PB9<Output<PushPull>> = ();
fn init() {
+ let mut flash = device.FLASH.constrain();
+ let mut rcc = device.RCC.constrain();
+ let mut afio = device.AFIO.constrain(&mut rcc.apb2);
+ let clocks = rcc
+ .cfgr
+ .use_hse(8.mhz())
+ .sysclk(72.mhz())
+ .pclk1(24.mhz())
+ .freeze(&mut flash.acr);
+ assert!(clocks.usbclk_valid());
+ let mut gpiob = device.GPIOB.split(&mut rcc.apb2);
+ let led = OneLED(gpiob.pb8.into_alternate_push_pull(&mut gpiob.crh));
+ let mut pwm = device
+ .TIM4
+ .pwm(led, &mut afio.mapr, 10_000.hz(), clocks, &mut rcc.apb1)
+ .0;
+ pwm.set_duty(pwm.get_max_duty() / 4);
+ pwm.enable();
+ let mut timer = Timer::tim3(device.TIM3, 100.hz(), clocks, &mut rcc.apb1);
+ timer.listen(Event::Update);
+ PWM = pwm;
+ TIMER = timer;
+ LED = gpiob
+ .pb9
+ .into_push_pull_output_with_state(&mut gpiob.crh, State::Low);
+    // TIM3 update interrupt handler.
+    //
+    // Each invocation sets the PWM duty to COUNT percent of the maximum duty
+    // and then moves COUNT one step. COUNT starts at 0 and ramps up to 100,
+    // then back down to 0; CYCLE is true while ramping down. LED is driven
+    // high when the ramp reaches 100 and low when it returns to 0.
+    #[interrupt(priority = 1, resources = [PWM, TIMER, COUNT, CYCLE, LED])]
+    fn TIM3() {
+        // Both ramp directions apply the duty for the current step before
+        // touching COUNT, so that computation is shared here.
+        let full_scale = resources.PWM.get_max_duty() as usize;
+        let step = *resources.COUNT;
+        resources.PWM.set_duty(((full_scale * step) / 100) as u16);
+
+        if !*resources.CYCLE {
+            // Ramping up: turn around once the top of the range is reached.
+            *resources.COUNT += 1;
+            if *resources.COUNT == 100 {
+                *resources.CYCLE = true;
+                resources.LED.set_high();
+            }
+        } else {
+            // Ramping down: turn around once the bottom of the range is reached.
+            *resources.COUNT -= 1;
+            if *resources.COUNT == 0 {
+                *resources.CYCLE = false;
+                resources.LED.set_low();
+            }
+        }
+
+        // Clear the timer's update flag before returning (presumably so the
+        // interrupt does not immediately re-fire -- confirm against the HAL).
+        resources.TIMER.clear_update_interrupt_flag();
+    }