Comparison with folding

This is a common example when illustrating folding.

In general, the main problem with folding is to determine a suitable folding order. This corresponds to scheduling the operations.

Here, the folding order of the adders is the same as in the standard solution to this problem, but the order of the multipliers is different, so that the lifetime of each memory variable stays shorter than the scheduling period.

from b_asic.architecture import Architecture, Memory, ProcessingElement
from b_asic.core_operations import Addition, ConstantMultiplication
from b_asic.schedule import Schedule
from b_asic.scheduler import ASAPScheduler
from b_asic.sfg import SFG
from b_asic.special_operations import Delay, Input, Output

# Build the signal-flow graph (SFG) of the bi-quad used in this example.
in1 = Input("IN")
# Two cascaded delays: T1 is created without a source (it is connected
# further down), T2 takes T1 as its source.
T1 = Delay()
T2 = Delay(T1)
# Constant multipliers named "a".."d": a and b are fed from T1, c and d from T2.
a = ConstantMultiplication(0.2, T1, "a")
b = ConstantMultiplication(0.3, T1, "b")
c = ConstantMultiplication(0.4, T2, "c")
d = ConstantMultiplication(0.6, T2, "d")
# a + c is combined with the input sample ...
add2 = a + c
add1 = in1 + add2
# ... while b + d is only used for the output.
add3 = b + d
# Connect add1 as the source of T1, closing the loop through the delays
# (NOTE(review): relies on the B-ASIC meaning of `<<=` -- confirm).
T1 <<= add1
out1 = Output(add1 + add3, "OUT")

# Wrap the connected operations in an SFG with one input and one output.
sfg = SFG(inputs=[in1], outputs=[out1], name="Bi-quad folding example")

The SFG looks like:

sfg  # bare expression: the gallery/notebook renders the SFG as a graph
%3 in0 IN (in0) add0 add0 in0:e->add0 0 add0.0 add0->add0.0 out0 OUT (out0) add2 add2 add2->out0:w add1 add1 add1->add0 1 add0.0->add2 0 t0 t0 add0.0->t0 t0.0 t0->t0.0 add3 add3 add3->add2 1 cmul0 b (cmul0) cmul0->add3 0 cmul1 d (cmul1) cmul1->add3 1 t1.0 t1.0->cmul1 cmul2 c (cmul2) t1.0->cmul2 t1 t1 t1->t1.0 t0.0->cmul0 t0.0->t1 cmul3 a (cmul3) t0.0->cmul3 cmul2->add1 1 cmul3->add1 0


Set latencies and execution times

# Timing per operation type. Latencies are applied first and execution
# times second, in the order the types are listed.
latency_by_type = (
    (ConstantMultiplication, 2),
    (Addition, 1),
)
execution_time_by_type = (
    (ConstantMultiplication, 1),
    (Addition, 1),
)

for op_type, latency in latency_by_type:
    sfg.set_latency_of_type(op_type, latency)

for op_type, execution_time in execution_time_by_type:
    sfg.set_execution_time_of_type(op_type, execution_time)

Create schedule

# Schedule the SFG with the ASAP scheduler.
# NOTE(review): cyclic=True presumably allows operations to wrap around the
# schedule period -- confirm against the Schedule documentation.
schedule = Schedule(sfg, scheduler=ASAPScheduler(), cyclic=True)
schedule  # bare expression: the gallery/notebook renders the schedule
2026-06-03T14:26:29.019091 image/svg+xml Matplotlib v3.10.9, https://matplotlib.org/


Reschedule to only require one adder and one multiplier

# Hand-tuned rescheduling. Each call names an operation by its graph id (the
# ids shown in the rendered SFG) and gives a signed offset by which it is
# moved (presumably in schedule time units -- confirm).
# NOTE(review): the sequence is most likely order-dependent, since every move
# is applied to the schedule as left by the previous calls; do not reorder.
schedule.move_operation('out0', 2)
schedule.move_operation('add2', 2)
schedule.move_operation('cmul2', -3)
schedule.move_operation('add3', 3)
schedule.move_operation('cmul1', -3)
# Change the schedule time to 4 once the moves above have been applied.
schedule.set_schedule_time(4)
# Remaining adjustments, made against the new schedule time.
schedule.move_operation('cmul1', 1)
schedule.move_operation('cmul0', 1)
schedule.move_operation('in0', 3)
schedule.move_operation('cmul2', -1)
schedule.move_operation('cmul0', 1)
schedule  # bare expression: the gallery/notebook renders the new schedule
2026-06-03T14:26:29.124239 image/svg+xml Matplotlib v3.10.9, https://matplotlib.org/


Extract operations and create processing elements

def _collect_and_show(all_operations, type_name, title):
    """Return the executions of ``type_name`` after showing them as ``title``."""
    selected = all_operations.get_by_type_name(type_name)
    selected.show(title=title)
    return selected


# Split the scheduled operations by type name, showing each group as it is
# extracted.
operations = schedule.get_operations()
adders = _collect_and_show(operations, "add", "Adder executions")
mults = _collect_and_show(operations, "cmul", "Multiplier executions")
inputs = _collect_and_show(operations, "in", "Input executions")
outputs = _collect_and_show(operations, "out", "Output executions")

# One processing element per group of executions.
p1 = ProcessingElement(adders, entity_name="adder")
p2 = ProcessingElement(mults, entity_name="cmul")
p_in = ProcessingElement(inputs, entity_name="input")
p_out = ProcessingElement(outputs, entity_name="output")
  • Adder executions
  • Multiplier executions
  • Input executions
  • Output executions

Extract and assign memory variables

# Start from every memory variable in the schedule.
all_mem_vars = schedule.get_memory_variables()
all_mem_vars.show(title="All memory variables")

# Separate the variables that become direct interconnects from those that
# are kept for storage in a memory.
direct, mem_vars = all_mem_vars.split_on_length()
mem_vars.show(title="Non-zero time memory variables")

# Partition the stored variables under the given port limits.
mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2)

# Build one RAM per partition and assign its variables with the
# "left_edge" strategy.
memories = []
for index, partition in enumerate(mem_vars_set):
    memory = Memory(partition, memory_type="RAM", entity_name=f"memory{index}")
    partition.show(title=f"{memory.entity_name}")
    memory.assign("left_edge")
    memory.show_content(title=f"Assigned {memory.entity_name}")
    memories.append(memory)

direct.show(title="Direct interconnects")
  • All memory variables
  • Non-zero time memory variables
  • memory0
  • Assigned memory0
  • memory1
  • Assigned memory1
  • Direct interconnects

Create architecture

# Combine the four processing elements, the memories created above and the
# direct interconnects into one architecture.
arch = Architecture({p1, p2, p_in, p_out}, memories, direct_interconnects=direct)

The architecture can be rendered in enriched shells.

arch  # bare expression: rendered as a block diagram where rich output is supported
%3 cluster_memories Memories cluster_pes Processing Elements cluster_io_in Inputs cluster_io_out Outputs memory0 0 memory0 : (RAM, 2 cells) 0 _wl_out_9 memory0 memory0:e->_wl_out_9 memory1 0 memory1 : (RAM, 1 cell) 0 _wl_out_13 memory1 memory1:e->_wl_out_13 adder 0 adder 0 1 _wl_out_0 adder adder:e->_wl_out_0 cmul 0 cmul 0 _wl_out_5 cmul cmul:e->_wl_out_5 input input 0 adder_in0_mux 0 adder_in0_mux 0 1 2 input:e->adder_in0_mux:w output 0 output adder_in0_mux:e->adder:w adder_in1_mux 0 adder_in1_mux 0 1 2 adder_in1_mux:e->adder:w memory0_in0_mux 0 memory0_in0_mux 0 1 memory0_in0_mux:e->memory0:w cmul_in0_mux 0 cmul_in0_mux 0 1 cmul_in0_mux:e->cmul:w _wl_in_1 adder _wl_in_1:e->memory0_in0_mux:w _wl_in_2 adder _wl_in_2:e->cmul_in0_mux:w _wl_in_3 adder _wl_in_3:e->adder_in1_mux:w _wl_in_4 adder _wl_in_4:e->output:w _wl_in_6 cmul _wl_in_6:e->memory0_in0_mux:w _wl_in_7 cmul _wl_in_7:e->adder_in0_mux:w _wl_in_8 cmul _wl_in_8:e->memory1:w _wl_in_10 memory0 _wl_in_10:e->cmul_in0_mux:w _wl_in_11 memory0 _wl_in_11:e->adder_in0_mux:w _wl_in_12 memory0 _wl_in_12:e->adder_in1_mux:w _wl_in_14 memory1 _wl_in_14:e->adder_in1_mux:w


Total running time of the script: (0 minutes 1.127 seconds)

Gallery generated by Sphinx-Gallery