Five-point Winograd DFTΒΆ

First, define the SFG/block diagram

from math import cos, pi, sin

import matplotlib.pyplot as plt
import networkx as nx

from b_asic.architecture import Architecture, Memory, ProcessingElement
from b_asic.core_operations import AddSub, ConstantMultiplication
from b_asic.fft_operations import R2Butterfly
from b_asic.schedule import Schedule
from b_asic.sfg import SFG
from b_asic.special_operations import Input, Output

u = -2 * pi / 5
c50 = (cos(u) + cos(2 * u)) / 2 - 1
c51 = (cos(u) - cos(2 * u)) / 2
c52 = 1j * (sin(u) + sin(2 * u)) / 2
c53 = 1j * (sin(2 * u))
c54 = 1j * (sin(u) - sin(2 * u))


in0 = Input("x0")
in1 = Input("x1")
in2 = Input("x2")
in3 = Input("x3")
in4 = Input("x4")
bf0 = R2Butterfly(in1, in3)
bf1 = R2Butterfly(in4, in2)
bf2 = R2Butterfly(bf0.output(0), bf1.output(0))
a0 = AddSub(True, bf0.output(1), bf1.output(0))
a1 = AddSub(True, bf2.output(0), in0)
# Should overload float*OutputPort as well
m0 = ConstantMultiplication(c50, bf2.output(0))
m1 = ConstantMultiplication(c51, bf0.output(1))
m2 = c52 * a0
m3 = ConstantMultiplication(c53, bf2.output(1))
m4 = ConstantMultiplication(c54, bf1.output(1))
a2 = AddSub(True, m0, a1)
a3 = AddSub(False, m3, m2)
a4 = AddSub(True, m3, m4)
bf3 = R2Butterfly(a2, m1)
bf4 = R2Butterfly(bf3.output(0), a3)
bf5 = R2Butterfly(bf3.output(1), a4)

out0 = Output(a1, "X0")
out1 = Output(bf4.output(0), "X1")
out2 = Output(bf4.output(1), "X2")
out4 = Output(bf5.output(0), "X4")
out3 = Output(bf5.output(1), "X3")

sfg = SFG(
    inputs=[in0, in1, in2, in3, in4],
    outputs=[out0, out1, out2, out3, out4],
    name="5-point Winograd DFT",
)

The SFG looks like

sfg
%3 in0 x0 (in0) addsub0 addsub0 in0:e->addsub0 1 in1 x1 (in1) addsub0.0 addsub0->addsub0.0 r2bfly5 r2bfly5 in1:e->r2bfly5 0 in2 x2 (in2) r2bfly0 r2bfly0 r2bfly5->r2bfly0 0 0 r2bfly5.1 r2bfly5->r2bfly5.1 1 r2bfly4 r2bfly4 in2:e->r2bfly4 1 in3 x3 (in3) cmul3 cmul3 r2bfly4->cmul3 1 r2bfly4.0 r2bfly4->r2bfly4.0 0 in3:e->r2bfly5 1 in4 x4 (in4) in4:e->r2bfly4 0 out0 X0 (out0) out1 X1 (out1) addsub0.0->out0:w addsub1 addsub1 addsub0.0->addsub1 1 out2 X2 (out2) r2bfly2 r2bfly2 r2bfly2->out1:w 0 r2bfly2->out2:w 1 out3 X3 (out3) out4 X4 (out4) r2bfly3 r2bfly3 r2bfly3->out3:w 1 r2bfly3->out4:w 0 r2bfly0.0 r2bfly0.0->addsub0 0 cmul0 cmul0 r2bfly0.0->cmul0 r2bfly0->r2bfly0.0 0 cmul2 cmul2 r2bfly0->cmul2 1 r2bfly1 r2bfly1 addsub1->r2bfly1 0 cmul0->addsub1 0 r2bfly1->r2bfly2 0 0 r2bfly1->r2bfly3 0 1 cmul1 cmul1 cmul1->r2bfly1 1 addsub2 addsub2 addsub2->r2bfly3 1 cmul2.0 cmul2.0->addsub2 0 addsub4 addsub4 cmul2.0->addsub4 0 cmul2->cmul2.0 cmul3->addsub2 1 r2bfly4.0->r2bfly0 1 addsub3 addsub3 r2bfly4.0->addsub3 1 cmul4 cmul4 addsub3->cmul4 r2bfly5.1->cmul1 r2bfly5.1->addsub3 0 cmul4->addsub4 1 addsub4->r2bfly2 1


Set latencies and execution times

sfg.set_latency_of_type(ConstantMultiplication, 2)
sfg.set_latency_of_type(AddSub, 1)
sfg.set_latency_of_type(R2Butterfly, 1)
sfg.set_execution_time_of_type(ConstantMultiplication, 1)
sfg.set_execution_time_of_type(AddSub, 1)
sfg.set_execution_time_of_type(R2Butterfly, 1)

Generate schedule

schedule = Schedule(sfg, cyclic=True)
schedule
2026-06-03T14:26:50.106171 image/svg+xml Matplotlib v3.10.9, https://matplotlib.org/


Reschedule to only use one AddSub, one R2Butterfly, and one ConstantMultiplication per time unit

schedule.set_schedule_time(10)
schedule.move_operation("out4", 12)
schedule.move_operation("out3", 11)
schedule.move_operation("out2", 10)
schedule.move_operation("out1", 9)
schedule.move_operation("out0", 12)
schedule.move_operation("r2bfly3", 10)
schedule.move_operation("r2bfly2", 9)
schedule.move_operation("r2bfly1", 7)
schedule.move_operation("addsub4", 5)
schedule.move_operation("addsub2", 5)
schedule.move_operation("addsub1", 5)
schedule.move_operation("cmul4", 4)
schedule.move_operation("cmul2", 4)
schedule.move_operation("cmul0", 5)
schedule.move_operation("addsub0", 6)
schedule.move_operation("cmul1", 6)
schedule.move_operation("addsub3", 4)
schedule.move_operation("r2bfly0", 4)
schedule.move_operation("cmul3", 6)
schedule.move_operation("r2bfly5", 4)
schedule.move_operation("r2bfly4", 4)
schedule.move_operation("in1", 1)
schedule.move_operation("in2", 2)
schedule.move_operation("in3", 3)
schedule.move_operation("in4", 4)
schedule.move_operation("r2bfly5", -1)
schedule.move_operation("r2bfly3", 1)
schedule.move_operation("cmul2", 1)
schedule.move_operation("cmul4", 1)
schedule.move_operation("r2bfly0", 1)
schedule.move_operation("addsub0", -1)
schedule.move_operation("cmul1", -3)
schedule.move_operation("cmul3", -2)
schedule.move_operation("cmul4", -1)
schedule.move_operation("addsub4", 1)
schedule.move_operation("addsub1", 2)
schedule.move_operation("cmul0", 1)
schedule.move_operation("r2bfly0", -1)
schedule.move_operation("addsub0", -1)
schedule.move_operation("r2bfly2", -1)
schedule.move_operation("cmul2", -1)
schedule.move_operation("cmul4", 1)
schedule.move_operation("addsub2", -1)
schedule.move_operation("addsub4", -1)
schedule.move_operation("addsub1", -1)
schedule.move_operation("r2bfly1", -1)
schedule.move_operation("r2bfly2", -2)
schedule.move_operation("r2bfly3", -1)
schedule
2026-06-03T14:26:50.298009 image/svg+xml Matplotlib v3.10.9, https://matplotlib.org/


Extract memory variables and operation executions

operations = schedule.get_operations()
adders = operations.get_by_type_name(AddSub.type_name())
adders.show(title="AddSub executions")
mults = operations.get_by_type_name("cmul")
mults.show(title="Multiplier executions")
butterflies = operations.get_by_type_name(R2Butterfly.type_name())
butterflies.show(title="R2Butterfly executions")
inputs = operations.get_by_type_name("in")
inputs.show(title="Input executions")
outputs = operations.get_by_type_name("out")
outputs.show(title="Output executions")

addsub = ProcessingElement(adders, entity_name="addsub")
butterfly = ProcessingElement(butterflies, entity_name="butterfly")
multiplier = ProcessingElement(mults, entity_name="multiplier")
pe_in = ProcessingElement(inputs, entity_name="input")
pe_out = ProcessingElement(outputs, entity_name="output")

mem_vars = schedule.get_memory_variables()
mem_vars.show(title="All memory variables")
direct, mem_vars = mem_vars.split_on_length()
mem_vars.show(title="Non-zero time memory variables")
direct.show(title="Direct interconnects")
mem_vars_set = mem_vars.split_on_ports(read_ports=1, write_ports=1, total_ports=2)

memories = []
for i, mem in enumerate(mem_vars_set):
    memory = Memory(mem, memory_type="RAM", entity_name=f"memory{i}")
    memories.append(memory)
    mem.show(title=f"{memory.entity_name} variables")
    memory.assign("left_edge")
    memory.show_content(title=f"Assigned {memory.entity_name}")

fig, ax = plt.subplots()
fig.suptitle("Exclusion graph based on ports")
nx.draw(mem_vars.exclusion_graph_from_ports(1, 1, 2), ax=ax)
  • AddSub executions
  • Multiplier executions
  • R2Butterfly executions
  • Input executions
  • Output executions
  • All memory variables
  • Non-zero time memory variables
  • Direct interconnects
  • memory0 variables
  • Assigned memory0
  • memory1 variables
  • Assigned memory1
  • memory2 variables
  • Assigned memory2
  • memory3 variables
  • Assigned memory3
  • memory4 variables
  • Assigned memory4
  • Exclusion graph based on ports

Create architecture

arch = Architecture(
    [addsub, butterfly, multiplier, pe_in, pe_out],
    memories,
    direct_interconnects=direct,
)

arch
%3 cluster_memories Memories cluster_pes Processing Elements cluster_io_in Inputs cluster_io_out Outputs memory0 0 memory0 : (RAM, 3 cells) 0 _wl_out_26 memory0 memory0:e->_wl_out_26 memory1 0 memory1 : (RAM, 2 cells) 0 _wl_out_6 memory1 memory1:e->_wl_out_6 memory2 0 memory2 : (RAM, 2 cells) 0 _wl_out_19 memory2 memory2:e->_wl_out_19 memory3 0 memory3 : (RAM, 1 cell) 0 _wl_out_23 memory3 memory3:e->_wl_out_23 memory4 0 memory4 : (RAM, 1 cell) 0 _wl_out_41 memory4 memory4:e->_wl_out_41 addsub 0 addsub 0 1 _wl_out_31 addsub addsub:e->_wl_out_31 butterfly 0 butterfly 0 1 1 _wl_out_11 butterfly_0 butterfly:e->_wl_out_11 _wl_out_36 butterfly_1 butterfly:e->_wl_out_36 multiplier 0 multiplier 0 _wl_out_0 multiplier multiplier:e->_wl_out_0 input input 0 inputout0_branch input:e->inputout0_branch output 0 output addsub_in0_mux 0 addsub_in0_mux 0 1 2 addsub_in0_mux:e->addsub:w addsub_in1_mux 0 addsub_in1_mux 0 1 2 3 4 addsub_in1_mux:e->addsub:w butterfly_in0_mux 0 butterfly_in0_mux 0 1 2 3 4 butterfly_in0_mux:e->butterfly:w memory1_in0_mux 0 memory1_in0_mux 0 1 2 3 memory1_in0_mux:e->memory1:w memory3_in0_mux 0 memory3_in0_mux 0 1 memory3_in0_mux:e->memory3:w memory2_in0_mux 0 memory2_in0_mux 0 1 memory2_in0_mux:e->memory2:w butterfly_in1_mux 0 butterfly_in1_mux 0 1 2 3 4 butterfly_in1_mux:e->butterfly:w memory0_in0_mux 0 memory0_in0_mux 0 1 2 3 memory0_in0_mux:e->memory0:w multiplier_in0_mux 0 multiplier_in0_mux 0 1 2 multiplier_in0_mux:e->multiplier:w output_in0_mux 0 output_in0_mux 0 1 2 output_in0_mux:e->output:w _wl_in_1 multiplier _wl_in_1:e->memory0_in0_mux:w _wl_in_2 multiplier _wl_in_2:e->memory1_in0_mux:w _wl_in_3 multiplier _wl_in_3:e->memory3_in0_mux:w _wl_in_4 multiplier _wl_in_4:e->addsub_in1_mux:w _wl_in_5 multiplier _wl_in_5:e->addsub_in0_mux:w _wl_in_7 memory1 _wl_in_7:e->butterfly_in0_mux:w _wl_in_8 memory1 _wl_in_8:e->addsub_in0_mux:w _wl_in_9 memory1 _wl_in_9:e->butterfly_in1_mux:w _wl_in_10 memory1 _wl_in_10:e->multiplier_in0_mux:w _wl_in_12 butterfly_0 _wl_in_12:e->addsub_in0_mux:w _wl_in_13 butterfly_0 _wl_in_13:e->memory0_in0_mux:w _wl_in_14 butterfly_0 _wl_in_14:e->butterfly_in1_mux:w _wl_in_15 butterfly_0 _wl_in_15:e->memory1_in0_mux:w _wl_in_16 butterfly_0 _wl_in_16:e->butterfly_in0_mux:w _wl_in_17 butterfly_0 _wl_in_17:e->addsub_in1_mux:w _wl_in_18 butterfly_0 _wl_in_18:e->memory2_in0_mux:w _wl_in_20 memory2 _wl_in_20:e->output_in0_mux:w _wl_in_21 memory2 _wl_in_21:e->addsub_in1_mux:w _wl_in_22 memory2 _wl_in_22:e->multiplier_in0_mux:w _wl_in_24 memory3 _wl_in_24:e->addsub_in1_mux:w _wl_in_25 memory3 _wl_in_25:e->butterfly_in1_mux:w _wl_in_27 memory0 _wl_in_27:e->output_in0_mux:w _wl_in_28 memory0 _wl_in_28:e->addsub_in1_mux:w _wl_in_29 memory0 _wl_in_29:e->butterfly_in0_mux:w _wl_in_30 memory0 _wl_in_30:e->butterfly_in1_mux:w _wl_in_32 addsub _wl_in_32:e->memory2_in0_mux:w _wl_in_33 addsub _wl_in_33:e->memory3_in0_mux:w _wl_in_34 addsub _wl_in_34:e->butterfly_in0_mux:w _wl_in_35 addsub _wl_in_35:e->memory1_in0_mux:w inputout0_branch->butterfly_in0_mux:w inputout0_branch->butterfly_in1_mux:w inputout0_branch->memory0_in0_mux:w _wl_in_37 butterfly_1 _wl_in_37:e->memory1_in0_mux:w _wl_in_38 butterfly_1 _wl_in_38:e->memory4:w _wl_in_39 butterfly_1 _wl_in_39:e->multiplier_in0_mux:w _wl_in_40 butterfly_1 _wl_in_40:e->memory0_in0_mux:w _wl_in_42 memory4 _wl_in_42:e->output_in0_mux:w


Move memory variables to optimize architecture

arch.move_process("addsub2.0", "memory3", "memory2")
arch.move_process("r2bfly2.0", "memory2", "memory3")
arch.move_process("cmul2.0", "memory1", "memory0")
arch.move_process("r2bfly3.0", "memory0", "memory1")
arch.move_process("r2bfly3.1", "memory4", "memory0")

arch.assign_resources()

Memory 4 is now empty, so remove it.

arch.remove_resource("memory4")

for memory in arch.memories:
    memory.show_content(title=f"Improved {memory.entity_name}")

arch
  • Improved memory0
  • Improved memory1
  • Improved memory2
  • Improved memory3
%3 cluster_memories Memories cluster_pes Processing Elements cluster_io_in Inputs cluster_io_out Outputs memory0 0 memory0 : (RAM, 3 cells) 0 _wl_out_5 memory0 memory0:e->_wl_out_5 memory1 0 memory1 : (RAM, 2 cells) 0 _wl_out_11 memory1 memory1:e->_wl_out_11 memory2 0 memory2 : (RAM, 2 cells) 0 _wl_out_25 memory2 memory2:e->_wl_out_25 memory3 0 memory3 : (RAM, 1 cell) 0 _wl_out_30 memory3 memory3:e->_wl_out_30 addsub 0 addsub 0 1 _wl_out_33 addsub addsub:e->_wl_out_33 butterfly 0 butterfly 0 1 1 _wl_out_17 butterfly_0 butterfly:e->_wl_out_17 _wl_out_37 butterfly_1 butterfly:e->_wl_out_37 multiplier 0 multiplier 0 _wl_out_0 multiplier multiplier:e->_wl_out_0 input input 0 inputout0_branch input:e->inputout0_branch output 0 output addsub_in0_mux 0 addsub_in0_mux 0 1 2 3 addsub_in0_mux:e->addsub:w addsub_in1_mux 0 addsub_in1_mux 0 1 2 3 4 addsub_in1_mux:e->addsub:w butterfly_in0_mux 0 butterfly_in0_mux 0 1 2 3 4 butterfly_in0_mux:e->butterfly:w memory1_in0_mux 0 memory1_in0_mux 0 1 2 memory1_in0_mux:e->memory1:w butterfly_in1_mux 0 butterfly_in1_mux 0 1 2 3 4 butterfly_in1_mux:e->butterfly:w memory0_in0_mux 0 memory0_in0_mux 0 1 2 3 memory0_in0_mux:e->memory0:w memory3_in0_mux 0 memory3_in0_mux 0 1 memory3_in0_mux:e->memory3:w multiplier_in0_mux 0 multiplier_in0_mux 0 1 2 multiplier_in0_mux:e->multiplier:w output_in0_mux 0 output_in0_mux 0 1 2 3 output_in0_mux:e->output:w _wl_in_1 multiplier _wl_in_1:e->addsub_in1_mux:w _wl_in_2 multiplier _wl_in_2:e->memory3_in0_mux:w _wl_in_3 multiplier _wl_in_3:e->memory0_in0_mux:w _wl_in_4 multiplier _wl_in_4:e->addsub_in0_mux:w _wl_in_6 memory0 _wl_in_6:e->addsub_in0_mux:w _wl_in_7 memory0 _wl_in_7:e->butterfly_in1_mux:w _wl_in_8 memory0 _wl_in_8:e->butterfly_in0_mux:w _wl_in_9 memory0 _wl_in_9:e->output_in0_mux:w _wl_in_10 memory0 _wl_in_10:e->addsub_in1_mux:w _wl_in_12 memory1 _wl_in_12:e->addsub_in0_mux:w _wl_in_13 memory1 _wl_in_13:e->butterfly_in1_mux:w _wl_in_14 memory1 _wl_in_14:e->butterfly_in0_mux:w _wl_in_15 memory1 _wl_in_15:e->output_in0_mux:w _wl_in_16 memory1 _wl_in_16:e->multiplier_in0_mux:w _wl_in_18 butterfly_0 _wl_in_18:e->addsub_in0_mux:w _wl_in_19 butterfly_0 _wl_in_19:e->memory1_in0_mux:w _wl_in_20 butterfly_0 _wl_in_20:e->memory0_in0_mux:w _wl_in_21 butterfly_0 _wl_in_21:e->butterfly_in1_mux:w _wl_in_22 butterfly_0 _wl_in_22:e->butterfly_in0_mux:w _wl_in_23 butterfly_0 _wl_in_23:e->addsub_in1_mux:w _wl_in_24 butterfly_0 _wl_in_24:e->memory3_in0_mux:w _wl_in_26 memory2 _wl_in_26:e->output_in0_mux:w _wl_in_27 memory2 _wl_in_27:e->addsub_in1_mux:w _wl_in_28 memory2 _wl_in_28:e->butterfly_in1_mux:w _wl_in_29 memory2 _wl_in_29:e->multiplier_in0_mux:w _wl_in_31 memory3 _wl_in_31:e->addsub_in1_mux:w _wl_in_32 memory3 _wl_in_32:e->output_in0_mux:w _wl_in_34 addsub _wl_in_34:e->butterfly_in0_mux:w _wl_in_35 addsub _wl_in_35:e->memory1_in0_mux:w _wl_in_36 addsub _wl_in_36:e->memory2:w inputout0_branch->butterfly_in0_mux:w inputout0_branch->butterfly_in1_mux:w inputout0_branch->memory0_in0_mux:w _wl_in_38 butterfly_1 _wl_in_38:e->memory1_in0_mux:w _wl_in_39 butterfly_1 _wl_in_39:e->memory0_in0_mux:w _wl_in_40 butterfly_1 _wl_in_40:e->multiplier_in0_mux:w


Total running time of the script: (0 minutes 3.363 seconds)

Gallery generated by Sphinx-Gallery