diff options
author | cinap_lenrek <cinap_lenrek@gmx.de> | 2013-01-26 17:33:21 +0100 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@gmx.de> | 2013-01-26 17:33:21 +0100 |
commit | ea108c8ca6e726ac008f75775ab83775ec233171 (patch) | |
tree | 982816b58d50e1b12b7eeb2c29fe22ca8d9c195b /sys/src/9/teg2/archtegra.c | |
parent | 43e09c468b4c6562c93c9375a316012e238d21b2 (diff) |
add tegra2 soc kernel (from sources)
Diffstat (limited to 'sys/src/9/teg2/archtegra.c')
-rw-r--r-- | sys/src/9/teg2/archtegra.c | 869 |
1 files changed, 869 insertions, 0 deletions
diff --git a/sys/src/9/teg2/archtegra.c b/sys/src/9/teg2/archtegra.c new file mode 100644 index 000000000..41750b747 --- /dev/null +++ b/sys/src/9/teg2/archtegra.c @@ -0,0 +1,869 @@ +/* + * nvidia tegra 2 architecture-specific stuff + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "io.h" +#include "arm.h" + +#include "../port/netif.h" +#include "etherif.h" +#include "../port/flashif.h" +#include "../port/usb.h" +#include "../port/portusbehci.h" +#include "usbehci.h" + +enum { + /* hardware limits imposed by register contents or layouts */ + Maxcpus = 4, + Maxflowcpus = 2, + + Debug = 0, +}; + +typedef struct Clkrst Clkrst; +typedef struct Diag Diag; +typedef struct Flow Flow; +typedef struct Scu Scu; +typedef struct Power Power; + +struct Clkrst { + ulong rstsrc; + ulong rstdevl; + ulong rstdevh; + ulong rstdevu; + + ulong clkoutl; + ulong clkouth; + ulong clkoutu; + + uchar _pad0[0x24-0x1c]; + ulong supcclkdiv; /* super cclk divider */ + ulong _pad1; + ulong supsclkdiv; /* super sclk divider */ + + uchar _pad4[0x4c-0x30]; + ulong clkcpu; + + uchar _pad1[0xe0-0x50]; + ulong pllxbase; /* pllx controls CPU clock speed */ + ulong pllxmisc; + ulong pllebase; /* plle is dedicated to pcie */ + ulong pllemisc; + + uchar _pad2[0x340-0xf0]; + ulong cpuset; + ulong cpuclr; +}; + +enum { + /* rstsrc bits */ + Wdcpurst = 1<<0, + Wdcoprst = 1<<1, + Wdsysrst = 1<<2, + Wdsel = 1<<4, /* tmr1 or tmr2? */ + Wdena = 1<<5, + + /* devl bits */ + Sysreset = 1<<2, + + /* clkcpu bits */ + Cpu1stop = 1<<9, + Cpu0stop = 1<<8, + + /* cpu* bits */ + Cpu1dbgreset = 1<<13, + Cpu0dbgreset = 1<<12, + Cpu1wdreset = 1<<9, + Cpu0wdreset = 1<<8, + Cpu1dereset = 1<<5, + Cpu0dereset = 1<<4, + Cpu1reset = 1<<1, + Cpu0reset = 1<<0, +}; + +struct Power { + ulong ctl; /* mainly for rtc clock signals */ + ulong secregdis; + ulong swrst; + + ulong wakevmask; + ulong waklvl; + ulong waksts; + ulong swwaksts; + + ulong dpdpadsovr; /* deep power down pads override */ + ulong dpdsample; + ulong dpden; + + ulong gatetimroff; + ulong gatetimron; + ulong toggle; + ulong unclamp; + ulong gatests; /* ro */ + + ulong goodtmr; + ulong blinktmr; + + ulong noiopwr; + ulong detect; + ulong detlatch; + + ulong scratch[24]; + ulong secscratch[6]; + + ulong cpupwrgoodtmr; + ulong cpupwrofftmr; + + ulong pgmask[2]; + + ulong autowaklvl; + ulong autowaklvlmask; + ulong wakdelay; + + ulong detval; + ulong ddr; + ulong usbdebdel; /* usb de-bounce delay */ + ulong usbao; + ulong cryptoop; + ulong pllpwb0ovr; + ulong scratch24[42-24+1]; + ulong boundoutmirr[3]; + ulong sys33ven; + ulong boundoutmirracc; + ulong gate; +}; + +enum { + /* toggle bits */ + Start = 1<<8, + /* partition ids */ + Partpcie= 3, + Partl2 = 4, +}; + +struct Scu { + ulong ctl; + ulong cfg; /* ro */ + ulong cpupwrsts; + ulong inval; + + uchar _pad0[0x40-0x10]; + ulong filtstart; + ulong filtend; + + uchar _pad1[0x50-0x48]; + ulong accctl; /* initially 0 */ + ulong nsaccctl; +}; + +enum { + /* ctl bits */ + Scuenable = 1<<0, + Filter = 1<<1, + Scuparity = 1<<2, + Specfill = 1<<3, /* only for PL310 */ + Allport0 = 1<<4, + Standby = 1<<5, + Icstandby = 1<<6, +}; + +struct Flow { + ulong haltcpu0; + ulong haltcop; + ulong cpu0; + ulong cop; + ulong xrq; + ulong haltcpu1; + ulong cpu1; +}; + +enum { + /* haltcpu* bits */ + Stop = 2<<29, + + /* cpu* bits */ + Event = 1<<14, /* w1c */ + Waitwfebitsshift = 4, + Waitwfebitsmask = MASK(2), + Eventenable = 1<<1, + Cpuenable = 1<<0, +}; + +struct Diag { + Cacheline c0; + Lock; + long cnt; + long sync; + Cacheline c1; +}; + +extern ulong testmem; + +/* + * number of cpus available. contrast with conf.nmach, which is number + * of running cpus. + */ +int navailcpus; +Isolated l1ptstable; + +Soc soc = { + .clkrst = 0x60006000, /* clock & reset signals */ + .power = 0x7000e400, + .exceptvec = PHYSEVP, /* undocumented magic */ + .sema = 0x60001000, + .l2cache= PHYSL2BAG, /* pl310 bag on the side */ + .flow = 0x60007000, + + /* 4 non-gic controllers */ +// .intr = { 0x60004000, 0x60004100, 0x60004200, 0x60004300, }, + + /* private memory region */ + .scu = 0x50040000, + /* we got this address from the `cortex-a series programmer's guide'. */ + .intr = 0x50040100, /* per-cpu interface */ + .glbtmr = 0x50040200, + .loctmr = 0x50040600, + .intrdist=0x50041000, + + .uart = { 0x70006000, 0x70006040, + 0x70006200, 0x70006300, 0x70006400, }, + + .rtc = 0x7000e000, + .tmr = { 0x60005000, 0x60005008, 0x60005050, 0x60005058, }, + .µs = 0x60005010, + + .pci = 0x80000000, + .ether = 0xa0024000, + + .nand = 0x70008000, + .nor = 0x70009000, /* also VIRTNOR */ + + .ehci = P2VAHB(0xc5000000), /* 1st of 3 */ + .ide = P2VAHB(0xc3000000), + + .gpio = { 0x6000d000, 0x6000d080, 0x6000d100, 0x6000d180, + 0x6000d200, 0x6000d280, 0x6000d300, }, + .spi = { 0x7000d400, 0x7000d600, 0x7000d800, 0x7000da00, }, + .twsi = 0x7000c000, + .mmc = { P2VAHB(0xc8000000), P2VAHB(0xc8000200), + P2VAHB(0xc8000400), P2VAHB(0xc8000600), }, +}; + +static volatile Diag diag; +static int missed; + +void +dumpcpuclks(void) /* run CPU at full speed */ +{ + Clkrst *clk = (Clkrst *)soc.clkrst; + + iprint("pllx base %#lux misc %#lux\n", clk->pllxbase, clk->pllxmisc); + iprint("plle base %#lux misc %#lux\n", clk->pllebase, clk->pllemisc); + iprint("super cclk divider %#lux\n", clk->supcclkdiv); + iprint("super sclk divider %#lux\n", clk->supsclkdiv); +} + +static char * +devidstr(ulong) +{ + return "ARM Cortex-A9"; +} + +void +archtegralink(void) +{ +} + +/* convert AddrDevid register to a string in buf and return buf */ +char * +cputype2name(char *buf, int size) +{ + ulong r; + + r = cpidget(); /* main id register */ + assert((r >> 24) == 'A'); + seprint(buf, buf + size, "Cortex-A9 r%ldp%ld", + (r >> 20) & MASK(4), r & MASK(4)); + return buf; +} + +static void +errata(void) +{ + ulong reg, r, p; + + /* apply cortex-a9 errata workarounds */ + r = cpidget(); /* main id register */ + assert((r >> 24) == 'A'); + p = r & MASK(4); /* minor revision */ + r >>= 20; + r &= MASK(4); /* major revision */ + + /* this is an undocumented `diagnostic register' that linux knows */ + reg = cprdsc(0, CpDTLB, 0, 1); + if (r < 2 || r == 2 && p <= 2) + reg |= 1<<4; /* 742230 */ + if (r == 2 && p <= 2) + reg |= 1<<6 | 1<<12 | 1<<22; /* 743622, 2×742231 */ + if (r < 3) + reg |= 1<<11; /* 751472 */ + cpwrsc(0, CpDTLB, 0, 1, reg); +} + +void +archconfinit(void) +{ + char *p; + ulong hz; + + assert(m != nil); + m->cpuhz = 1000 * Mhz; /* trimslice speed */ + p = getconf("*cpumhz"); + if (p) { + hz = atoi(p) * Mhz; + if (hz >= 100*Mhz && hz <= 3600UL*Mhz) + m->cpuhz = hz; + } + m->delayloop = m->cpuhz/2000; /* initial estimate */ + errata(); +} + +int +archether(unsigned ctlrno, Ether *ether) +{ + switch(ctlrno) { + case 0: + ether->type = "rtl8169"; /* pci-e ether */ + ether->ctlrno = ctlrno; + ether->irq = Pcieirq; /* non-msi pci-e intr */ + ether->nopt = 0; + ether->mbps = 1000; + return 1; + } + return -1; +} + +void +dumpscustate(void) +{ + Scu *scu = (Scu *)soc.scu; + + print("cpu%d scu: accctl %#lux\n", m->machno, scu->accctl); + print("cpu%d scu: smp cpu bit map %#lo for %ld cpus; ", m->machno, + (scu->cfg >> 4) & MASK(4), (scu->cfg & MASK(2)) + 1); + print("cpus' power %#lux\n", scu->cpupwrsts); +} + +void +scuon(void) +{ + Scu *scu = (Scu *)soc.scu; + + if (scu->ctl & Scuenable) + return; + scu->inval = MASK(16); + coherence(); + scu->ctl = Scuparity | Scuenable | Specfill; + coherence(); +} + +int +getncpus(void) +{ + int n; + char *p; + Scu *scu; + + if (navailcpus == 0) { + scu = (Scu *)soc.scu; + navailcpus = (scu->cfg & MASK(2)) + 1; + if (navailcpus > MAXMACH) + navailcpus = MAXMACH; + + p = getconf("*ncpu"); + if (p && *p) { + n = atoi(p); + if (n > 0 && n < navailcpus) + navailcpus = n; + } + } + return navailcpus; +} + +void +cpuidprint(void) +{ + char name[64]; + + cputype2name(name, sizeof name); + delay(50); /* let uart catch up */ + iprint("cpu%d: %lldMHz ARM %s %s-endian\n", + m->machno, m->cpuhz / Mhz, name, + getpsr() & PsrBigend? "big": "little"); +} + +static void +clockson(void) +{ + Clkrst *clk = (Clkrst *)soc.clkrst; + + /* enable all by clearing resets */ + clk->rstdevl = clk->rstdevh = clk->rstdevu = 0; + coherence(); + clk->clkoutl = clk->clkouth = clk->clkoutu = ~0; /* enable all clocks */ + coherence(); + + clk->rstsrc = Wdcpurst | Wdcoprst | Wdsysrst | Wdena; + coherence(); +} + +/* we could be shutting down ourself (if cpu == m->machno), so take care. */ +void +stopcpu(uint cpu) +{ + Flow *flow = (Flow *)soc.flow; + Clkrst *clk = (Clkrst *)soc.clkrst; + + if (cpu == 0) { + iprint("stopcpu: may not stop cpu0\n"); + return; + } + + machoff(cpu); + lock(&active); + active.stopped |= 1 << cpu; + unlock(&active); + l1cache->wb(); + + /* shut down arm7 avp coproc so it can't cause mischief. */ + /* could try watchdog without stopping avp. */ + flow->haltcop = Stop; + coherence(); + flow->cop = 0; /* no Cpuenable */ + coherence(); + delay(10); + + assert(cpu < Maxflowcpus); + *(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = Stop; + coherence(); + *(cpu == 0? &flow->cpu0: &flow->cpu1) = 0; /* no Cpuenable */ + coherence(); + delay(10); + + /* cold reset */ + assert(cpu < Maxcpus); + clk->cpuset = (Cpu0reset | Cpu0dbgreset | Cpu0dereset) << cpu; + coherence(); + delay(1); + + l1cache->wb(); +} + +static void +synccpus(volatile long *cntp, int n) +{ + ainc(cntp); + while (*cntp < n) + ; + /* all cpus should now be here */ +} + +static void +pass1(int pass, volatile Diag *dp) +{ + int i; + + if(m->machno == 0) + iprint(" %d", pass); + for (i = 1000*1000; --i > 0; ) { + ainc(&dp->cnt); + adec(&dp->cnt); + } + + synccpus(&dp->sync, navailcpus); + /* all cpus are now here */ + + ilock(dp); + if(dp->cnt != 0) + panic("cpu%d: diag: failed w count %ld", m->machno, dp->cnt); + iunlock(dp); + + synccpus(&dp->sync, 2 * navailcpus); + /* all cpus are now here */ + adec(&dp->sync); + adec(&dp->sync); +} + +/* + * try to confirm coherence of l1 caches. + * assume that all available cpus will be started. + */ +void +l1diag(void) +{ + int pass; + volatile Diag *dp; + + if (!Debug) + return; + + l1cache->wb(); + + /* + * synchronise and print + */ + dp = &diag; + ilock(dp); + if (m->machno == 0) + iprint("l1: waiting for %d cpus... ", navailcpus); + iunlock(dp); + + synccpus(&dp->sync, navailcpus); + + ilock(dp); + if (m->machno == 0) + iprint("cache coherency pass"); + iunlock(dp); + + synccpus(&dp->sync, 2 * navailcpus); + adec(&dp->sync); + adec(&dp->sync); + + /* + * cpus contend + */ + for (pass = 0; pass < 3; pass++) + pass1(pass, dp); + + /* + * synchronise and check sanity + */ + synccpus(&dp->sync, navailcpus); + + if(dp->sync < navailcpus || dp->sync >= 2 * navailcpus) + panic("cpu%d: diag: failed w dp->sync %ld", m->machno, + dp->sync); + if(dp->cnt != 0) + panic("cpu%d: diag: failed w dp->cnt %ld", m->machno, + dp->cnt); + + ilock(dp); + iprint(" cpu%d ok", m->machno); + iunlock(dp); + + synccpus(&dp->sync, 2 * navailcpus); + adec(&dp->sync); + adec(&dp->sync); + l1cache->wb(); + + /* + * all done, print + */ + ilock(dp); + if (m->machno == 0) + iprint("\n"); + iunlock(dp); +} + +static void +unfreeze(uint cpu) +{ + Clkrst *clk = (Clkrst *)soc.clkrst; + Flow *flow = (Flow *)soc.flow; + + assert(cpu < Maxcpus); + + clk->clkcpu &= ~(Cpu0stop << cpu); + coherence(); + /* out of reset */ + clk->cpuclr = (Cpu0reset | Cpu0wdreset | Cpu0dbgreset | Cpu0dereset) << + cpu; + coherence(); + + assert(cpu < Maxflowcpus); + *(cpu == 0? &flow->cpu0: &flow->cpu1) = 0; + coherence(); + *(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = 0; /* normal operat'n */ + coherence(); +} + +/* + * this is all a bit magic. the soc.exceptvec register is effectively + * undocumented. we had to look at linux and experiment, alas. this is the + * sort of thing that should be standardised as part of the cortex mpcore spec. + * even intel document their equivalent procedure. + */ +int +startcpu(uint cpu) +{ + int i, r; + ulong oldvec, rstaddr; + ulong *evp = (ulong *)soc.exceptvec; /* magic */ + + r = 0; + if (getncpus() < 2 || cpu == m->machno || + cpu >= MAXMACH || cpu >= navailcpus) + return -1; + + oldvec = *evp; + l1cache->wb(); /* start next cpu w same view of ram */ + *evp = rstaddr = PADDR(_vrst); /* will start cpu executing at _vrst */ + coherence(); + l1cache->wb(); + unfreeze(cpu); + + for (i = 2000; i > 0 && *evp == rstaddr; i--) + delay(1); + if (i <= 0 || *evp != cpu) { + iprint("cpu%d: didn't start!\n", cpu); + stopcpu(cpu); /* make sure it's stopped */ + r = -1; + } + *evp = oldvec; + return r; +} + +static void +cksecure(void) +{ + ulong db; + extern ulong getdebug(void); + + if (getscr() & 1) + panic("cpu%d: running non-secure", m->machno); + db = getdebug(); + if (db) + iprint("cpu%d: debug enable reg %#lux\n", m->machno, db); +} + +ulong +smpon(void) +{ + ulong aux; + + /* cortex-a9 model-specific configuration */ + aux = getauxctl(); + putauxctl(aux | CpACsmp | CpACmaintbcast); + return aux; +} + +void +cortexa9cachecfg(void) +{ + /* cortex-a9 model-specific configuration */ + putauxctl(getauxctl() | CpACparity | CpAClwr0line | CpACl2pref); +} + +/* + * called on a cpu other than 0 from cpureset in l.s, + * from _vrst in lexception.s. + * mmu and l1 (and system-wide l2) caches and coherency (smpon) are on, + * but interrupts are disabled. + * our mmu is using an exact copy of cpu0's l1 page table + * as it was after userinit ran. + */ +void +cpustart(void) +{ + int ms; + ulong *evp; + Power *pwr; + + up = nil; + if (active.machs & (1<<m->machno)) { + serialputc('?'); + serialputc('r'); + panic("cpu%d: resetting after start", m->machno); + } + assert(m->machno != 0); + + errata(); + cortexa9cachecfg(); + memdiag(&testmem); + + machinit(); /* bumps nmach, adds bit to machs */ + machoff(m->machno); /* not ready to go yet */ + + /* clock signals and scu are system-wide and already on */ + clockshutdown(); /* kill any watch-dog timer */ + + trapinit(); + clockinit(); /* sets loop delay */ + timersinit(); + cpuidprint(); + + /* + * notify cpu0 that we're up so it can proceed to l1diag. + */ + evp = (ulong *)soc.exceptvec; /* magic */ + *evp = m->machno; + coherence(); + + l1diag(); /* contend with other cpus to verify sanity */ + + /* + * pwr->noiopwr == 0 + * pwr->detect == 0x1ff (default, all disabled) + */ + pwr = (Power *)soc.power; + assert(pwr->gatests == MASK(7)); /* everything has power */ + + /* + * 8169 has to initialise before we get past this, thus cpu0 + * has to schedule processes first. + */ + if (Debug) + iprint("cpu%d: waiting for 8169\n", m->machno); + for (ms = 0; !l1ptstable.word && ms < 5000; ms += 10) { + delay(10); + cachedinvse(&l1ptstable.word, sizeof l1ptstable.word); + } + if (!l1ptstable.word) + iprint("cpu%d: 8169 unreasonably slow; proceeding\n", m->machno); + /* now safe to copy cpu0's l1 pt in mmuinit */ + + mmuinit(); /* update our l1 pt from cpu0's */ + fpon(); + machon(m->machno); /* now ready to go and be scheduled */ + + if (Debug) + iprint("cpu%d: scheding\n", m->machno); + schedinit(); + panic("cpu%d: schedinit returned", m->machno); +} + +/* mainly used to break out of wfi */ +void +sgintr(Ureg *ureg, void *) +{ + iprint("cpu%d: got sgi\n", m->machno); + /* try to prod cpu1 into life when it gets stuck */ + if (m->machno != 0) + clockprod(ureg); +} + +void +archreset(void) +{ + static int beenhere; + + if (beenhere) + return; + beenhere = 1; + + /* conservative temporary values until archconfinit runs */ + m->cpuhz = 1000 * Mhz; /* trimslice speed */ + m->delayloop = m->cpuhz/2000; /* initial estimate */ + + prcachecfg(); + + clockson(); + /* all partitions were powered up by u-boot, so needn't do anything */ + archconfinit(); +// resetusb(); + fpon(); + + if (irqtooearly) + panic("archreset: too early for irqenable"); + irqenable(Cpu0irq, sgintr, nil, "cpu0"); + irqenable(Cpu1irq, sgintr, nil, "cpu1"); + /* ... */ +} + +void +archreboot(void) +{ + Clkrst *clk = (Clkrst *)soc.clkrst; + + assert(m->machno == 0); + iprint("archreboot: reset!\n"); + delay(20); + + clk->rstdevl |= Sysreset; + coherence(); + delay(500); + + /* shouldn't get here */ + splhi(); + iprint("awaiting reset"); + for(;;) { + delay(1000); + print("."); + } +} + +void +kbdinit(void) +{ +} + +static void +missing(ulong addr, char *name) +{ + static int firstmiss = 1; + + if (addr == 0) { + iprint("address zero for %s\n", name); + return; + } + if (probeaddr(addr) >= 0) + return; + missed++; + if (firstmiss) { + iprint("missing:"); + firstmiss = 0; + } else + iprint(",\n\t"); + iprint(" %s at %#lux", name, addr); +} + +/* verify that all the necessary device registers are accessible */ +void +chkmissing(void) +{ + delay(10); + missing(KZERO, "dram"); + missing(soc.intr, "intr ctlr"); + missing(soc.intrdist, "intr distrib"); + missing(soc.tmr[0], "tegra timer1"); + missing(soc.uart[0], "console uart"); + missing(soc.pci, "pcie"); + missing(soc.ether, "ether8169"); + missing(soc.µs, "µs counter"); + if (missed) + iprint("\n"); + delay(10); +} + +void +archflashwp(Flash*, int) +{ +} + +/* + * for ../port/devflash.c:/^flashreset + * retrieve flash type, virtual base and length and return 0; + * return -1 on error (no flash) + */ +int +archflashreset(int bank, Flash *f) +{ + if(bank != 0) + return -1; +panic("archflashreset: rewrite for nor & nand flash on ts"); + /* + * this is set up for the igepv2 board. + */ + f->type = "onenand"; + f->addr = (void*)VIRTNOR; /* mapped here by archreset */ + f->size = 0; /* done by probe */ + f->width = 1; + f->interleave = 0; + return 0; +} |