Clean up some extraneous files

619f04f2 · Donald Haase · aefcc6bd · 619f04f2 · aefcc6bd · aefcc6bd
Commit 619f04f2 authored Jun 09, 2023 by Donald Haase
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ TARGET = superfamicast.elf
 OBJS =  main.o 2xsai.o apu.o apudebug.o c4.o c4emu.o cheats2.o clip.o cpu.o data.o debug.o dma.o 
 OBJS += dsp1.o fxdbg.o fxemu.o fxinst.o gfx.o globals.o loadzip.o memmap.o netplay.o obc1.o ppu.o screenshot.o 
 OBJS += sdd1.o sdd1emu.o server.o seta010.o seta011.o seta018.o seta.o snaporig.o snapshot.o snes9x.o soundux.o spc700.o 
-OBJS += spc7110.o spc.o spccycles.o srtc.o tile.o explode.o unreduce.o unshrink.o unzip.o cheats.o scanf.o dc_utils.o 
+OBJS += spc7110.o spc.o spccycles.o srtc.o tile.o explode.o unreduce.o unshrink.o unzip.o cheats.o dc_utils.o 
 OBJS += dc_vmu.o XML.o dc_menu.o pvr_texture.o dc_controller.o dc_mouse.o dc_file_browser.o test.o sa1.o scherzo_snd_stream.o 
 OBJS += offsets.o romdisk.o


--- a/gcc_asm_sh4_tips.txt
+++ b/gcc_asm_sh4_tips.txt
-------------------------------------------
-Helpful tips and tricks - using GCC and G++
-------------------------------------------
-
-=> Use struct copies (instead of copying individual elements of a struct).
-
-   GCC and G++ generate code with weak scheduling when copying a struct
-   by individual elements.
-
-   GCC and G++ generate code with better instruction scheduling when
-   copying a struct via struct assignment.
-
-=> Use C++ "inline" methods sparingly
-
-   Many people overuse the C++ "inline" facility because it eliminates
-   a function call and therefore "always makes code faster".
-
-   This isn't necessarily true.
-
-   When a C++ method which requires many registers is inlined into
-   another function which uses many registers, this creates a code block
-   which requires an extremely large number of registers. This can
-   overload G++'s register allocator, and force it to generate many register
-   spills/restores to/from the stack frame (e.g. generate slow code).
-
-   If the two functions were compiled separately, then GCC could probably
-   generate more efficient code for both functions.
-
-   Good C++ methods for inlining are usually:
-   
-     o Small
-
-       Small functions are good for inlining because the cost of the
-       function call is fairly large compared to the work performed.
-       Large functions are usually not worth inlining because the cost
-       of the function call is small compared to the actual work performed.
-
-     o Use only a few variables (registers)
-
-       So you don't overload the register allocator, as described above.
-
-=> I see an "Out of memory" dialogue box when loading my executables
-   into Codescape.
-
-   Probably you have compiled for big-endian instead of little-endian...
-
-=> Passing long command lines to GCC/AS/LD
-
-   GCC supports the Microsoft convention of using "@filename" to pass
-   long command lines. So, instead of invoking "gcc -option1 -option2 ..."
-   you can put "-option1 -option2" into a file, and invoke "gcc @filename"
-   instead.
-
-=> Useful compiler options
-
-   GCC supports three calling conventions on the SH4.
-   These are:
-
-   -m4
-
-       FPU is assumed double-precision on function entry and exit.
-       single = 32 bits, double = 64 bits.
-
-   -m4-single
-
-       FPU is assumed single-precision on function entry and exit.
-       single = 32 bits, double = 64 bits.
-
-   -m4-single-only
-
-       FPU is always in single-precision mode.
-       single = double = 32 bits. This mode is the fastest
-       so we recommend using it over -m4 or -m4-single.
-
-   A few other useful options:
-
-   -Wall
-
-      Turns on all warnings. This will detect uninitialized variables,
-     unused variables, etc. Generally a Good Thing.
-
-   -ansi
-
-      Turns on ANSI C compliance warnings. For strict ANSI compliance
-     (no GNU C extensions) use -pedantic instead (somewhat deprecated).
-
-   -mrelax (C and C++, bigger win on C++)
-
-      Inserts branch shortening hints for shortening at link time.
-      Can convert some mova Lfoo, rn; jsr @rn; Lfoo .long func
-      to bsr func which saves six bytes.
-
-      Note: At source compile time, the -mrelax option must be used.
-            At link time, if using gcc -mrelax must be used.
-            If linking with ld, then -relax must be used.
-
-      Note: As of this writing -mrelax does not update the debug info
-            properly - it causes source lines not to line up properly
-            with the disassembly. If this happens, please do not use.
-
-   -mspace (C and C++)
-
-       Requests GCC to minimize code size. Tells gcc not to unroll
-       memory moves/struct copies and minimizes CSE movement.
-       Usually extracts about a 5% penalty on code speed, but very useful
-       when you're tight on memory.
-
-       Note: Optimizing for space with -mspace disables inlining of __inline__
-       functions! (for obvious reasons)
-
-   -ffast-math (C and C++)
-
-       Prevents GCC from generating calls to software sqrt, etc
-       when -m4-single-only is used.
-
-   -mdalign (C and C++)
-
-       Preserves 64-bit alignment of the stack pointer. This allows
-       hand-written assembly to use fmov.d to access doubles on the stack.
-
-       Note: Neither libc nor libcross are fully compatible
-             with this option, so great care will be needed
-             when using this option.
-
-   -mhitachi (C and C++)
-
-        Uses the Hitachi C calling convention instead of the standard
-        calling convention. The differences are:
-
-        o GNU C will pass structures in registers if possible.
-          Hitachi C always passes structures on the stack.
-
-        o GNU C can pass some varargs parameters in registers.
-          Hitachi C always passes all varargs parameters on the stack.
-
-        o GNU C assumes MACL and MACH are clobbered across function calls.
-          Hitachi C assumes MACL and MACH are preserved.
-
-       Note: You must use the Hitachi C standard C libraries when using
-             this option, because the GNU C libraries assume the standard
-             calling convention.
-
-       Note: You must properly prototype varargs/stdargs functions when
-             using -mhitachi. Failure to prototype these functions will
-             cause the callee to receive garbage data.
-
-   -fstrict-aliasing
-
-    This option will become available in early to mid '99 and improves
-    alias analysis.
-
-   -fargument-noalias
-   -fargument-noalias-global (C and C++; bigger win for C++)
-
-       (Roughly equivalent to "/Oa" on MSVC++)
-
-       "-fargument-no alias" assumes arguments may not alias each other and
-       "-fargument-noalias-global"  assumes arguments may not alias each other
-      and globals.
-
-      This means if you have func(char *a, char *b) then gcc can assume
-      a will never point to the same data items as b, so when it writes
-      through pointer b, it doesn't need to flush registers loaded with
-      data from pointer a.
-
-       G++ by default generates awful code for class variables;
-       take this example:
-
-typedef struct {
-        float x, y, z;
-} VERTEX;
-
-class foo {
-
-        public:
-                VERTEX offset;
-                void translate(VERTEX *vertex, int vertices_num);
-};
-
-void foo::translate(VERTEX *vertex, int vertices_num)
-
-{
-        int i;
-
-        for (i=0; i<vertices_num; i++) {
-                vertex[i].x += offset.x;
-                vertex[i].y += offset.y;
-                vertex[i].z += offset.z;
-        }
-}
-
-    G++ compiles this at -O2 -m4-single-only to:
-
-_translate__3fooP6VERTEXi:
-        mov     #0,r3
-        mov.l   r14,@-r15
-        cmp/ge  r6,r3
-        bt.s    L7
-        mov     r15,r14
-        mov     r4,r0
-        mov     r4,r7
-        add     #4,r0
-        mov     r5,r2
-        add     #8,r7
-        mov     r5,r1
-        add     #8,r2
-        add     #4,r1
-L5:
-        fmov.s  @r5,fr1
-        fmov.s  @r4,fr2      <- offset.x read every iteration
-        fadd    fr2,fr1
-        fmov.s  fr1,@r5
-        fmov.s  @r1,fr1
-        fmov.s  @r0,fr2      <- offset.y read every iteration
-        fadd    fr2,fr1
-        fmov.s  fr1,@r1
-        fmov.s  @r2,fr1
-        fmov.s  @r7,fr2      <- offset.z read every iteration
-        fadd    fr2,fr1
-        add     #1,r3
-        add     #12,r5
-        add     #12,r1
-        fmov.s  fr1,@r2
-        cmp/ge  r6,r3
-        bf.s    L5
-        add     #12,r2
-L7:
-        mov     r14,r15
-        rts
-        mov.l   @r15+,r14
-
-    but when -O2 -m4-single-only -fargument-nolias is used:
-
-_translate__3fooP6VERTEXi:
-        mov     #0,r3
-        mov.l   r14,@-r15
-        cmp/ge  r6,r3
-        bt.s    L7
-        mov     r15,r14
-        mov     r4,r1
-        add     #8,r1
-        fmov.s  @r1,fr2
-        mov     r5,r2
-        fmov.s  @r4+,fr4
-        add     #8,r2
-        mov     r5,r1
-        fmov.s  @r4,fr3
-        add     #4,r1
-L5:
-        fmov.s  @r5,fr1
-        fadd    fr4,fr1      <- offset.x held in fr4
-        fmov.s  fr1,@r5
-        fmov.s  @r1,fr1
-        fadd    fr3,fr1      <- offset.y held in fr3
-        fmov.s  fr1,@r1
-        fmov.s  @r2,fr1
-        fadd    fr2,fr1      <- offset.z held in fr2
-        add     #1,r3
-        add     #12,r5
-        add     #12,r1
-        fmov.s  fr1,@r2
-        cmp/ge  r6,r3
-        bf.s    L5
-        add     #12,r2
-L7:
-        mov     r14,r15
-        rts
-        mov.l   @r15+,r14
-
-       This will make C++ code much faster, although care must be taken
-       not to pass arguments pointers which could overlap.
-
-   -fno-exceptions (C++ only)
-
-       Tells GCC not to generate C++ exception-handling code.
-       Using this option can reduce C++ code size.
-
-   -fno-rtti (C++ only)
-
-       Tells GCC not to generate C++ run-time type info.
-       Using this option can reduce C++ code size.
-
-   -fomit-frame-pointer
-
-       Allows the compiler to use r14 as a general purpose register.
-       Since the source-level debugger expects to locate local variables
-       by using r14, this may cause problems when attempting to source-level
-       debug. Do not use this compiler option when debugging code.
-
-   -fwritable-strings
-
-       GCC normally places read-only strings in the .text section.
-       Specifying this option will place strings into the .data section
-       instead of the .text section.
-
-   -nostdlib
-
-       Prevents gcc from linking libraries containing the standard C library
-      if you're not using them.
-
-   -g
-     
-       Outputs debugging information.
-
-   -gdwarf+
-
-       Outputs DWARF 1.2 format debugging information with gdb extensions
-       (works with Codescape). Only applicable when using the ELF version
-       of the toolset.
-
-   -fno-schedule-insns2
-
-       Disables instruction scheduling. This makes debugging easier because
-       the source code pointer will not jump around semi-randomly when
-       single-stepping through source. This does make code slower, though.
-
-   -ffunction-sections
-
-        Places each function in a separate section. This allows the linker
-        to remove unused functions when linked with --gc-sections (or
-        -Wl,--gc-sections when using gcc to link).
-
-        This is particularly useful for C++ projects where redundant
-        constructors and destructors for each file.
-
-        o Note: This is a new feature and may not exist in this version
-          of the toolset.
-
-=> Recommended compiler options for C files
-
-   o Add -mhitachi to these options if using Shinobi/Kamui/Ninja
-
-   For C code (using COFF, debugging):
-
-       gcc -O2 -m4-single-only -mrelax -ffast-math -fargument-noalias-global
-           -Wall -ansi -g -fno-schedule-insns2
-
-   For C code (using COFF, release):
-
-       gcc -O2 -m4-single-only -mrelax -ffast-math -fargument-noalias-global
-           -Wall -ansi
-
-   For C code (using ELF, debugging):
-
-       gcc -O2 -m4-single-only -mrelax -ffast-math -fargument-noalias-global
-           -Wall -ansi -gdwarf+ -fno-schedule-insns2
-
-   For C code (using ELF, release):
-
-       gcc -O2 -m4-single-only -mrelax -ffast-math -fargument-noalias-global
-           -Wall -ansi
-
-   (Please see caution notes on -fargument-noalias-global)
-
-=> Recommended compiler options for C++ files
-
-   o Add -mhitachi to these options if using Shinobi/Kamui/Ninja
-
-   For C++ code (using COFF, debugging):
-
-        gcc -O2 -m4-single-only -mrelax -ffast-math -fno-exceptions -fno-rtti
-       -fargument-noalias-global -Wall -ansi -g -fno-schedule-insns2
-
-   For C++ code (using COFF, release):
-
-       gcc -O2 -m4-single-only -mrelax -ffast-math -fno-exceptions -fno-rtti
-      -fargument-noalias-global -Wall -ansi
-
-   For C++ code (using ELF, debugging):
-
-       gcc -O2 -m4-single-only -mrelax -ffast-math -fno-exceptions -fno-rtti
-      -fargument-noalias-global -Wall -ansi -gdwarf+ -fno-schedule-insns2
-
-   For C++ code (using ELF, release):
-
-       gcc -O2 -m4-single-only -mrelax -ffast-math -fno-exceptions -fno-rtti
-      -fargument-noalias-global -Wall -ansi
-
-   (Please see caution notes on -fargument-noalias-global)
-
-=> Viewing gcc phase execution
-
-   If you'd like to see the commands gcc passes to cpp, as and ld, then
-   pass the "-v" option to gcc.
-
-=> Can't redirect compiler errors to a file.
-
-   The compiler outputs the errors to stderr - you can redirect stderr
-   under Windows NT with "2>" like this: "gcc .... 2> errors.txt"
-
-   Under Windows 95/98 the stderr is not redirectable.
-
-=> Long command lines
-
-   GCC supports "command files" - you can use "gcc @filename" and
-   GCC will fetch its command line options from the specified
-   filename.
-
-=> Generating mixed C/assembly listings
-
-   To generate a nice assembly listing with interspersed C source, try:
-
-   gcc -O2 -m2 -g -S foo.c
-   as -ahld foo.s > foo.lst
-
-   "foo.lst" will now contain a nicely formatted mixed C/assembly listing.
-
-   Sometimes the previous method does not work well, and generating
-   an assembly listing from the object file is better. Copy the source
-   file to the same directory as the object file, then do:
-
-   objdump --disassemble --source foo.o > foo.lst
-
-=> Linking in general - C or assembly
-
-   It is much better to use gcc to link, and use "-Xlinker option"
-   to pass options to the linker, than to use ld itself to link.
-
-   There are at least two reasons:
-
-   1) If ld is used to link, then the library paths must be specified
-      explicitly. This is a problem because when gcc is upgraded,
-      the makefile will need to be changed as well to use the new libraries.
-      If this is not done. then the makefile will use the new gcc with
-      the old libraries, which can cause problems.
-
-   2) When linking using gcc, gcc will automatically set the ld parameters
-      so ld will search the SH4 libraries before the SH1 libraries if
-      -m4(-single, -single-only) is specified. If ld is used directly,
-      then the SH4 libraries must be specified manually in the correct order
-      relative to the SH1 libraries.
-
-=> Linker cannot find symbols which are in your library files.
-
-   The linker only makes one pass through your libraries, so if you have
-   two libraries which are dependent on each other you must mention the first
-   library, then the second library, then the first library again.
-
-   It is possible to create pathological cases where the linker cannot
-   link your libraries. In such a case, you probably want to merge the
-   offending libraries into one single library.
-
-   For reference, the default link order is:
-
-   Without floating point math: -lgcc -lc -lgcc
-   With    floating point math: -lm -lgcc -lc -lgcc
-
-=> Linker cannot find symbols in your application
-
-   If you are programming in C++ and calling C functions, did you remember
-   to prototype your C functions and wrap the prototypes in extern "C"?
-   Failure to do this will cause the C function names to be C++ mangled,
-   and the linker will not be able to find your C functions.
-
-=> Anonymous unions in C files
-
-   Anonymous unions are not a part of the ANSI C (X3J11) standard; they
-   are a C++ feature implemented by some C compilers, but not by gcc.
-
-=> Standard C library floating point functions
-
-   If you use standard C library floating point functions, then you must link
-   with the floating point library (libm.a) by specifying:
-
-   gcc <filename.c> -lm
-
-   The "-lm" MUST be after the filename, not before.
-
-=> Undefined reference to 'pow'/'sin'/'cos' (transcendental functions)
-
-   Most of the math library is in libm.a, so when you link using the
-   gcc library, specify "-lm".
-
-=> GCC complains about "garbage at end of number".
-
-   You probably wrote something like "0x1800E+OFFSET". GCC interprets this
-   as a floating pointer number because of "0E+OFFSET" looks like a floating
-   point number.
-
-   The solution is to place a space between "E" and "+".
-
-=> Parallel makes
-
-   If you have a dual processor Windows NT box, try "make -j4", or if you
-   have a top-level makfile which spawns makes in subdirectories, try
-   "make MAKE="make -j4"" ... this will start four compilers compiling
-   in parallel. Very nifty!
-
-=> Linker is slow.
-
-   We have heard various reports that linking to a RAMdisk accelerates
-   the link process by a factor of 5 or 6.
-
-
-Part 3
-
-Code:
------------------------------------------------------
-Helpful tips and tricks - programming with GCC and G++
------------------------------------------------------
-
-=> Link errors occur when linking stating that constructors and
-   virtual tables are undefined.
-
-   Cause:
-       The first virtual function declaration in the class is never defined in
-       any of your source modules. Therefore, the virtual table/ctor/dtor
-       definitions are never emitted.
-
-   Solution:
-       Make sure there exists a definition for all virtual functions
-       declared in the header file - especially the first one declared
-       (and not defined in the class declaration) - even if the functions
-       are never used.
-
-=> BSS must be cleared and stack (r15) must be set.
-
-   The standard initialization code in crt0.o will set clear your BSS
-   and gcc will automagically generate a call to ___main in your main()
-   function which will call your global constructors for you.
-
-   If you choose not to use crt0.o, you must clear your own BSS.
-   If you choose not to call your main function "main()" then you
-   will need to call your global constructors manually.
-
-   Variables such as "int foo = 0;" will be placed in your BSS, and if
-   your BSS isn't cleared, then (obviously) foo may have a nonzero value.
-
-   The default stack in crt0.o is eight bytes - be sure to set up
-   your own stack if you plan to do anything at all.
-
-=> Use signed char/short/ints wherever possible.
-
-   The SH4 automagically sign-extends when loading chars and shorts;
-   so to use unsigned chars and shorts it must zero out the upper bits
-   after loading. This is inefficient.
-
-   Also, float-to-signed-int conversion is one instruction, but
-   float-to-unsigned int requires a library call.
-
-=> Access to hardware registers is faster using structs.
-
-   GCC generates better code to access hardware registers if you
-   properly typedef a group of hardware registers as a struct, then access
-   the registers relative to the base pointer. This way, when accessing
-   a group of related registers, GCC only needs to load a literal
-   for the base pointer once for the group, instead of once for each
-   individual register.
-
-=> 'register' keyword
-
-   The 'register' keyword hints to GCC that the value will not be aliased,
-   e.g. when a value is written through a pointer the 'register'ed variable
-   will not be overwritten. This helps GCC generate much better code.
-
-=> Use "volatile" qualifier and cache-through address where necessary.
-
-   GCC will load a value once and test it, if it can:
-
-   int wait_flag;
-   while (wait_flag);
-
-   may generate:
-
-        mov   @r4,r0
-   L5:  cmp/eq  #0,r0
-        bf      L5
-
-   unless wait_flag is declared properly.
-
-   The volatile qualifier MUST be used on:
-
-   a) Semaphores between mainline and interrupt code
-   b) Pointers to hardware registers
-
-   The cache-through address MUST be used on:
-
-   a) Pointers to hardware registers
-
-   Also, be sure variables which are accessed by cache-through are
-   CONSISTENTLY accessed via cache-through - if you write to non-cached space,
-   then read in cached space, you may get the cached version of the data,
-   which will not be correct.
-
-=> Be careful with cache coherency
-
-   There are many ways to destroy cache coherency on the SH4, so one must
-   be quite careful of various hardware features:
-
-   a) SH4 DMA is NOT hardware cache coherent, so you must:
-
-      1) Perform a writeback (flush) of the source data,
-
-      2) Perform an invalidate of the destination data, except for the
-         first and last cache lines which must be flushed.
-
-   b) Store queue is similarly not cache coherent, so you must:
-
-      1) Invalidate the destination data if you plan to read the data.
-
-=> Porting from Hitachi C
-
-   Two problems we see often when converting code from SHC to GCC:
-
-   A) Missing volatile qualifiers.
-
-      SHC does not optimize heavily so you can be fairly sloppy with the
-      "volatile" qualifier and code will still work. Not so with GCC -
-      it optimizes very heavily and it will remove redundant loads.
-      See item #3 for specific circumstances where volatile must be used.
-
-   B) Not preserving MACL and MACH in interrupt-called routines.
-
-      SHC implements "callee preserves MACL and MACH" calling convention,
-      whereas GCC does not. If you have interrupt routines written in SHC
-      and you port them to GCC, they will not preserve MACL and MACH and
-      your mainline code will become flaky due to MACL/MACH corruption.
-
-=> #pragma interrupt
-
-   "#pragma interrupt" specifies the next function defined is an interrupt
-   handler. This pragma instructs GCC to:
-
-   1) Save all modified registers. (r0-r14, macl & mach)
-
-   2) Terminate the function with an "rte" instead of an "rts".
-
-   If you are declaring several interrupt handlers you must insert
-   "#pragma interrupt" before every interrupt function.
-
-=> GCC code quality
-
-   GCC produces code which is (IMHO) pretty good for SH4, but not great.
-   GCC has very good target-independent optimization (common subexpression
-   elimination, invariant loop expression hoisting, etc) but mediocre
-   SH-specific optimizations. On complicated functions one can produce
-   code which runs 50% faster than GCC output. Factors which affect
-   GCC's code quality for complicated functions:
-
-   1) Since the SH4 has only a "few" registers (compared to PPC/MIPS)
-      the first scheduling pass is disabled, because it creates many
-      register spills. The main effect of this is that code scheduling
-      can be somewhat weak.
-
-=> Inline assembly in C functions
-
-   Be sure to declare your inline assembly as "__asm__ volatile" instead of
-   just "__asm__" so gcc won't try to optimize and/or remove your
-   inline assembly code.
-
-   GCC has been known to optimize bits of inline assembly which aren't
-   volatile, producing strange results.
-
-   Also, if the two colons which separate the register constraints
-   at the end of an inline assembly fragment are missing, GCC will
-   occasionally crash. Be sure to place two colons even if there are
-   no register constraints in the inline assembly.
-
-   Incidentally, GCC will allow you to put an inline assembly section
-   outside of a function, so you can write whole assembly routines
-   in a C source file without wrapping them inside a C function!
-
-   When using assembly outside of a C function, omit the "volatile"
-   keyword since it isn't necessary and will only confuse the compiler.
-
-   Note: Inline assembly in a C source file outside of a function
-   is not source-level debuggable - if you wish to write large amounts
-   of assembly code it's recommended you create a separate assembly file
-   so it may be properly source-level debugged.
-
-=> gprof/gcov
-
-   gprof and gcov are included, but do not work well on the target system -
-   there is no filesystem support to create files.
-
-=> Standard C library calls - malloc(), free(), creat(), etc.
-
-   They exist, but they require host operating system support
-   through trap #34. See the source for more details.
-
-=> .ctor and .dtor sections
-
-   .ctor is the section for global C++ constructors, and .dtors are the
-   section for global C++ destructors. They require appropriate labels
-   when linking to delimit their start and end addresses, like this:
-
-   . = ALIGN(4);
-    __ctors = *;
-   *(.ctors)
-   __ctors_end = *;
-
-   . = ALIGN(4);
-    __dtors = *;
-   *(.dtors)
-   __dtors_end = *;
-
-   (this needs to be in your linker script)
-
-=> Structure padding
-
-   The SH series microprocessors cannot access words on byte boundaries
-   or longwords on byte boundaries. Words must be word-aligned; longwords
-   must be longword-aligned.
-
-   This means if your structures contain shorts or longs, the compiler
-   will add padding on the previous element to align the short or longs
-   to an acceptable address.
-
-   The "-mpadstruct" option does NOT control this behavior. It controls
-   a different structure padding behavior; it enables structure padding
-   at the end of the structure to pad it out to a multiple of four bytes,
-   which is backwards compatible with gcc-2.6.3's broken behavior.
-
-=> _end
-
-   The "_end" label is where sbrk() (called by malloc()) starts to allocate
-   memory. Basically, it designates the beginning of the heap. When the
-   heap pointer overlaps r15, the standard C library assumes the heap
-   has overflowed into the stack.
-
-   See "Interfacing with host OS" for more details.
-
-=> Overlays
-
-   See the file "overlay.txt" for more info on how to create overlays.
-
-=> ___set_fpscr
-
-   If you are compiling with an option which supports 64-bit
-   floating-point values (-m4 or -m4-single) then the routine
-   ___set_fpscr MUST be called with a single argument during
-   initialization.
-
-   This routine intializes the ___fpscr_values table which is
-   used by the gcc code to switch the FPU from single to double
-   precision and back.
-
-   The value passed to ___set_fpscr is the default fpscr bits
-   used in both single and double modes. Normally this is zero.
-   Also, if you desire to change an FPU bit permanently, this
-   can be done by calling ___set_fpscr since it modifies the
-   values which gcc loads into the fpscr.
-
-   The first entry in the fpscr_values table specifies the fpscr
-   value for the opposite of the default mode (single for -m4,
-   double for -m4-single) and the second value specifies the
-   fpscr value for the default mode (double for -m4, single for
-   -m4-single).
-
-   NOTE: GCC assumes the SZ bit is always zero in little-endian!!
-         Do not set bit 20 when calling ___set_fpscr!!
-
-=> Spinlocks
-
-   The atomic "test and set" instruction "tas.b" should be used to implement
-   spinlocks. Here is a macro which generates tas.b:
-
-#define tas_byte(x)                     \
-({                                      \
-        char *address;                  \
-        int flag;                       \
-        address = &x;                   \
-        __asm__ volatile ("tas.b @%1; movt %0": "=r" (flag): "r" (address): "t"); \
-        flag;                           \
-})
-
-=> Memory allocation functions & notes
-
-   Documentation on the memory allocation functions used in this package
-   can be obtained at http://g.oswego.edu/dl/html/malloc.html.
-
-=> Interfacing with the host OS
-
-   The standard C library included with gcc performs a "trapa #34" to call
-   the host operating system for basic services. Here are the routines
-   which are called:
-
-   read:   trapa #34, r4: SYS_read,   r5: file,   r6: ptr,   r7: len
-   lseek:  trapa #34, r4: SYS_lseek,  r5: file,   r6: ptr,   r7: dir
-   write:  trapa #34, r4: SYS_write,  r5: file,   r6: ptr,   r7: len
-   close:  trapa #34, r4: SYS_close,  r5: file,   r6: 0,     r7: 0
-
-   sbrk:   this is internal to libc now, just set  "_end" to the end
-           of your program + data area, and it will allocate from "_end"
-           until it hits the stack pointer.
-
-   open:   trapa #34, r4: SYS_open,   r5: path,   r6: flags, r7: 0
-   creat:  trapa #34, r4: SYS_creat,  r5: path,   r6: mode,  r7: 0
-   exit:   trapa #34, r4: SYS_exit,   r5: n,      r6: 0,     r7: 0
-   kill:   trapa #34, r4: SYS_exit,   r5: 0xdead, r6: 0,     r7: 0
-   stat:   trapa #34, r4: SYS_stat,   r5: path,   r6: st
-   chmod:  trapa #34, r4: SYS_chmod,  r5: path,   r6: mode
-   chown:  trapa #34, r4: SYS_chown,  r5: path,   r6: owner, r7: group
-   utime:  trapa #34, r4: SYS_utime,  r5: path,   r6: times
-   fork:   trapa #34, r4: SYS_fork
-   wait:   trapa #34, r4: SYS_wait
-   execve: trapa #34, r4: SYS_execve, r5: path,   r6: argv,  r7: envp
-   execv:  trapa #34, r4: SYS_execv   r5: path,   r6: argv
-   pipe:   trapa #34, r4: SYS_pipe,   r5: fd
-
-----------------------------------
-Helpful tips and tricks - debugging
-----------------------------------
-
-=> Easier source-code debugging
-
-   If you disable the GCC/G++ second scheduling pass with "-fno-schedule-insns2"
-   your code will be easier to debug because your source code pointer
-   will jump around less in the debugger.
-
-   This changes the compiler's code generation, and the code will be slower,
-   so it could affect the bug you're trying to isolate, so please be careful
-   with its usage.
-
-   I also recommend NOT using -fomit-frame-pointer when debugging.
-   The debugger relies upon the frame pointer to access local variables,
-   so if the frame pointer is omitted, the debugger will not be able
-   to properly display local variables for that function!
-
-=> Post-morteming crashes
-
-   When the SH4 crashes, the best thing to do is to examine the PR register
-   to determine the return address of the offending routine. This will often
-   tell you which routine has crashed. This applies to C and C++ code as well
-   as assembly code.
-
-=> C++ name demangler
-
-   There is a C++ name demangler called "c++filt" in this package.
-   If you type in a mangled name, the utility will return a demangled
-   function name and arguments.
-
----------------------------------------------
-Helpful tips and tricks - assembly programming
----------------------------------------------
-
-=> The .align directive
-
-   .align in GAS specifies the number of least-significant zero bits,
-   NOT the actual alignment value. For example. .align 1 is 2 byte aligned,
-   .align 2 is 4 byte aligned, .align 5 is 32-byte aligned.
-
-=> Using literals in assembly
-
-   Don't forget to align your literals! (.align 2) Failure to do so will
-   cause you to spend hours tracking down bus error bugs! Also, if you
-   have word literals put them at the end of your literal section so
-   they won't misalign any following longword literals.
-
-   When using literals in assembly, be sure to place your byte literals first,
-   then an .align 1 (for 2 byte alignment), then all the word literals,
-   then a .align 2 (for 4 byte alignment) then all your longword literals.
-   We recommend this because byte and word literals have smaller pc-relative
-   offsets than longword literals.
-
-   GAS also will not report illegal instructions (jump/branch) or stupid
-   (i.e. pc-relative) instructions in delay slots.
-
-=> GCC parameter passing conventions:
-
-   Integer registers:
-
-        r0: Function integer return value (if any)
-            (r1 also used if return is type "long long")
-
-     r0-r3: Temporaries. Assumed destroyed by called function.
-
-     r4-r7: First four integer arguments.
-         Assumed destroyed by called function.
-
-    r8-r15: Assumed preserved by called functions.
-
-      Note: Be sure to sign or zero-extend your chars and shorts in r4-r7
-            before calling a C function! GCC expects arguments in registers
-            to be fully sign extended to an int.
-
-   Floating-point register:
-
-         fr0: Function floating-point return value (if any)
-
-   fr0-fr3: Temporaries. Asssumed destroyed by called function.
-
-  fr4-fr11: First eight floating-point arguments.
-            Assumed destroyed by called function.
-
-  fr12-fr14: Assumed preserved by called function.
-
-   Other registers:
-
-         pr: Processor return - must be preserved, obviously.
-
-        gbr: gcc currently does not generate code which utilizes the gbr -
-            so your assembly routines are free to use the gbr.
-
-  macl, mach, fpul: Assumed to be destroyed.
-
-   fpscr: depends on -m4, -m4-single, or -m4-single-only.
-
-=> Source-level debugging GAS assembly
-
-   The easiest way to assemble assembly files into .o files with debugging
-   information is to use the ".S" extension (MUST be uppercase) and assemble
-   them with gcc:
-
-   gcc -g -c file.S
-
-   gcc will call cpp and as with the appropriate commands to properly assemble
-   the files with source-level debug information.
-
-   Incidentally, this also enables you to use all C preprocessor commands,
-   e.g. "#define", "#if ... #else ... #endif", in your assembly source files.
-
-=> Stupid (but useful) assembly tricks:
-
-   A) Use MACL, MACH, and GBR as temporaries.
-   B) Save pr in a register (sts.l pr,r14)
-
-
-
-Part 4
-
-
-Code:
----------------------
-Writing efficient code
----------------------
-
-=> Use local variables.
-
-   Global variables are slow - to retrieve the value, the SH4 typically
-   must execute:
-
-   mov.l L2,r1
-   mov.l @r1,r1
-
-   Local variables are faster - it's stack-relative, and parameters
-   are even faster because the first four integers parameters are passed
-   in r4-r7 and first eight floating-point parameters in fr4-fr11.
-
-=> Write small functions.
-
-   We've noticed GCC generates very pessimal code when it starts to
-   spill registers, so try to avoid doing too much in one function.
-
-   A function which exceeds more than about a hundred lines should
-   be broken into smaller functions.
-
-=> Read structure fields sequentially forwards; but write them sequentially
-   in reverse (if possible).
-
-   This works well because gcc can use post-increment reads when reading,
-   and pre-decrement writes when writing.
-
-=> Use the "register" keyword (mostly for locals)
-
-   The usage of the "register" keyword allows gcc to determine that
-   memory writes cannot possibly overwrite this variable, so it's more
-   likely to be registerized.
-
-=> Avoid weird casts
-
-   The ANSI C standard allows the compiler to assume that pointers to
-   different types do not alias; e.g. int * and float * never point
-   to the same location. If you cast floats to ints or to other types
-   then you may confuse the compiler and incorrect code may be generated.
-
-=> When defining automatics in functions, define non-array items first,
-   in order of number of times accessed, then array items.
-
-   For this sample:
-
-void func(void)
-
-{
-        int foo[256];
-        volatile int bar;
-
-        bar = 0;
-}
-
-GCC will generate this code, which requires two instructions to access "bar":
-
-_func:
-        mov.w   L3,r3
-        mov.l   r14,@-r15
-        mov.w   L4,r0      <- r0 = 1024
-        mov.w   L3,r7
-        sub     r3,r15
-        mov     #0,r2
-        mov     r15,r14
-        mov.l   r2,@(r0,r14)   <- bar at r15 + 1024
-        add     r7,r14
-        mov     r14,r15
-        rts
-        mov.l   @r15+,r14
-        .align 1
-L3:
-        .short  1028
-L4:
-        .short  1024
-
-If you rearrage the function so that "bar" is defined first:
-
-void func(void)
-
-{
-        volatile int bar;
-        int foo[256];
-
-        bar = 0;
-}
-
-~
-then GCC will generate this code, which only requires a single instruction
-to access "bar":
-
-_func:
-        mov.w   L3,r3
-        mov.l   r14,@-r15
-        mov.w   L3,r7
-        sub     r3,r15
-        mov     #0,r1
-        mov     r15,r14
-        mov.l   r1,@r14      <- bar at @r14
-        add     r7,r14
-        mov     r14,r15
-        rts
-        mov.l   @r15+,r14
-        .align 1
-L3:
-        .short  1028
\ No newline at end of file
--- a/output.txt
+++ b/output.txt
-Console enabled
-Cdfs redirection enabled
-Upload <superfamicast.elf>
-File format is elf32-shl, start address is 0x8c010000
-Section .text, lma 0x8c010000, size 927168
-Section .rodata, lma 0x8c0f25c0, size 151176
-Section .data, lma 0x8c1174e0, size 97976
-Section .ctors, lma 0x8c12f398, size 24
-Section .dtors, lma 0x8c12f3b0, size 20
-transferred at 568566.455653 bytes / sec
-Executing at <0x8c010000>
-Sending execute command (0x8c010000, console=1, cdfsredir=1)...executing
-
--
-KallistiOS ##version##: Fri May 14 11:11:56 CDT 2004
-  Administrator@earthworm:c:/cygwin/usr/local/dc/kos-svn/kos
-maple: active drivers:
-    PuruPuru (Vibration) Pack: JumpPack
-    VMU Driver: Clock, LCD, MemoryCard
-    Mouse Driver: Mouse
-    Keyboard Driver: Keyboard
-    Controller Driver: Controller
-  DMA Buffer at ac2527a0
-vid_set_mode: 640x480 NTSC
-fs_romdisk: mounting image at 0x8c0fbaa8 at /rd
-dc-load console support enabled
-maple: attached devices:
-  A0: Dreamcast Controller          (01000000: Controller)
-  A1: Visual Memory                 (0e000000: Clock, LCD, MemoryCard)
-  A2: Puru Puru Pack                (00010000: JumpPack)
-START OFFSETS
-
-//SCPUState
-#define Flags @(0,r12)
-#define BranchSkip @(4,r12)
-#define NMIActive @(5,r12)
-#define IRQActive @(6,r12)
-#define WaitingForInterrupt @(7,r12)
-#define InDMA @(8,r12)
-#define WhichEvent @(9,r12)
-#define PCS @(12,r12)
-#define PCBase @(16,r12)
-#define PCAtOpcodeStart @(20,r12)
-#define WaitAddress @(24,r12)
-#define WaitCounter @(28,r12)
-#define Cycles @(32,r12)
-#define NextEvent @(36,r12)
-#define V_Counter @(40,r12)
-#define MemSpeed @(44,r12)
-#define MemSpeedx2 @(48,r12)
-#define FastROMSpeed @(52,r12)
-#define AutoSaveTimer @(64,r12)
-#define SRAMModified @(10,r12)
-#define NMITriggerPoint @(68,r12)
-#define TriedInterleavedMode2 @(72,r12)
-#define BRKTriggered @(11,r12)
-#define NMICycleCount @(56,r12)
-#define IRQCycleCount @(60,r12)
-
-//SRegisters
-#define PB @(0,r11)
-#define DB @(1,r11)
-#define PP @(2,r11)
-#define PL @(2,r11)
-#define PH @(3,r11)
-#define AA @(4,r11)
-#define AL @(4,r11)
-#define AH @(5,r11)
-#define DD @(6,r11)
-#define DL @(6,r11)
-#define DH @(7,r11)
-#define SS @(8,r11)
-#define SL @(8,r11)
-#define SH @(9,r11)
-#define XX @(10,r11)
-#define XL @(10,r11)
-#define XH @(11,r11)
-#define YY @(12,r11)
-#define YL @(12,r11)
-#define YH @(13,r11)
-#define PCR @(14,r11)
-
-//CMemory
-#define RAM _Memory + 0
-#define ROM _Memory + 4
-#define VRAM _Memory + 8
-#define SRAM _Memory + 12
-#define BWRAM _Memory + 16
-#define FillRAM _Memory + 20
-#define C4RAM _Memory + 24
-#define HiROM _Memory + 28
-#define LoROM _Memory + 29
-#define SRAMMask _Memory + 32
-#define SRAMSize _Memory + 36
-#define Map _Memory + 40
-#define WriteMap _Memory + 16424
-#define MemorySpeed _Memory + 32808
-#define BlockIsRAM _Memory + 36904
-#define BlockIsROM _Memory + 41000
-#define ROMFilename _Memory + 53368
-
-//SIAPU
-#define APUPCS @(8,r1)
-#define APURAM @(12,r1)
-#define APUExecuting @(0,r1)
-#define APUDirectPage @(16,r1)
-#define APUBit @(1,r1)
-#define APUAddress @(20,r1)
-#define APUWaitAddress1 @(24,r1)
-#define APUWaitAddress2 @(28,r1)
-#define APUWaitCounter @(32,r1)
-#define APUShadowRAM @(36,r1)
-#define APUCachedSamples @(40,r1)
-#define APU_Carry @(2,r1)
-#define APU_Zero @(3,r1)
-#define APU_Overflow @(4,r1)
-#define APUTimerErrorCounter @(44,r1)
-
-//SAPU
-#define APUCycles @(0,r1)
-#define APUShowROM @(4,r1)
-#define APUFlags @(5,r1)
-#define APUKeyedChannels @(6,r1)
-#define APUOutPorts @(7,r1)
-#define APUDSP @(11,r1)
-#define APUExtraRAM 139
-#define APUTimer 204
-#define APUTimerTarget 210
-#define APUTimerEnabled 216
-#define TimerValueWritten 219
-
-//SICPU
-#define CPUSpeed @(0,r1)
-#define CPUOpcodes @(4,r1)
-#define _Carry @(8,r1)
-#define _Zero @(9,r1)
-#define _Negative @(10,r1)
-#define _Overflow @(11,r1)
-#define ShiftedDB @(20,r1)
-#define ShiftedPB @(16,r1)
-#define CPUExecuting @(12,r1)
-#define Scanline @(28,r1)
-#define Frame @(24,r1)
-
-//SSettings
-#define APUEnabled @(0,r1)
-#define Shutdown @(1,r1)
-#define SoundSkipMethod @(2,r1)
-#define H_Max @(4,r1)
-#define HBlankStart @(8,r1)
-#define CyclesPercentage @(12,r1)
-#define DisableIRQ @(16,r1)
-#define Paused @(17,r1)
-#define PAL @(3,r1)
-#define SoundSync @(108,r1)
-#define SA1Enabled @(82,r1)
-#define SuperFXEnabled @(80,r1)
-
-//SAPURegisters
-#define ApuP @(0,r1)
-#define ApuYA @(2,r1)
-#define ApuA @(2,r1)
-#define ApuY @(3,r1)
-#define ApuX @(4,r1)
-#define ApuS @(5,r1)
-#define ApuPC @(6,r1)
-#define APUPCR @(6,r1)
-
-//SPPU
-#define BGMode @(0,r1)
-#define BG3Priority @(1,r1)
-#define Brightness @(2,r1)
-#define GHight @(20,r1)
-#define GInc @(21,r1)
-#define GAddress @(22,r1)
-#define GMask1 @(24,r1)
-#define GFullGraphicCount @(26,r1)
-#define GShift @(28,r1)
-#define CGFLIP @(78,r1)
-#define CGDATA @(80,r1)
-#define FirstSprite @(10,r1)
-#define LastSprite @(592,r1)
-#define OBJ @(594,r1)
-#define OAMPriorityRotation @(2130,r1)
-#define OAMAddr @(4,r1)
-#define OAMFlip @(3,r1)
-#define OAMTileAddress @(2132,r1)
-#define IRQVBeamPos @(16,r1)
-#define IRQHBeamPos @(2134,r1)
-#define VBeamPosLatched @(2136,r1)
-#define HBeamPosLatched @(2138,r1)
-#define HBeamFlip @(2140,r1)
-#define VBeamFlip @(2141,r1)
-#define HVBeamCounterLatched @(11,r1)
-#define MatrixA @(2142,r1)
-#define MatrixB @(2144,r1)
-#define MatrixC @(2146,r1)
-#define MatrixD @(2148,r1)
-#define CentreX @(2150,r1)
-#define CentreY @(2152,r1)
-#define Joypad1ButtonReadPos @(2154,r1)
-#define Joypad2ButtonReadPos @(2155,r1)
-#define CGADD @(2156,r1)
-#define FixedColourGreen @(2158,r1)
-#define FixedColourRed @(2157,r1)
-#define FixedColourBlue @(2159,r1)
-#define SavedOAMAddr @(6,r1)
-#define ScreenHeight @(8,r1)
-#define WRAM @(2160,r1)
-#define BG_Forced @(2164,r1)
-#define ForcedBlanking @(14,r1)
-#define OBJThroughMain @(2165,r1)
-#define OBJThroughSub @(2166,r1)
-#define OBJSizeSelect @(2167,r1)
-#define OBJNameBase @(2168,r1)
-#define OAMReadFlip @(2171,r1)
-#define OAMData @(2172,r1)
-#define VTimerEnabled @(12,r1)
-#define HTimerEnabled @(13,r1)
-#define HTimerPosition @(18,r1)
-#define Mosaic @(2716,r1)
-#define BGMosaic @(2717,r1)
-#define Mode7HFlip @(2721,r1)
-#define Mode7VFlip @(2722,r1)
-#define Mode7Repeat @(2723,r1)
-#define Window1Left @(2724,r1)
-#define Window1Right @(2725,r1)
-#define Window2Left @(2726,r1)
-#define Window2Right @(2727,r1)
-#define ClipWindowOverlapLogic @(2734,r1)
-#define ClipWindow1Enable @(2740,r1)
-#define ClipWindow2Enable @(2746,r1)
-#define ClipWindow1Inside @(2752,r1)
-#define ClipWindow2Inside @(2758,r1)
-#define RecomputeClipWindows @(2764,r1)
-#define CGFLIPRead @(2765,r1)
-#define OBJNameSelect @(2766,r1)
-#define Need16x8Mulitply @(2768,r1)
-#define Joypad3ButtonReadPos @(2769,r1)
-#define MouseSpeed @(2770,r1)
-#define RangeTimeOver @(2131,r1)
-
-//InternalPPU
-#define ColorsChanged @(0,r1)
-#define HDMA @(1,r1)
-#define HDMAStarted @(2,r1)
-#define MaxBrightness @(3,r1)
-#define LatchedBlanking @(4,r1)
-#define OBJChanged @(5,r1)
-#define RenderThisFrame @(6,r1)
-#define SkippedFrames @(20,r1)
-#define FrameSkip @(24,r1)
-#define TileCache @(28,r1)
-#define TileCached @(40,r1)
-#define FirstVRAMRead @(52,r1)
-#define Interlace @(54,r1)
-#define DoubleWidthPixels @(56,r1)
-#define RenderedScreenHeight @(60,r1)
-#define RenderedScreenWidth @(64,r1)
-#define Red @(68,r1)
-#define Green @(1092,r1)
-#define Blue @(2116,r1)
-#define XB @(3140,r1)
-#define ScreenColors @(3144,r1)
-#define PreviousLine @(3656,r1)
-#define CurrentLine @(3660,r1)
-#define Joypads @(3668,r1)
-#define SuperScope @(3688,r1)
-#define Mouse @(3692,r1)
-#define PrevMouseX @(3700,r1)
-#define PrevMouseY @(3708,r1)
-#define Clip @(3716,r1)
-
-//SSA1
-#define SA1Opcodes @(0,SA1REG)
-#define SA1_Carry @(4,SA1REG)
-#define SA1_Zero @(5,SA1REG)
-#define SA1_Negative @(6,SA1REG)
-#define SA1_Overflow @(7,SA1REG)
-#define SA1CPUExecuting @(8,SA1REG)
-#define SA1ShiftedPB @(16,SA1REG)
-#define SA1ShiftedDB @(20,SA1REG)
-#define SA1Flags @(24,SA1REG)
-#define SA1Executing @(9,SA1REG)
-#define SA1NMIActive @(10,SA1REG)
-#define SA1IRQActive @(11,SA1REG)
-#define SA1WaitingForInterrupt @(12,SA1REG)
-#define SA1PCS @(28,SA1REG)
-#define SA1PCBase @(32,SA1REG)
-#define SA1PCAtOpcodeStart @(40,SA1REG)
-#define SA1WaitAddress @(44,SA1REG)
-#define SA1WaitCounter @(48,SA1REG)
-#define SA1WaitByteAddress1 @(52,SA1REG)
-#define SA1WaitByteAddress2 @(56,SA1REG)
-#define SA1BWRAM @(36,SA1REG)
-#define SA1Map @(60,SA1REG)
-#define SA1WriteMap @(16444,SA1REG)
-#define SA1op1 @(32828,SA1REG)
-#define SA1op2 @(32830,SA1REG)
-#define SA1arithmetic_op @(32832,SA1REG)
-#define SA1sum @(32836,SA1REG)
-#define SA1overflow @(32844,SA1REG)
-#define VirtualBitmapFormat @(32845,SA1REG)
-#define SA1_in_char_dma @(32846,SA1REG)
-#define SA1variable_bit_pos @(32847,SA1REG)
-
-//SSA1Registers
-#define SA1PB @(0,r11)
-#define SA1DB @(1,r11)
-#define SA1PP @(2,r11)
-#define SA1PL @(2,r11)
-#define SA1PH SA1PL + 1
-#define SA1AA @(4,r11)
-#define SA1AL @(4,r11)
-#define SA1AH SA1AL + 1
-#define SA1DD @(6,r11)
-#define SA1DL @(6,r11)
-#define SA1DH SA1DL + 1
-#define SA1SS @(8,r11)
-#define SA1SL @(8,r11)
-#define SA1SH SA1SL + 1
-#define SA1XX @(10,r11)
-#define SA1XL @(10,r11)
-#define SA1XH SA1XL + 1
-#define SA1YY @(12,r11)
-#define SA1YL @(12,r11)
-#define SA1YH SA1YL + 1
-#define SA1PCR @(14,r11)
-
-//Other stuff
-#define S9xGetByte @(4,r14)
-#define S9xSetByte @(8,r14)
-#define S9xGetWord @(12,r14)
-#define S9xSetWord @(16,r14)
-#define S9xOpcode_IRQ @(20,r14)
-#define S9xOpcode_NMI @(24,r14)
-#define S9xSetPCBase @(28,r14)
-#define ICPU @(32,r14)
-#define IAPU @(36,r14)
-#define APU @(40,r14)
-#define SA1 @(44,r14)
-#define APURegisters @(48,r14)
-#define Settings @(52,r14)
-#define IPPU @(56,r14)
-#define PPU @(60,r14)
-#define SA1Map_offs 60
-#define SA1WriteMap_offs 16444
-#define NMITriggerPoint_offs 68
-
-END OFFSETS
-vid_set_mode: 640x480 NTSC
-pvr: disabling vertical scaling for VGA
-pvr_mem_stats():
-max system bytes =          0
-system bytes     =          0
-in use bytes     =          0
-max sbrk base: a4396700
-pvr_mem_stats():
-max system bytes =     264448
-system bytes     =     264448
-in use bytes     =     262232
-max sbrk base: a43d7000
-fnt->txr->w = 256, fnt->txr->h = 256
-fs_iso9660: disc change detected
-  (joliet level 3 extensions detected)
-pvr_wait_ready: timed out
--- a/scanf.c
+++ b/scanf.c
-#include <stdio.h>
-#include <stdarg.h>
-#include <string.h>
-#include <stdlib.h>
-
-/* very limited vsscanf, sscanf support */
-
-static int read_int(const char* from, int* idx)
-{
-  int res = 0, i = *idx, negative = 0;
-
-/*  fprintf(stderr, "from[i]=%c\n", from[i]);*/
-
-  if (from[i] == '-')
-  {
-    i++;
-    negative = 1;
-  }
-  
-  for (; from[i]>='0' && from[i]<='9'; i++)
-  {
-/*    fprintf(stderr, "from[i]=%c\n", from[i]);*/
-    res = (from[i] - '0') + 10*res;
-  }
-  
-  *idx = i;
-  
-  return negative ? -res : res;
-}
-
-int vsscanf(const char* str, const char* fmt, va_list params)
-{
-  int sidx = 0, fidx = 0, cnt = 0, fmtlen = strlen(fmt);
-  
-  while (fidx < fmtlen)
-  {
-/*    fprintf(stderr, "fidx=%d fmt[fidx]=%c\n", fidx, fmt[fidx]);*/
-    switch (fmt[fidx])
-    {
-      case '%':
-      fidx++;
-      switch (fmt[fidx])
-      {
-        case 'd':
-        case 'i':
-        {
-          int* ptrtoval = va_arg(params, int*);
-          fidx++;
-          *ptrtoval = read_int(str, &sidx);
-          cnt++;
-        }
-        break;
-        
-        case 'c':
-        {
-          char* ptrtochar = va_arg(params, char*);
-          fidx++;
-          *ptrtochar = str[sidx++];
-          cnt++;
-        }
-        break;
-        
-        case 's':
-        {
-          char* writeto = va_arg(params, char*);
-          int i = 0;
-          /* skip whitespace */
-          fidx++;
-          while (str[sidx]==' ' || str[sidx]=='\t' || str[sidx]=='\n') sidx++;
-          while (str[sidx]!=' ' && str[sidx]!='\t' && str[sidx]!='\n')
-          {
-            writeto[i++] = str[sidx++];
-          }
-          /* write terminating NUL */
-          writeto[i] = '\0';
-          cnt++;
-        }
-        break;
-        
-        default:
-        fprintf(stderr, "Bad format character (%c)\n", fmt[fidx]);
-        exit(1);
-      }
-      break;
-
-      case ' ':
-      case '\t':
-      case '\n':
-      fidx++;
-      while (str[sidx]==' ' || str[sidx]=='\t' || str[sidx]=='\n') sidx++;
-      break;
-
-      default:
-/*      printf("str[sidx]=%d fmt[fidx]=%d\n", str[sidx], fmt[fidx]);*/
-      if (str[sidx++] != fmt[fidx++])
-      {
-        goto badmatch;
-      }
-    }
-  }
-  
-  badmatch:
-  
-  return cnt;
-}
-
-int sscanf(const char* str, const char* fmt, ...)
-{
-  va_list params;
-  int ret;
-  
-  va_start(params, fmt);
-  ret = vsscanf(str, fmt, params);
-  va_end(params);
-  
-  return ret;
-}
-
-#ifdef TESTING
-int main(void)
-{
-  char f[255], c;
-  int i;
-  
-  sscanf("S:80:/pc/home/whatever", "%c:%i:%s", &c, &i, f);
-  printf("c=%c\ni=%d\nf=%s\n", c, i, f);
-  
-  return 0;
-}
-#endif
--- a/simple_optimization_experiments.txt
+++ b/simple_optimization_experiments.txt
-FPS		ACTION
-22		removed -fno-optimize-sibling-calls
-22		again added -fno-optimize-sibling-calls, seemed slightly better
-21		changed -O3 to -Os
-23		added -fforce-addr
-21		added -fmove-all-movables
-24		removed -fno-strict-aliasing
-22		added -funroll-loops
--- a/upload.sh
+++ b/upload.sh
-dcsend.sh superfamicast.elf data.iso