diff --git a/doc/manual/length_region.pdf b/doc/manual/length_region.pdf index ce396b523..9c7329bf9 100644 Binary files a/doc/manual/length_region.pdf and b/doc/manual/length_region.pdf differ diff --git a/doc/manual/mlkit.tex b/doc/manual/mlkit.tex index 6a2459ce3..3f4751064 100644 --- a/doc/manual/mlkit.tex +++ b/doc/manual/mlkit.tex @@ -6,7 +6,7 @@ %\usepackage{times} % times font \usepackage{type1cm} % traditional font - quite thin for screen reading \renewcommand{\ttdefault}{cmtt} % smaller tt-font -\newcommand{\docversion}{4.7.16} +\newcommand{\docversion}{4.7.22} \usepackage{alltt} \makeindex \input{genericmac} @@ -18,7 +18,7 @@ \author{Mads Tofte \and Lars Birkedal \and Martin Elsman \and \and Niels Hallenberg \and Tommy H\o jfeld Olesen \and Peter Sestoft} -\date{December 15, 2025 \\[3cm] \includegraphics[width=.7\textwidth]{mlkit-logo}} +\date{April 28, 2026 \\[3cm] \includegraphics[width=.7\textwidth]{mlkit-logo}} \raggedbottom {\theorembodyfont{\rmfamily} \newtheorem{example}{Example}} @@ -100,28 +100,28 @@ %--------------------------------------------------------- \chapter*{Preface} %--------------------------------------------------------- -The MLKit is a compiler infrastructure for +MLKit is a compiler infrastructure for % \index{Standard ML}% % -the Standard ML programming language \cite{mthm97}. The MLKit supports all of +the Standard ML programming language \cite{mthm97}. MLKit supports all of Standard ML, including Modules and most parts of the SML Basis Library -\cite{basislib2004}. The MLKit features a region-based native backend that +\cite{basislib2004}. MLKit features a region-based native backend that generates efficient x64 machine code. This version of the compiler is also named -MLKit with Regions. The MLKit also features a +MLKit with Regions. MLKit also features a % \index{JavaScript backend}% % -JavaScript backend, which generates code for execution in web browsers. 
The -MLKit with Regions, which this report is about, is intended for the development -of stand-alone applications that must be reliable, fast, and space efficient. +JavaScript backend, which generates code for execution in web browsers. MLKit +with Regions, which this report is about, is intended for the development of +stand-alone applications that must be reliable, fast, and space efficient. There has always been a tension between high-level features in programming languages and the programmer's legitimate need to understand programs at the operational level. Very likely, if a resource conscious programmer is forced to make a choice between the two, he will choose the latter. -The MLKit with Regions is the result of a research and development effort, which +MLKit with Regions is the result of a research and development effort, which was initiated at the University of Copenhagen in 1992. The goal of the project has been to develop implementation technology that combines the advantages of using a high-level programming language, in this case Standard ML, with a model @@ -192,6 +192,15 @@ \chapter*{Preface} the MLKit extends, and differs from, the MLKit Version~4.7.2 by the following features: \begin{enumerate} +\item + % + \index{storage mode analysis!modular}% + % + Modular storage mode analysis. MLKit now implements a modular version of the + storage mode analysis, which allows regions with storage mode \fw{atbot} to be + passed to externally declared functions. The modular version of the storage + mode analysis makes use of local aliasing information for inferring storage + modes (Section~\ref{smaextern.sec}). \item % \index{bit stealing}% @@ -294,7 +303,7 @@ \chapter*{Preface} \begin{enumerate} \item Support for pointer tracing garbage collection. Pointer tracing garbage collection works well together with the region memory model. 
While most - de-allocations can be efficiently performed by region de-allocation, there are + deallocations can be efficiently performed by region deallocation, there are some uses of memory for which life time prediction is difficult. In these cases pointer tracing garbage collection does a good job in collaboration with region memory management \cite{hallenberg99,het02}. @@ -346,7 +355,7 @@ \chapter*{Preface} September, 2001\\[5mm] Mads Tofte, Lars Birkedal, Martin Elsman, Niels Hallenberg, \\ Tommy H\o jfeld Olesen, and Peter Sestoft \\[5mm] -Revised 2002, 2004, 2005, 2021 by Martin Elsman +Revised 2002, 2004, 2005, 2021, 2025, 2026 by Martin Elsman \end{flushright} \newpage @@ -443,7 +452,7 @@ \section{Dynamic Memory Management} suspensions). The beauty of the stack discipline (apart from the fact that it is often very -efficient in practice) is that it couples allocation points and de-allocation +efficient in practice) is that it couples allocation points and deallocation points in a manner that is intelligible to the programmer. C programmers appreciate that whatever memory is allocated for local variables in a procedure ceases to exist (and take up memory) when the procedure returns. @@ -456,7 +465,7 @@ \section{Dynamic Memory Management} By contrast, programmers have much less help when it comes to managing the heap. Two approaches prevail. The first approach is that the programmer manages memory -herself, using explicit allocation and de-allocation instructions (e.g., +herself, using explicit allocation and deallocation instructions (e.g., % \index{malloc@\texttt{malloc}} % @@ -466,7 +475,7 @@ \section{Dynamic Memory Management} % \lstinline{free} in C). 
For non-trivial programs this can be a very significant burden, because it is, in general, very hard to make sure that none of the -values that reside in the memory that one wishes to de-allocate are not needed +values that reside in the memory that one wishes to deallocate are still needed for the rest of the computation. This puts the programmer in a difficult position. If one is too eager to reclaim memory in the heap, the program might crash under some peculiar circumstances, which might be hard to find during @@ -478,7 +487,7 @@ \section{Dynamic Memory Management} heap. Some implementors of some languages even dispense with the stack entirely, relying only on a heap with garbage collection. Garbage collection techniques separate allocation, which is done by the programmer, from -de-allocation, which is done by the garbage collector. At first, this might +deallocation, which is done by the garbage collector. At first, this might seem like the perfect solution: no longer does the programmer have to worry about whether memory that is being reclaimed really is dead, for the garbage collector only reclaims memory that cannot be reached by the rest of the @@ -492,7 +501,7 @@ \section{Dynamic Memory Management} cannot be checked by the compiler and that are likely to be invalidated as the program evolves. -\section{Checked De-Allocation of Memory} +\section{Checked Deallocation of Memory} \label{checked.sec} Regions offer an alternative to the two approaches to memory management discussed in the previous section. The runtime model is very simple, at least @@ -537,7 +546,7 @@ \section{Checked De-Allocation of Memory} gradually (and many regions can grow at the same time) so one might think of the region stack as a stack of heaps. 
However, the region stack really is a stack in the sense that (a) if region $r_1$ is allocated before region $r_2$ then $r_2$ -is de-allocated before $r_1$ and (b) when a region is de-allocated, all the +is deallocated before $r_1$ and (b) when a region is deallocated, all the memory occupied by that region is reclaimed in one constant time operation. Values that reside in one region are often, but not always, of the same type. A @@ -552,10 +561,10 @@ \section{Checked De-Allocation of Memory} \index{garbage collection} % garbage collection in combination with region memory management -\cite{hallenberg99}. While most de-allocations can be efficiently performed by -region de-allocation, there are some uses of memory for which it is difficult to -predict when memory can be de-allocated. In these cases reference-tracing -garbage collection does a good job in combination with region de-allocation. +\cite{hallenberg99}. While most deallocations can be efficiently performed by +region deallocation, there are some uses of memory for which it is difficult to +predict when memory can be deallocated. In these cases reference-tracing +garbage collection does a good job in combination with region deallocation. In many cases however, one can do just fine without reference-tracing garbage collection. Without reference-tracing garbage collection the region stack is the @@ -576,8 +585,8 @@ \section{Checked De-Allocation of Memory} heap with no garbage collection, which again would not be sufficient. But when one has many regions, one obtains the possibility of distinguishing -between values according to what region they reside in. The MLKit has -operations for allocating, de-allocating, and extending regions. But it also has +between values according to what region they reside in. MLKit has +operations for allocating, deallocating, and extending regions. 
But it also has an explicit operation for % \index{region!resetting} @@ -653,7 +662,7 @@ \section{Checked De-Allocation of Memory} In the MLKit the vast majority of region management is done automatically by the compiler and the runtime system. Indeed, with one exception, source programs are written in Standard ML, with no added syntax or special directives. The -exception has to do with resetting of regions. The MLKit provides two built-in +exception has to do with resetting of regions. MLKit provides two built-in functions % \index{resetRegions@$\resetr$}% @@ -670,7 +679,7 @@ \section{Checked De-Allocation of Memory} $\resetf$ and hence cannot crash (unless we have bugs in our system). All other region directives, including directives for allocation and -de-allocation of regions, are inferred automatically by the compiler. This +deallocation of regions, are inferred automatically by the compiler. This happens through a series of fairly complex program analyses and transformations (in the excess of twenty-five passes involving three typed intermediate languages). These analyses are formally defined and the central one, called @@ -706,7 +715,7 @@ \section{Checked De-Allocation of Memory} To sum up, the key advantages obtained by using regions compared to more traditional memory management schemes are \begin{enumerate} -\item safety of de-allocation is checked by the compiler +\item safety of deallocation is checked by the compiler \item the compiler can in many cases spot potential space leaks \item region management is under the control of the user, provided one understands the principles of region inference @@ -807,7 +816,7 @@ \section{Example: the Game of Life} \end{figure} Despite the extensive use of higher-order functions here, there is a great deal of stack structure in this computation. 
For example, the \lstinline{survivors} -list can be allocated in a local region which can be de-allocated after the list +list can be allocated in a local region which can be deallocated after the list has been appended (\lstinline{@}) to the \lstinline{newborn} list. The computation of \lstinline{survivors}, in turn, involves the creation of a closure for \lstinline{(twoorthree o liveneighbours)} and additional creation of @@ -907,7 +916,7 @@ \section{Example: the Game of Life} \caption{ Using double-copying in the game of Life: (a) generation number $n$ resides in region \lstinline{r0}; (b) generation $(n+1)$ has been built in \lstinline{r1}; (c) region \lstinline{r0} has been reset, the new generation copied - into \lstinline{r0} and \lstinline{r1} has been de-allocated.} \vskip5mm \hrule + into \lstinline{r0} and \lstinline{r1} has been deallocated.} \vskip5mm \hrule \label{doublecopy.fig} \end{figure} @@ -917,7 +926,7 @@ \section{Example: the Game of Life} % {\em double copying}, can be seen as a much expanded version of what is often called ``tail recursion optimisation''. In the case of regions, not just the -stack space, but also region space, is re-used. Indeed, double copying is +stack space, but also region space, is reused. Indeed, double copying is similar to invoking a copying garbage collector on specific regions that are known not to have live pointers into them. But by doing the copying ourselves, we have full control over when it happens, we know that the cost of copying will @@ -1075,7 +1084,7 @@ \section{Finite and Infinite Regions} \index{multiplicity} % {\em multiplicity} of a region is a statically determined upper bound on the -number of times a value is put into the region. The MLKit operates with three +number of times a value is put into the region. MLKit operates with three multiplicities: 0, 1 and $\infty$, ordered by $0<1<\infty$. Multiplicities annotate binding occurrences of region variables. 
An expression of the form \begin{center} @@ -1106,10 +1115,10 @@ \section{Runtime Types of Regions} It is often, but not always, the case that all values that reside in the same region have the same type (considered as representations of ML values). -\section{Allocation and De-Allocation of Regions} +\section{Allocation and Deallocation of Regions} \label{aldeal.sec} -The analysis that decides when regions should be allocated and de-allocated is +The analysis that decides when regions should be allocated and deallocated is called {\em region inference}. Region inference inserts several forms of memory management directives as directives into the program. The target language of region inference is called @@ -1118,7 +1127,7 @@ \section{Allocation and De-Allocation of Regions} % $\RegExp$. -In $\RegExp$, region allocation and de-allocation are explicit, they are always +In $\RegExp$, region allocation and deallocation are explicit, they are always paired, and they follow the syntactical structure of the source program. If $e$ is an expression in $\RegExp$, then so is% % @@ -1151,7 +1160,7 @@ \section{Two Backends} \index{backend!native}% \index{backend!bytecode}% -The MLKit provides two different backends, one that generates native code for +MLKit provides two different backends, one that generates native code for the x64 architecture (running Linux or macOS), the {\em native backend\/} and one that generates JavaScript \cite{10.1145/2093328.2093336}.\footnote{Previous versions of the MLKit also supported a bytecode backend that allowed for @@ -1194,8 +1203,8 @@ \section{Boxed and Unboxed Values} value\/}, on the other hand, is one that is represented by a word-sized pointer to the value itself, which is stored in one or more regions. -The MLKit uses unboxed representation for integers, booleans, words, the unit -value, and characters. 
The MLKit uses boxed representation for pairs, records +MLKit uses unboxed representation for integers, booleans, words, the unit +value, and characters. MLKit uses boxed representation for pairs, records (with at least one element), reals, exception values, function closures, and many constructed values (i.e., data types). Some data types, such as lists and booleans, are represented unboxed, however, which is possible by using the lower @@ -1255,11 +1264,11 @@ \section{Intermediate Languages} % {\em multiplicity analysis}. \end{description} -The MLKit compiles Standard ML records into $\Lam$-tuples and it compiles +MLKit compiles Standard ML records into $\Lam$-tuples and it compiles Standard ML match expressions and other constructs containing patterns into simpler $\Lam$-constructs. -The MLKit contains a +MLKit contains a % \index{Lambda optimiser} % @@ -1290,7 +1299,7 @@ \section{The Runtime System} \index{runtime system} % runtime system is written in C. It is small (less than 30Kb of code when -compiled). It contains operations for allocating and de-allocating regions, +compiled). It contains operations for allocating and deallocating regions, extending regions, obtaining more space from the operating system, recording region profiling information, and performing low-level operations for use by the Standard ML Basis Library. @@ -1299,9 +1308,9 @@ \section{The Runtime System} % \index{C!calling} % -C functions from MLKit code if you use the native backend. The MLKit takes care +C functions from MLKit code if you use the native backend. MLKit takes care of the memory allocation, by allocating regions for the result of the call -before the call and de-allocating the regions at some point after the call. The +before the call and deallocating the regions at some point after the call. The C functions can build ML data structures such as lists through abstract operations provided by the MLKit runtime system. See Chapter~\ref{ccall.sec} for further details. 
@@ -1309,7 +1318,7 @@ \section{The Runtime System} \section{Compiling Programs with the MLKit} \label{tryit.sec} -The MLKit is a +MLKit is a % \index{batch compilation} % @@ -1377,7 +1386,7 @@ \section{Running Compiled Programs} % \index{target program} % -{\em target program} in the form of an executable file, called \lstinline{run}. The +{\em target program} in the form of an executable file, called \lstinline{run}. MLKit places \lstinline{run} in the working directory. Running the target program is done from the UNIX shell by typing @@ -1426,7 +1435,7 @@ \chapter{Records and Tuples} \index{effect} % {\em effects}, which are crucial for understanding when regions are allocated -and de-allocated. +and deallocated. \section{Syntax} @@ -1572,7 +1581,7 @@ \section{Effects and Regions} in #1 pair end \end{smlcode} -The MLKit compiles the declaration into the $\MulExp$ program shown in +MLKit compiles the declaration into the $\MulExp$ program shown in Figure~\ref{elimpair.fig}. The compiler compiles the program as it is, without reducing the conditional to its \lstinline{then} branch. \begin{figure} @@ -1589,14 +1598,14 @@ \section{Effects and Regions} end \end{smlcode} \caption{Region inference decides that the pair is to be allocated in a local, - finite region; the region will be de-allocated as soon as the pair becomes + finite region; the region will be deallocated as soon as the pair becomes garbage.} \medskip\hrule \label{elimpair.fig} \end{figure} During evaluation, a region (denoted by \lstinline{r9}) is introduced before the pair is allocated; it remains on the region stack until the projection of the pair is -evaluated, after which the region is de-allocated. +evaluated, after which the region is deallocated. The ``\lstinline{:1}'' on the binding occurrences of \lstinline{r9} is a multiplicity indicating that there is only one store operation into the region. 
(The @@ -1610,9 +1619,9 @@ \section{Effects and Regions} But how does the MLKit know that it is safe to % -\index{region!de-allocation} +\index{region!deallocation} % -de-allocate \lstinline{r9} when the +deallocate \lstinline{r9} when the % \index{region@\texttt{region}} % @@ -1701,7 +1710,7 @@ \section{Runtime Representation} \index{record!unboxed}% % if it can see that the boxed representation of the tuple is not needed by the -function. The MLKit does not at present unbox records returned from +function. MLKit does not at present unbox records returned from functions. See Section~\ref{region-polymorphic-functions.sec} on page~\pageref{region-polymorphic-functions.sec} for details about unboxed function arguments. @@ -1718,7 +1727,7 @@ \section{Runtime Representation} \chapter{Basic Values} %--------------------------------------------------------- In this chapter we describe how basic values such as integers, reals, -strings, and booleans are represented in the MLKit. The MLKit complies to +strings, and booleans are represented in the MLKit. MLKit complies with the Definition of Standard ML (Revised) % \index{Standard ML!{1997 revision}} @@ -1751,7 +1760,7 @@ \section{Integers and Words} Chapter~\ref{gc.chap} describes how to compile programs with garbage collection enabled. The structure \lstinline{Int} provides many useful operations on integers of type \lstinline{int}.\footnote{To see what operations are available in the \boxml{Int} -structure, consult the file \boxml{basis/INTEGER.sml}.} The MLKit also defines +structure, consult the file \boxml{basis/INTEGER.sml}.} MLKit also defines the structures % \index{Int31 structure@\lstinline{Int31} structure} @@ -1775,7 +1784,7 @@ \section{Integers and Words} garbage collection is enabled, the structure \lstinline{Int} is identical to the structure \lstinline{Int63}. When garbage collection is disabled, the structure \lstinline{Int} is identical to the structure \lstinline{Int64}. 
-The following operations on integers are pre-defined at top level: +The following operations on integers are predefined at top level: % \index{=@\texttt{=}}% \index{<>@\texttt{<>}}% @@ -1889,7 +1898,7 @@ \section{Reals} \lstinline!$c$ at $\rho$!, where $\rho$ is a region variable, indicating the region into which the real will be stored. -The MLKit will do its best to eliminate regions holding reals by unboxing reals +MLKit will do its best to eliminate regions holding reals by unboxing reals and representing them in floating point registers or on the runtime stack. Thus, in internal representations, MLKit distinguishes between (boxed) reals and unboxed 64-bit floating point values (of type \lstinline{f64}). MLKit supports that @@ -2216,7 +2225,7 @@ \section{Region-Annotated List Types} \section{Example: Basic List Operations} \label{listexamples.sec} -The MLKit compiles the program\footnote{Program \boxml{kitdemo/onetwothree.sml}.} +MLKit compiles the program\footnote{Program \boxml{kitdemo/onetwothree.sml}.} \begin{smlcode} let val l = [1, 2, 3]; val (x::_) = l @@ -2575,7 +2584,7 @@ \section{Endomorphisms and Exomorphisms} \end{eqnarray*} As we saw in Section~\ref{life.sec}, there are cases where it is useful to copy a list from one region into another region, so as to make it possible to -de-allocate the old region. This copying can be used as a kind of +deallocate the old region. This copying can be used as a kind of programmer-controlled garbage collection in cases where garbage has accumulated in the original region. @@ -2649,7 +2658,7 @@ \section{Polymorphic Recursion} % Figure~\ref{msort.fig}.\footnote{MLB-file \boxml{kitdemo/msort.mlb}, file \boxml{kitdemo/msort.sml}. To compile the project, go to the \boxml{kitdemo} -directory and execute \boxml{mlkit msort.mlb} from the shell. The MLKit places +directory and execute \boxml{mlkit msort.mlb} from the shell. MLKit places an executable file \boxml{run} in the \boxml{kitdemo} directory. 
For an in-depth description of how to compile and run MLB-files and SML-files, see Chapter~\ref{mlb_and_modules.chap}.} @@ -2917,7 +2926,7 @@ \section{Scope Versus Lifetime} With region inference, however, the list bound to \lstinline{list} will stay allocated throughout the evaluation of the remainder of the \lstinline{let} -expression.\footnote{One can force de-allocation of the list by inserting +expression.\footnote{One can force deallocation of the list by inserting \lstinline!val _ = resetRegions(list)! after the declaration of \lstinline{n}; but, as we shall see, there are less draconian ways of achieving the same result.} @@ -3049,7 +3058,7 @@ \chapter{Static Detection of Space Leaks} %--------------------------------------------------------- ``Space leak'' is the informal term used when a program uses much more memory -than one would expect, typically because of memory not being re-cycled as early +than one would expect, typically because of memory not being recycled as early as it could (or not at all). If a region-polymorphic function with region-annotated type scheme $\sigma$ has @@ -3086,7 +3095,7 @@ \chapter{Static Detection of Space Leaks} region-annotated type scheme and appears with a $\Put$ effect. \section{Warnings About Space Leaks} -The MLKit can be instrumented to issue a warning each time it meets a function +MLKit can be instrumented to issue a warning each time it meets a function that is declared using \lstinline{fun} and has a free $\Put$ effect occurring somewhere in its type scheme. The way to tell the MLKit to issue the warnings is by passing the option @@ -3172,7 +3181,7 @@ \section{Fixing Space Leaks} containing \lstinline{x}, but the difference is that these are now represented by bound region variables in the type scheme of \lstinline{f}. 
This quantification has the advantages that (1) allocation of space for the list is -delayed until the list is actually used and (2), the list can be de-allocated +delayed until the list is actually used and (2), the list can be deallocated after the calls have been made (whereas in the original version, \lstinline{x} occurs free in the declaration of \lstinline{f} and will be kept alive as long as \lstinline{f} can be called.) @@ -3268,7 +3277,7 @@ \section{References in Standard ML} 6). \section{Runtime Representation of References} -The MLKit translates an SML expression of the form $\kw{ref}~\exp$ into an +MLKit translates an SML expression of the form $\kw{ref}~\exp$ into an expression of the form (assuming $\exp$ translates into $e$) $$\kw{ref}\,\at\,\rho~ e$$ which is evaluated as follows. First $e$ is evaluated. Assume that this evaluation yields a value $v$. Here $v$ may be a @@ -3497,7 +3506,7 @@ \chapter{Recursive Data Types} \section{Spreading Data Types} -The MLKit performs an analysis called +MLKit performs an analysis called % \index{spreading}% % @@ -3530,7 +3539,7 @@ \section{Spreading Data Types} datatype 'a tree = Lf | Br of 'a * 'a tree * 'a tree; \end{smlcode} -The MLKit extends the notion of arity (in it's internal languages) to account +MLKit extends the notion of arity (in its internal languages) to account for regions and effects. For lists, for example, we need a region for holding the pairs to which \kw{::} is applied. For the type \begin{smlcode} @@ -3603,14 +3612,14 @@ \section{Spreading Data Types} The automatic memory management that we have discussed for lists extends to other recursive data types without problems. For example, binary trees are put -into regions and are subsequently de-allocated (in a constant time operation) +into regions and are subsequently deallocated (in a constant time operation) when the region is popped. The next section goes through an example to illustrate the point. 
-The MLKit attempts to use an unboxed representation for value constructors when +MLKit attempts to use an unboxed representation for value constructors when possible. We have already seen how cons (i.e., \lstinline{::}) and -\lstinline{nil} use an unboxed representation (Chapter~\ref{lists.sec}). The -MLKit uses an advanced scheme for +\lstinline{nil} use an unboxed representation (Chapter~\ref{lists.sec}). MLKit +uses an advanced scheme for % \index{unboxing}% % @@ -3680,7 +3689,7 @@ \section{Example: Balanced Trees} end \end{smlcode} The exomorphic behavior of \kw{balpre} causes the tree to be allocated in region -\kw{r131}, which is de-allocated after the call to \kw{preord}. +\kw{r131}, which is deallocated after the call to \kw{preord}. This is the kind of certainty about lifetimes we are aiming at. Imagine, for example, that the trees under consideration were terms representing different @@ -3693,10 +3702,10 @@ \section{Unboxing Schemes} \index{unboxing}% \index{value!unboxed}% -The MLKit uses a uniform representation of values, which is important for +MLKit uses a uniform representation of values, which is important for compiling generic code (e.g., functions) separately from the code that uses it. Still, under these constraints there are many possibilities for securing a -compact data representation. The MLKit uses tagged regions for some types of +compact data representation. 
MLKit uses tagged regions for some types of values (instead of tagging the values themselves) and, as we have seen for lists and binary trees, datatypes with a single unary constructor that takes boxed arguments are implemented using the lower-bit tags in pointers to discriminate @@ -3971,7 +3980,7 @@ \section{Raising Exceptions} \index{let region@\texttt{let region}}% % \lstinline!let region $\rho$ in $e$ end!; if $e$ evaluates to an exception -packet, then the region bound to $\rho$ is de-allocated and the packet is also +packet, then the region bound to $\rho$ is deallocated and the packet is also the result of evaluating the \lstinline{let region} expression. We have not attempted to design an analysis that would estimate how far down the @@ -4005,7 +4014,7 @@ \section{Handling Exceptions} multiplicity one and therefore be a finite region which is put on the stack. Then $e_1$, the result of compiling $\mathit{exp}_1$, is evaluated. If $e_1$ terminates with a value, the \lstinline!let region! construct will take care of -de-allocating the handler. If $e_1$ terminates with an exception, however, $f$ +deallocating the handler. If $e_1$ terminates with an exception, however, $f$ is applied. Thus the combined cost of raising an exception and searching for the appropriate @@ -4078,7 +4087,7 @@ \chapter{Resetting Regions} sometimes to a finite region and sometimes to an infinite region at runtime, resetting a region can involve a test at runtime. -The MLKit contains an analysis, called the {\em storage mode analysis}, which +MLKit contains an analysis, called the {\em storage mode analysis}, which has two purposes: \begin{enumerate} \item Inserting automatic resetting of infinite regions, when possible. 
@@ -4303,20 +4312,28 @@ \section{Storage Mode Analysis} phases up to code generation operate on K-normal forms, programs are always simplified to eliminate the extra \boxml{let}-bindings before they are presented to the user.} - -The MLKit also computes a set of locally live variables for those allocation -points that do not occur inside functions. +% +MLKit also computes a set of locally live variables for those allocation points +that do not occur inside functions. We now give an informal explanation of the rules that assign storage modes to allocation points. Let an allocation point \begin{equation} \label{allocpoint}\fw{at}\,\rho \end{equation} -be given. +be given. Such an allocation point can be associated with the allocation of a +constant value, an immediate closure, a record value, or the allocation of the +result of a primitive operation. Another possibility is that the allocation +point appears as an argument to a region-polymorphic function. The procedure +used for determining storage modes for function arguments depends on whether the +function is declared in the same program unit as the function call. The +procedure used for calls to externally declared functions is described in +Section~\ref{smaextern.sec}. \bigskip -\noindent{\bf CASE A:} $\rho$ is a global region. Then \fw{attop} is used. -There is a deficiency we have to admit here. The MLKit only puts +\noindent{\bf CASE A:} The region $\rho$ is a global region. +In this case \fw{attop} is used. +There is a deficiency we have to admit here. MLKit only puts \lstinline{region} bindings around expressions, not around declarations. Thus, if one writes \begin{smlcode} @@ -4471,15 +4488,12 @@ \section{Storage Mode Analysis} unit and $\epsilon_2.\rea_2$ is a corresponding actual arrow effect then an edge from $\epsilon_1$ to $\epsilon_2$ is inserted into the graph. Also, edges from $\epsilon_2$ to every region and effect variable occurring in $\rea_2$ are -inserted. 
Finally, for every region-polymorphic function $f$ declared in the -program and for every formal region parameter $\rho$ of $f$, if $f$ is exported -from the compilation unit, then an edge from $\rho$ to the global region of the -same runtime type as $\rho$ is inserted into the graph. (This is necessary, so -as to cater for applications of $f$ in subsequent compilation units.) +inserted. Let $G$ be the graph thus constructed. For every node $n$ in the graph, we write $\langle n\rangle$ to denote the set of region variables that can be -reached from $n$, including $n$ itself if $n$ is a region variable. The rule that replaces (B3) is: +reached from $n$, including $n$ itself if $n$ is a region variable. The rule +that replaces (B3) is: % \index{region parameter!formal}% % @@ -4500,6 +4514,34 @@ \section{Storage Mode Analysis} allocation point has $\rho$ free in its region-annotated type scheme with place, and use \fw{attop} otherwise. +\section{Calls to Externally Declared Functions} +\label{smaextern.sec} + +The procedure used in Section~\ref{sma.sec} for determining storage modes for +region arguments to a region-polymorphic function assumes that the call appears +in the program unit in which the function is declared. For calls to functions +in other program units, an additional aliasing check is used for determining if +a region bound by a \fw{let region} construct is passed \fw{attop} or \fw{atbot} +or whether a function-bound region is passed \fw{attop} or \fw{sat}. If a region +parameter is aliased with any other instantiated region parameter, the storage +mode \fw{attop} is used. Otherwise, if it can be determined (using the procedure +of Section~\ref{sma.sec}) that there are no live values in the region, the +storage mode \fw{atbot} or \fw{sat} is used, depending on whether the region is +bound by a \fw{let region} construct or whether the region is bound by a +region-polymorphic function. 
+ +This procedure is also used for determining storage modes for certain primitives +like \kw{List.concat} and \lstinline{^}, for which it will be safe to store the +result \fw{atbot} if the region for the result is not aliased with any argument +regions and if there are no live values in the result region at the call site. + + +We refer to this procedure for determining storage modes as being +% +\index{storage mode analysis!modular}% +% +\emph{modular}, as it relies only on local aliasing information for determining +the passed storage mode of a region parameter. \section{Example: Computing the Length of Lists} \label{length.sec} @@ -4620,12 +4662,12 @@ \section{Example: Computing the Length of Lists} were to collect boxed data in accumulating parameters to the function and this data is not to be returned by the function, there is a danger that the recursive call would not become a tail call due to the introduction of a \fw{let region} -construct being wrapped around the recursive call.\footnote{The MLKit features +construct being wrapped around the recursive call.\footnote{MLKit features an option \inline{-preserve_tail_calls}, which ensures that no \fw{region} binding is wrapped around a tail-call. This option is enabled by default when garbage collection is enabled.} -The next function, \fw{klength}, deserves careful study, because it +The next function, \kw{klength}, deserves careful study, because it is a prototype of a particular schema that can be used again and again when programming with regions. Iteration is done by a % @@ -4663,51 +4705,41 @@ \section{Example: Computing the Length of Lists} hump of Figure~\ref{length.region.fig}. Next consider \kw{llength}. The difference from \kw{klength} is that -\kw{llength'} is now declared outside \kw{llength}.
Although the use of -\fw{local} makes it clear that \kw{llength'} is not exported from the -compilation unit, \kw{llength'} must in fact reside in a global region, because -\kw{llength}, which is exported, calls \kw{llength'}. Nonetheless, the storage -mode analysis still achieves constant memory usage. As before, we have arranged -that iteration is done by a region endomorphism that is initially applied to a -freshly constructed pair. This pair can reside in a region that is local to the -body of \kw{llength} (once again, the projection \kw{\#2(llength'(l, 0))} makes -sure that the pair does not escape the body of \kw{llength}). The crucial bit -is now what storage mode \kw{llength'} uses when it stores \kw{(xs, acc+1)}. -The only locally live variable at that point is \kw{llength'} itself and, as we -noted earlier, \kw{length'} lives in a global region, which is clearly different -from the region inside \kw{llength} that contains all the pairs. Thus, storage -mode \fw{sat} will be used, as desired. +\kw{llength'} is now declared outside \kw{llength}. The storage mode analysis +still achieves constant memory usage. As before, we have arranged that iteration +is done by a region endomorphism that is initially applied to a freshly +constructed pair. This pair can reside in a region that is local to the body of +\kw{llength} (once again, the projection \kw{\#2(llength'(l, 0))} makes sure +that the pair does not escape the body of \kw{llength}). The crucial bit is now +what storage mode \kw{llength'} uses when it stores \kw{(xs, acc+1)}. The only +locally live variable at that point is \kw{llength'} itself and, as we noted +earlier, \kw{llength'} lives in a global region, which is clearly different from +the region inside \kw{llength} that contains all the pairs. Thus, storage mode +\fw{sat} will be used, as desired.
Finally, consider \kw{glength}, which is similar to \kw{llength}, but with the -crucial difference that \kw{global} is exported from the compilation -unit. Because \kw{global} may be called from a different compilation unit, then, -for all we know, \kw{global} may be applied to a pair that resides in the same -(global) region as \kw{global} itself. Using \fw{sat} when storing \kw{(xs, - acc+1)} would then be a big mistake: it would destroy the very function that -we are trying to call! Therefore, the storage mode analysis assigns \fw{attop} -to that storage operation.\footnote{To be precise, \boxml{attop} comes about by -using rule (B3) of Section~\ref{sma.sec}. This example illustrates why we put -edges from formal region parameters to global regions for exported functions -when constructing the region flow graph. Notice also that storage mode analysis -does not take region runtime types into account.} Consequently, we get a memory -leak, as shown in the final hump of Figure~\ref{length.region.fig}. +difference that \kw{global} is exported from the compilation unit. Thus, +\kw{global} may be called from a different compilation unit. We see, however, +that \kw{global} behaves the same as \kw{llength} and \kw{klength} with respect +to memory usage. If \kw{global} is called from another program unit then it is +determined locally whether the passed region may hold live values at the call +site, in which case, the region is passed with storage mode \fw{attop}. To sum up, here is how one writes a loop without using space proportional to the number of iterations: \index{length of list}% \begin{enumerate} -\item The iteration should be done by an auxiliary, uncurried function that is - declared as local to the function that uses it; we refer (informally) to this - auxiliary function as the +\item The iteration should be done by an uncurried % - \index{iterator}% + \index{region endomorphism}% % - {\em iterator}. 
-\item The iterator should be a + region endomorphic function; we refer (informally) to this + function as the % - \index{region endomorphism}% + \index{iterator}% % - region endomorphism and should be tail recursive. + {\em iterator}. +\item The iterator should be tail recursive. \item Iteration should start from a suitably fresh initial argument; the result of the iteration should be kept clearly separate from the region where the iterator function lies. @@ -4715,8 +4747,8 @@ \section{Example: Computing the Length of Lists} Mutual recursion poses no additional complications. All functions in a block of mutually recursive functions are put in the same region. -Finally, the reader may be concerned that the two recommended solutions, -\kw{klength} and \kw{llength}, are much slower than the other versions. This is +Finally, the reader may be concerned that the recommended solutions, +\kw{klength}, \kw{llength}, and \kw{glength}, are much slower than the other versions. This is partly an artifact of the profiling software.\footnote{When profiling is turned on, every resetting of a region involves resetting of values in the first region page of the region.} To get a better picture of the actual cost of the different @@ -4728,10 +4760,10 @@ \section{Example: Computing the Length of Lists} takes 0.12 seconds to build the list, the differences in times are clear: the version of the length function that does not use the stack and that takes its argument in registers (i.e., \kw{tlength}) is the fastest. The recommended -versions of the length function (i.e., \kw{klength} and \kw{llength}) run as -well as the versions that make use of the runtime stack (i.e., \kw{nlength} and -\kw{glength}), but are scalable and follow general useful approaches to writing -recursive functions.
+versions of the length function (i.e., \kw{klength}, \kw{llength}, and +\kw{glength}) run as well as the versions that make use of the runtime stack +(i.e., \kw{nlength}), but are scalable and follow general useful approaches to +writing recursive functions. \begin{figure} \hrule @@ -4859,7 +4891,7 @@ \section{Example: Improved Mergesort} end \end{smlcode} \caption{Variant of \kw{msort} that uses \kw{resetRegions} to improve memory - usage. The MLKit fails to infer that the region holding the argument list + usage. MLKit fails to infer that the region holding the argument list \kw{xs} can be reset after \kw{xs} is split.} \label{msortreset1.fig} \medskip \hrule @@ -5004,12 +5036,6 @@ \section{Example: Scanning Text Files} \kw{readWord} will always try to store the string it creates at the bottom of the region in question. -In general however, when splitting a program unit into two, one may have to -insert explicit $\resetr$ into the second unit, when operations from the first -unit are called. This extra resetting may be necessary because formal region -parameters of exported functions are connected to global regions in the region -flow graph (cf., rule B3). - \bigskip \hrule \begin{smlcode} @@ -5325,7 +5351,7 @@ \section{Region-Annotated Function Types} occurs free in the type of the lambda abstraction. But, as pointed out in Section~\ref{effects.sec}, the criterion % -\index{region!de-allocation}% +\index{region!deallocation}% % for putting a \fw{region} binding of $\rho$ around an expression $e$ is that $\rho$ occurs free neither in the type with place of $e$ nor in the type scheme @@ -5519,14 +5545,14 @@ \section{On the Lack of Region Polymorphism} all the uses of $\kw{f}$ use the same regions. Indeed, because \kw{f} occurs free in the type environment while region inference analyses the body of the lambda abstraction, none of the regions that appear in the type of \kw{f} will -be de-allocated inside the body of the lambda abstraction. 
Also, such a region +be deallocated inside the body of the lambda abstraction. Also, such a region must be bound outside the lambda abstraction, so any attempt to reset such a region inside the body of the abstraction will cause the storage mode analysis to complain (by Rule (B1) of Section~\ref{sma.sec}). Therefore, when a function $f$ is passed as argument to another function $g$, as in the expression \kw{$g$($f$)}, first regions are allocated for the use of $f$, -then $g$ is called, and finally, the regions are de-allocated (provided they are +then $g$ is called, and finally, the regions are deallocated (provided they are not global regions). Whether the \fw{let region} construct thus introduced encloses the call site immediately, as in \begin{center} @@ -5537,7 +5563,7 @@ \section{On the Lack of Region Polymorphism} \lstinline!let region $\rho_1,\ldots,\rho_n$ in $\ldots$ $g$($f$) $\ldots$ end! \end{center} depends on the type and effect of the expression \kw{$g$($f$)} in the usual way: -regions can be de-allocated when they occur free neither in the type with place +regions can be deallocated when they occur free neither in the type with place of the expression nor in the type environment. \section{Examples: \texttt{map} and \texttt{foldl}} @@ -5592,7 +5618,7 @@ \section{Examples: \texttt{map} and \texttt{foldl}} local regions bound to \kw{r35}, one closure in each region. Also, if we had given some more complicated argument to \kw{map}, the body of that function could include local \fw{region} declarations. For each list element, regions -would then be allocated, used, and then de-allocated before proceeding to the +would then be allocated, used, and then deallocated before proceeding to the next list element. So it might appear that higher-order functions are nothing to worry about when @@ -5664,13 +5690,13 @@ \chapter{The Function Call} iteration is often achieved by function calls. 
Not surprisingly, careful compilation of function calls is essential for obtaining good performance. -The MLKit partitions function calls into four kinds, which are implemented in +MLKit partitions function calls into four kinds, which are implemented in different ways. At best, a function call is simply realised by a jump in the target code. The resource conscious programmer will want to know the special cases; for example, when doing an iterative computation, it is important to know whether the space usage is going to be independent of the number of iterations. -The MLKit performs a backwards flow analysis, called +MLKit performs a backwards flow analysis, called % \index{call conversion}% % @@ -5745,7 +5771,7 @@ \section{Tail Call to Known Function (\texttt{jmp})} where $\rho_1$, $\ldots$, $\rho_n$ are actual region parameters to the function, $f$ is the name of a region-polymorphic function, and $e_1 \cdots e_m$, $m \geq 1$ are value arguments to the function (we often omit the brackets $\kw{(} -\cdots \kw{)}$ when $m = 1$.) The MLKit turns such a function call into the form +\cdots \kw{)}$ when $m = 1$.) MLKit turns such a function call into the form \begin{center} \lstinline!jmp $f$ [$\rho_1$, $\ldots$, $\rho_n$] ($e_1,\ldots,e_m$)! \end{center} @@ -5794,9 +5820,9 @@ \section{Tail Call to Known Function (\texttt{jmp})} to ensure tail-calls only when the elements of the argument tuple themselves are unboxed; otherwise there is a risk that, for each invocation, fresh regions are introduced to hold the arguments to the call, and the call would need to return -to de-allocate these regions. +to deallocate these regions. -The MLKit can transform a call into a \kw{jmp} tail call even in the case that +MLKit can transform a call into a \kw{jmp} tail call even in the case that the call appears in the body of a \fw{fn} expression. 
Consider the following two mutually recursive functions \kw{g} and \kw{h}:\footnote{Program \boxml{kitdemo/tail2.sml} compiled with the flags \boxml{-no\_uncurry} and @@ -5849,7 +5875,7 @@ \section{Non-Tail Call to Known Function \index{funcall@\texttt{funcall}}(\textt functions. One example is the call to \kw{h} in Figure~\ref{tail2.fig}. Here the call to \kw{h} takes a region argument \kw{r13} and an ordinary argument \kw{(v134 - 1)}; the call to \kw{h} returns a closure, which needs to be applied -to \kw{v135} before the function \kw{g} can de-allocate the region \kw{r13} and +to \kw{v135} before the function \kw{g} can deallocate the region \kw{r13} and return. This case completes all possible cases of applications of region-polymorphic @@ -6102,7 +6128,7 @@ \chapter{ML Basis Files and Modules} \index{Standard ML!Modules}% % Standard ML Modules and the possibility of organising source files in ML Basis -Files. The MLKit fully supports Standard ML Modules and it has a sophisticated +Files. MLKit fully supports Standard ML Modules and it has a sophisticated system for avoiding unnecessary recompilation. In the following section, we describe the notion of ML Basis Files. We then turn to show how to program with structures, signatures, and functors. To enable the programmer to write @@ -6203,7 +6229,7 @@ \section{ML Basis Files} \kw{local}), in the order they are listed, except that each MLB-file is executed only the first time it is imported. -The MLKit has a system for managing compilation and recompilation of +MLKit has a system for managing compilation and recompilation of % \index{MLB-files}% % @@ -6249,7 +6275,7 @@ \section{ML Basis Files} \end{scriptcode} in the working directory. -The MLKit compiles each source file of an MLB-file one at a time, in the order +MLKit compiles each source file of an MLB-file one at a time, in the order mentioned. 
A source file is compiled under a given set of assumptions, which provides, for instance, region-annotated type schemes with places for free variables of the source file. Also, compilation of a source file gives rise to @@ -6394,7 +6420,7 @@ \section{Functors \label{functors.sec}} \index{functor}% \index{specialisation!functor}% % -Functors map structures to structures. The MLKit specialises a functor every +Functors map structures to structures. MLKit specialises a functor every time it is applied. Thus, types that are abstract for the programmer (inside a functor body) become visible to the compiler. Region-annotated type schemes and other information about identifiers in the actual functor argument are available @@ -6519,7 +6545,7 @@ \section{Functors \label{functors.sec}} \chapter{Garbage Collection} \label{gc.chap} %--------------------------------------------------------- -The MLKit supports reference tracing garbage collection in combination with the +MLKit supports reference tracing garbage collection in combination with the region memory model \cite{hallenberg99,het02}. Garbage collection is also possible with region profiling enabled. @@ -6529,7 +6555,7 @@ \chapter{Garbage Collection} possible to disable garbage collection at runtime for a program that has been compiled with garbage collection enabled. -The MLKit also features generational reference-tracing garbage collection +MLKit also features generational reference-tracing garbage collection (option \kw{-gengc}), which in some cases is superior to ordinary reference-tracing garbage collection, but which may also cause additional fragmentation \cite{elshaljfp21}. @@ -6562,7 +6588,7 @@ \section{Dangling Pointers} val r = fncall f 5 \end{smlcode} Notice here that region \kw{r9}, which contains the list \kw{[1]}, is -de-allocated before the function \kw{f} is applied to the value \kw{5}. If we +deallocated before the function \kw{f} is applied to the value \kw{5}. 
If we chose to run this program together with a reference tracing garbage collector, a fatal error could occur: The memory that contains the list \kw{[1]} could be reused for other purposes at the time the garbage collector tries to trace the @@ -6590,7 +6616,7 @@ \section{Dangling Pointers} When garbage collection is enabled, the MLKit makes sure that all values stored in a closure are kept live as long as the closure is live. Assume that the type with place $\mu$ of the function associated with the closure is on the form -$(\mu_1 \ar{\epsilon.\varphi} \mu_2, \rho_0)$. The MLKit enforces the +$(\mu_1 \ar{\epsilon.\varphi} \mu_2, \rho_0)$. MLKit enforces the restriction by requiring that for each region variable $\rho$ that occur free in the type of free variables of the function (those variables for which values are stored in the closure at runtime), $\rho$ occur free in $\mu$. In the @@ -6852,7 +6878,7 @@ \section{Example: Scanning Text Files Again} \begin{figure} \includerp{scan_rev1_1.pdf} \caption{Memory is accumulated in the top two bands. The global regions \kw{r1} - and \kw{r333643} hold the largest amount of memory. The graph was generated by + and \kw{r331112} hold the largest amount of memory. The graph was generated by first compiling the \kw{kitdemo/scan\_rev1.mlb} project with profiling enabled (option \kw{-prof}). We also add the compiler options \kw{-log\_to\_file}, \kw{-Ppp}, \kw{-Prfg}, and \kw{-Pcee} for enabling printing of region flow @@ -6863,52 +6889,60 @@ \section{Example: Scanning Text Files Again} \medskip\hrule \end{figure} -The graph shows that region \kw{r333643} accumulates more memory for each time +The graph shows that region \kw{r331112} accumulates more memory for each time it scans the file \kw{life.sml}. -To see what happens in region \kw{r333643}, we make an object profile of that +To see what happens in region \kw{r331112}, we make an object profile of that region, see Figure \ref{scan_rev1_2.fig}. 
\begin{figure} \includerp{scan_rev1_2.pdf} -\caption{There seems to be a space leak at program point \kw{pp894}. The graph - was generated by typing \kw{rp2ps -object r333643}.} +\caption{There seems to be a space leak at program point \kw{pp115}. The graph + was generated by typing \kw{rp2ps -object r331112}.} \label{scan_rev1_2.fig} \medskip\hrule \end{figure} -The object profile shows that program point \kw{pp894} continually allocates +The object profile shows that program point \kw{pp115} continually allocates memory that is first freed when the program terminates. We now search for -\kw{pp894} in the generated log files and in the log file located in the basis +\kw{pp115} in the generated log files and in the log file located in the basis library folder \kw{\$(SML\_LIB)/basis}).\footnote{Unfortunately, the MLKit compiler happily resets the program point counter for each compilation of a program unit, thus, there are multiple instances of the program point -\boxml{pp894}.} Among other places, we find that the program point \boxml{pp894} +\boxml{pp115}.} Among other places, we find that the program point \boxml{pp115} appears in the following fragment in the file \boxml{basis/Int.sml.log}: \begin{smlcode} - fun toString attop r1T pp859 [r61517s:inf] i = - case ... of - ... + fun conv attop r1T pp65 [r58078s:inf] (rad, radix, i) = + case ... of ... | _ => let ... - in String.implode [sat r61517s pp894] (...) 
+ in String.implode [sat r58078s pp115] chars end \end{smlcode} +We also see that this function is called directly by the function +\kw{Int.toString} and that the formal region parameter \kw{r59522} is passed +directly as a parameter in the call to \kw{conv}: +\begin{smlcode} + fun toString attop r1T pp775 [r59522s:inf] i = + let val v1738 = DEC + in jmp conv[sat r59522s pp776] (v1738, 10, i) + end +\end{smlcode} So the space leak is caused by the \kw{String.implode} primitive function being -called with region \kw{r333643} instantiated for the formal region variable -\kw{r61517}. +called with region \kw{r331112} instantiated for the formal region variable +\kw{r59522}. -We now search for \kw{r333643} in file \kw{scan\_rev1.sml.log} and find the +We now search for \kw{r331112} in file \kw{scan\_rev1.sml.log} and find the following fragment of the region flow graph: \begin{small} \begin{scriptcode} - LETREGION[r333643:inf] - readWord[r333197:inf] --r333197 atbot--> [*r333643*] ; - toString[r61517:inf] --r61517 attop--> [*r333643*] ; + LETREGION[r331112:inf] + readWord[r330770:inf] --r330770 atbot--> [*r331112*] ; + toString[r59522:inf] --r59522 attop--> [*r331112*] ; \end{scriptcode} \end{small} -The fragment is read as follows. The formal region variable \kw{r61517} is -instantiated to the \lstinline{region}-bound region variable \kw{r333643} in a -call to \kw{toString}. Moreover, also the formal region variable \kw{r333197} -(of function \kw{readWord}) is instantiated to \kw{r333643}. (The asterisks +The fragment is read as follows. The formal region variable \kw{r59522} is +instantiated to the \lstinline{region}-bound region variable \kw{r331112} in a +call to \kw{toString}. Moreover, also the formal region variable \kw{r330770} +(of function \kw{readWord}) is instantiated to \kw{r331112}. (The asterisks (\kw{*}) denote that the node has been displayed before.) Region flow graphs are local to each program fragment in a program. 
A call to a @@ -6920,10 +6954,10 @@ \section{Example: Scanning Text Files Again} \kw{kitdemo/lib.sml.log} one finds that both \kw{toString} and \kw{readWord} eventually call \kw{implode}. However, \kw{readWord} is called only initially, thus, we conclude that the space leak is caused by function \kw{Int.toString} -being called with region \kw{r333643} instantiated for the formal region -variable \kw{r61517}. Indeed, by inspecting the calls to \kw{toString} in the +being called with region \kw{r331112} instantiated for the formal region +variable \kw{r59522}. Indeed, by inspecting the calls to \kw{toString} in the call-explicit program found in \kw{scan\_rev1.sml.log}, we see that -\kw{toString} is called with actual region \kw{r333643}. +\kw{toString} is called with actual region \kw{r331112}. The \kw{concat} function from the initial basis catenates a list of strings. But all the strings in the argument list to \kw{concat} are required to be in the @@ -7298,8 +7332,8 @@ \section{Regions Statistics} In the \kw{INFINITE REGIONS} part, we see the number of calls to infinite region operations such as \kw{allocateRegionInf} and \kw{alloc}. The program allocates -81050 infinite regions and de-allocates 81044; the six global regions are not -de-allocated before the region statistics is printed and the program terminates. +81050 infinite regions and deallocates 81044; the six global regions are not +deallocated before the region statistics is printed and the program terminates. The program allocates 1060622 objects in infinite regions. Infinite regions have been reset 85882 times. The \kw{deallocateRegionsUntil} operation is called whenever an exception is raised, thus, we see that no exceptions were raised by @@ -7315,7 +7349,7 @@ \section{Regions Statistics} has allocated the largest amount of memory in infinite regions. We also see that the maximum allocated space in region pages is 121936 bytes. 
-In the \kw{STACK} part, we see that the program allocates and de-allocates the +In the \kw{STACK} part, we see that the program allocates and deallocates the same number of finite regions. We also see that the space used for finite regions is 144 bytes and that the total use of stack space is 19792 bytes (excluding space used to hold profiling information). The stack size values @@ -7640,7 +7674,7 @@ \chapter{Calling C Functions} % \index{C!calling}% % -C functions from within Standard ML programs. The MLKit allows ML values to be +C functions from within Standard ML programs. MLKit allows ML values to be passed to C functions, which again may return ML values. Not all ML values are represented as if they were C values. For instance, C strings are null-terminated arrays of characters, whereas ML strings in the MLKit are @@ -7660,7 +7694,7 @@ \chapter{Calling C Functions} In both cases, the MLKit uses region inference to infer the lifetime of regions that are passed to the C function. The region inference algorithm does not analyse C functions. Instead, the MLKit inspects the -ML type provided by the programmer. The MLKit assumes that functions +ML type provided by the programmer. MLKit assumes that functions with monomorphic types are % \index{region exomorphism}% @@ -7692,14 +7726,14 @@ \chapter{Calling C Functions} % \index{call-back function}% % -call-back functions to be executed when specific events occur. The MLKit only +call-back functions to be executed when specific events occur. MLKit only has very limited functionality for letting a C function call ML code. %======================================================== \section{Declaring Primitives and C Functions} \label{parPassing.sec} %======================================================== -The MLKit conforms in large parts to the Standard ML Basis Library. Part +MLKit conforms in large parts to the Standard ML Basis Library. 
Part of the functionality found in the basis library is programmed in C and linked to the MLKit runtime system. The declarations in system dependent parts of the library use a special built-in identifier @@ -7752,7 +7786,7 @@ \section{Declaring Primitives and C Functions} value. Instead, regions are passed to the C function as additional arguments and the C function must then explicitly allocate space in these regions as needed, using a C function provided by the runtime system. The order in which these -additional arguments are passed to the C function is determined by a pre-order +additional arguments are passed to the C function is determined by a preorder traversal of the result type $\tau$. For a list type, regions are given in the order: \begin{enumerate} @@ -7764,7 +7798,7 @@ \section{Declaring Primitives and C Functions} C function, given the result type. In the example, we use the following (optional) naming convention: names of arguments holding addresses of -pre-allocated space in regions +preallocated space in regions start with \kw{vAddr}, while names of arguments holding addresses of region descriptors (to be used for allocation in a region) start with \kw{rAddr}. @@ -7774,12 +7808,12 @@ \section{Declaring Primitives and C Functions} \kw{vAddrPair}, \kw{rAddrLPairs}, \kw{rAddrEPairs}, \kw{rAddrEStrings} and \kw{vAddrReal}, see Figure \ref{args_ex1.fig}. - Here \kw{vAddrPair} holds an address pointing to pre-allocated storage in + Here \kw{vAddrPair} holds an address pointing to preallocated storage in which the tuple of the list and the (pointer to the) real should reside. The argument \kw{rAddrLPairs} holds the region address for the auxiliary pairs of the list. Similarly, the arguments \kw{rAddrEPairs} and \kw{rAddrEStrings} hold region addresses for element pairs and strings, respectively. 
The - argument \kw{vAddrReal} holds the address for pre-allocated storage for the + argument \kw{vAddrReal} holds the address for preallocated storage for the real. \end{example} @@ -7827,14 +7861,14 @@ \section{Declaring Primitives and C Functions} \end{picture} \caption{The order of pointers to allocated space and infinite regions is - determined from a pre-order traversal of the result type $(\kw{int} \ast + determined from a preorder traversal of the result type $(\kw{int} \ast \kw{string}) ~\kw{list} \ast \kw{real}$.} \label{args_ex1.fig} \end{center} \hrule \end{figure} -Additional arguments holding pointers to pre-allocated space and +Additional arguments holding pointers to preallocated space and infinite regions are passed to the C function prior to the ML arguments. Consider again the ML declaration \begin{center} @@ -7849,7 +7883,7 @@ \section{Declaring Primitives and C Functions} \end{eqnarray} \noindent where $\mathit{addr}_1$, $\ldots$, $\mathit{addr}_m$ are pointers to -pre-allocated space and infinite regions as described above. +preallocated space and infinite regions as described above. %======================================== @@ -8327,8 +8361,7 @@ \section{Storage Modes} % \index{is_inf_and_atbot@\texttt{is\_inf\_and\_atbot}}% % -\kw{is\_inf\_and\_atbot}, which can be used to test whether resetting is safe, -assuming that the arguments to the C function are dead. +\kw{is\_inf\_and\_atbot}, which can be used to test whether resetting is safe. The C function \kw{resetRegion}, which is also provided by the runtime system in the header file \kw{src/Runtime/Region.h}, can be used to reset a @@ -8345,13 +8378,9 @@ \section{Storage Modes} ... } \end{scriptcode} -The C programmer should be careful not to reset regions that potentially contain -live values. In particular, the C programmer must be conservative and take into -account possible region aliasing between regions holding arguments and regions -holding the result. 
Clearly, if a region that the C function is supposed to -return a result in contains part of the value argument(s) of the function, then -the function should not first reset the region and then try to access the -argument(s). +The C programmer can assume that if a parameter region aliases another parameter +region or a region holding an argument, the parameter region is passed with +storage mode \fw{attop}. %======================================================= \section{Endomorphisms by Polymorphism} @@ -8378,7 +8407,7 @@ \section{Endomorphisms by Polymorphism} \end{scriptcode} Now, for the MLKit to make correct, that is safe, decisions about when to -de-allocate regions, the endomorphic properties of a C function must be +deallocate regions, the endomorphic properties of a C function must be expressed in the region-annotated type scheme for value identifiers to which the C function is bound. The programmer can tell the MLKit about region endomorphic behavior of a C function by using type variables. For example, here is an ML @@ -8394,7 +8423,7 @@ \section{Endomorphisms by Polymorphism} fun second(pair : 'a * 'b) : 'b = prim("select_second", pair) \end{smlcode} -The MLKit associates the following region-annotated type scheme to the value +MLKit associates the following region-annotated type scheme to the value identifier \kw{second}: $$\forall \alpha_1\alpha_2\rho\epsilon.(\alpha_1 * \alpha_2, \rho) \ar{\epsilon.\{\Get(\rho_3)\}} \alpha_2$$ Notice that the region-annotated type @@ -8466,7 +8495,7 @@ \section{Compiling and Linking} \section{Dynamic Linking} \label{link_at_runtime.sec} -The MLKit supports +MLKit supports % \index{dynamic linking}% % @@ -8660,8 +8689,8 @@ \section{Examples\label{Cexamples.sec}} \begin{example}\label{change_elem.ex} Function \index{change_elem@\texttt{change\_elem}}\kw{change\_elem} shows the use of the macro \kw{elemRecordML}. The result type is \kw{string*int}. The - function swaps the two elements in the pair. 
The MLKit passes an address to - pre-allocated space for the result pair, and an infinite region for the result + function swaps the two elements in the pair. MLKit passes an address to + preallocated space for the result pair, and an infinite region for the result string. At first thought it should be enough to just swap the two arguments, and not @@ -8680,7 +8709,7 @@ \section{Examples\label{Cexamples.sec}} \end{scriptcode} This function may work sometimes but it is not safe! Region inference expects the result string to be allocated in \kw{stringRho}, and may therefore - de-allocate the region containing the argument string, \kw{secondElem\_ml}, + deallocate the region containing the argument string, \kw{secondElem\_ml}, while the string in the returned pair is still live. The safe version of \verb|change_elem| is found in \kw{libmylib.c}. See Section~\ref{C_polymorphism.sec} for inspiration to how a safe non-copying @@ -8718,10 +8747,22 @@ \section{Examples\label{Cexamples.sec}} \chapter{Summary of Changes} %--------------------------------------------------------- +\section{Changes Since Version 4.7.16} +\index{changes!since version 4.7.16}% + +Here is an overview of the main changes to MLKit since version 4.7.16. + +\subsubsection{Modular Storage Mode Analysis} + +MLKit now implements a modular version of the storage mode analysis, which +allows for external functions to be passed with regions that have storage mode +\fw{atbot}. The modular version of the storage mode analysis makes use of local +aliasing information for inferring storage modes. See Section~\ref{smaextern.sec}. + \section{Changes Since Version 4.7.2} \index{changes!since version 4.7.2}% -Here is an overview of the main changes to the MLKit since version 4.7.2. +Here is an overview of the main changes to MLKit since version 4.7.2. 
\subsubsection{Double-Ended Bit-Stealing for Uboxing} @@ -8754,14 +8795,14 @@ \subsubsection{Improved Abbreviated Pretty Printing} \subsubsection{Explicit Programming with Regions and Effects} -The MLKit now supports explicit programming with regions and effects. The +MLKit now supports explicit programming with regions and effects. The resulting language and compiler is called ReML and it features techniques for expressing and verifying constraints on regions and effects \cite{10.1145/3632921}. ReML is not described in this report. \subsubsection{Improved C FFI Auto Conversion} -The MLKit now has improved support for interaction with C through an enriched +MLKit now has improved support for interaction with C through an enriched set of supported types, as documented in Section~\ref{auto_conversion.sec}. \section{Changes Since Version 4.6.1} @@ -8802,7 +8843,7 @@ \subsubsection*{X64 Backend} % x64 native backend, which uses the GNU assembler to create native machine code on x86 machines. The new backend also features intra-procedural register -allocation for floating-point values. The MLKit now also features \kw{Int64.int} +allocation for floating-point values. MLKit now also features \kw{Int64.int} as the default integer and \kw{Word64.word} as the default word type (\kw{Int63.int} and \kw{Word63.word} when garbage collection is enabled). @@ -8811,7 +8852,7 @@ \subsubsection*{Hosted at Github} \index{Github!repository}% \index{Repository!Github}% % -The MLKit is now hosted at Github. The repository is +MLKit is now hosted at Github. The repository is \begin{quote} \url{https://github.com/melsman/mlkit} \end{quote} @@ -8873,7 +8914,7 @@ \subsubsection*{Support for Compiling ML Basis Files} \subsubsection*{File-based Separate Compilation} \index{separate compilation}% % -The MLKit now supports file-based separate compilation, based on dependencies +MLKit now supports file-based separate compilation, based on dependencies established from ML Basis Files. 
The compiler serialises symbol table information to disk for each compilation unit, so that this information can be deserialised and used when compiling other compilation units. @@ -8891,7 +8932,7 @@ \subsubsection*{Untagged Pairs, Triples and References} \index{garbage collection}% \index{value representation}% % -The MLKit now support untagged representations of heap-allocated pairs, triples, +MLKit now supports untagged representations of heap-allocated pairs, triples, and Standard ML references, even when garbage collection is enabled. \section{Changes Since Version 3} @@ -8903,7 +8944,7 @@ \section{Changes Since Version 3} \subsubsection*{Garbage Collection} \index{garbage collection}% % -The MLKit supports reference tracing garbage collection in combination with the +MLKit supports reference tracing garbage collection in combination with the region memory model. Garbage collection is supported only in the native backend version of the MLKit. To enable garbage collection, pass the option \kw{-gc} to the MLKit compiler. Garbage collection is also possible with region profiling @@ -8968,7 +9009,7 @@ \subsubsection*{Modules and Separate Compilation} \cite{ElsmanICFP99,ElsmanThesis}. See Chapter~\ref{mlb_and_modules.chap}. \subsubsection*{Standard ML Basis Library} -The MLKit support a large portion of the +MLKit supports a large portion of the % \index{Standard ML Basis Library}% % @@ -8977,7 +9018,7 @@ \subsubsection*{Standard ML Basis Library} MLB-file \kw{basis.mlb} located in the directory \kw{basis}. \subsubsection*{Scalability} -The MLKit now compiles fairly large programs, including Hafnium's AnnoDomini +MLKit now compiles fairly large programs, including Hafnium's AnnoDomini (58.000 lines of SML) and the MLKit itself (around 80.000 lines). \subsubsection*{New Match Compiler} @@ -8986,7 +9027,7 @@ \subsubsection*{New Match Compiler} compiler.
\subsubsection*{New StatObject Module} -The MLKit contains a module, +MLKit contains a module, % \index{StatObject}% % diff --git a/doc/manual/scan_rev1_1.pdf b/doc/manual/scan_rev1_1.pdf index 20b6232cf..33161e5b6 100644 Binary files a/doc/manual/scan_rev1_1.pdf and b/doc/manual/scan_rev1_1.pdf differ diff --git a/doc/manual/scan_rev1_2.pdf b/doc/manual/scan_rev1_2.pdf index 6e9a84e83..92b2d901c 100644 Binary files a/doc/manual/scan_rev1_2.pdf and b/doc/manual/scan_rev1_2.pdf differ diff --git a/doc/mlkit.pdf b/doc/mlkit.pdf index b402aa1e8..ed17e1d60 100644 Binary files a/doc/mlkit.pdf and b/doc/mlkit.pdf differ diff --git a/kitdemo/Makefile b/kitdemo/Makefile index dfeddf5eb..fd874d914 100644 --- a/kitdemo/Makefile +++ b/kitdemo/Makefile @@ -117,7 +117,7 @@ scan_rev1_1.ps: scan_rev1.rp ../bin/rp2ps -name "Scanning life.sml 50 times" -source $< -region $@ -sampleMax 200 scan_rev1_2.ps: scan_rev1.rp - ../bin/rp2ps -name "Scanning life.sml 50 times" -source $< -object 333643 $@ -sampleMax 200 + ../bin/rp2ps -name "Scanning life.sml 50 times" -source $< -object 331112 $@ -sampleMax 200 scan_rev2.exe: scan_rev2.mlb Makefile scan_rev2.sml lib.sml SML_LIB=.. 
$(MLKIT) -output $@ -no_gc -prof $< diff --git a/src/Compiler/Backend/ClosConvEnv.sml b/src/Compiler/Backend/ClosConvEnv.sml index 1266e3742..6a70710a5 100644 --- a/src/Compiler/Backend/ClosConvEnv.sml +++ b/src/Compiler/Backend/ClosConvEnv.sml @@ -3,7 +3,7 @@ structure ClosConvEnv : CLOS_CONV_ENV = struct structure BI = BackendInfo - structure RegvarFinMap = EffVarEnv + structure RegvarFinMap = Effect.Map structure Labels = AddressLabels structure PP = PrettyPrint structure LvarFinMap = Lvars.Map diff --git a/src/Compiler/Backend/SubstAndSimplify.sml b/src/Compiler/Backend/SubstAndSimplify.sml index cf0279661..c8df73b3c 100644 --- a/src/Compiler/Backend/SubstAndSimplify.sml +++ b/src/Compiler/Backend/SubstAndSimplify.sml @@ -17,7 +17,7 @@ functor SubstAndSimplify(structure LineStmt: LINE_STMT struct structure PP = PrettyPrint structure Labels = AddressLabels - structure RegvarFinMap = EffVarEnv + structure RegvarFinMap = Effect.Map val _ = Flags.add_bool_entry {long="print_simplified_program", short=NONE, item=ref false, menu=["Printing of intermediate forms", "print simplified program (LineStmt)"], neg=false, diff --git a/src/Compiler/Regions/AtInf.sml b/src/Compiler/Regions/AtInf.sml index 4b514d243..619bb5706 100644 --- a/src/Compiler/Regions/AtInf.sml +++ b/src/Compiler/Regions/AtInf.sml @@ -5,8 +5,6 @@ structure AtInf : AT_INF = structure PP = PrettyPrint structure Eff = Effect structure LLV = LocallyLiveVariables - structure BT = IntStringFinMap - structure RegvarBT = EffVarEnv (* In the old storage mode analysis an environment was propagated to later * program units. 
Since we must assign storage mode attop to regions passed @@ -60,8 +58,7 @@ structure AtInf : AT_INF = fun chat (s : string) = if !Flags.chat then log s else () fun show_place p = PP.flatten1(Eff.layout_effect p) - fun show_arreffs epss = concat(map (fn eps => " " ^ show_place eps) epss) - fun show_places rhos = show_arreffs rhos + fun show_places rhos = String.concatWith "," (map show_place rhos) fun forceATBOT (ATTOP p) = (ATBOT p) | forceATBOT (ATBOT p) = (ATBOT p) @@ -141,27 +138,31 @@ structure AtInf : AT_INF = datatype rho_desc = LETREGION_BOUND | LETREC_BOUND - abstype regvar_env = REGVAR_ENV of rho_desc RegvarBT.map + abstype regvar_env = REGVAR_ENV of rho_desc Eff.Map.map with exception RegvarEnv - val empty_regvar_env = REGVAR_ENV(RegvarBT.empty) - fun declare_regvar_env(x, y, REGVAR_ENV m) = REGVAR_ENV(RegvarBT.add(x,y,m)) - fun retrieve_regvar_env(x, REGVAR_ENV m) = case (RegvarBT.lookup m x) - of SOME v => v + val empty_regvar_env = REGVAR_ENV(Eff.Map.empty) + fun declare_regvar_env (x, y, REGVAR_ENV m) = REGVAR_ENV(Eff.Map.add(x,y,m)) + fun retrieve_regvar_env (x, REGVAR_ENV m) = + case Eff.Map.lookup m x of + SOME v => v | NONE => raise RegvarEnv end type lvar_env_range = (sigma*place option) * place list - abstype lvar_env = - LVAR_ENV of lvar_env_range BT.map + abstype lvar_env = LVAR_ENV of lvar_env_range Lvars.Map.map with exception LvarEnv - val empty_lvar_env = LVAR_ENV(BT.empty) - fun declare_lvar_env (x,y,LVAR_ENV m) = LVAR_ENV(BT.add(Lvars.key x,y,m)) + val empty_lvar_env = LVAR_ENV(Lvars.Map.empty) + fun declare_lvar_env (x,y,LVAR_ENV m) = LVAR_ENV(Lvars.Map.add(x,y,m)) fun retrieve_lvar_env (x,LVAR_ENV m) = - case BT.lookup m x of - SOME x => x - | NONE => raise LvarEnv + case Lvars.Map.lookup m x of + SOME x => x + | NONE => raise LvarEnv + fun is_local_lvar_env (x,LVAR_ENV m) = + case Lvars.Map.lookup m x of + SOME _ => true + | NONE => false end type excon_env_range = (sigma*place) * place list @@ -310,7 +311,7 @@ structure AtInf : 
AT_INF = let (* val _ = Profile.profileOn();*) fun conflicting_local_lvar lvar : conflict option = - let val lvar_res as (_,lrv) = SME.retrieve_lvar_env(Lvars.key lvar, LE) + let val lvar_res as (_,lrv) = SME.retrieve_lvar_env(lvar, LE) in case rho_points_into lrv of SOME (witness: place) => SOME(LVAR_PROBLEM(rho,lvar,lvar_res,witness)) | NONE => NONE @@ -324,7 +325,7 @@ structure AtInf : AT_INF = fun conflicting_local_excon (excon: Excon.excon): conflict option = let val excon_res as (_,lrv) = SME.retrieve_excon_env(excon, EE) - in case rho_points_into(lrv) of + in case rho_points_into lrv of SOME (witness: place) => SOME(EXCON_PROBLEM(rho,excon,excon_res,witness)) | _ => NONE end handle SME.ExconEnv => @@ -346,7 +347,7 @@ structure AtInf : AT_INF = fun equal_places rho1 rho2 = Eff.eq_effect(rho1,rho2) fun letregion_bound (rho,sme,liveset): conflict option * place at= - let fun rho_points_into rhos= List.find (equal_places rho) rhos + let fun rho_points_into rhos = List.find (equal_places rho) rhos in debug1([],liveset); any_live(rho,sme,liveset,rho_points_into,ATBOT rho) end @@ -357,7 +358,7 @@ structure AtInf : AT_INF = fun letrec_bound (rho, sme, liveset): conflict option * place at= let (*val _ = Profile.profileOn();*) - val rho_related = RegFlow.reachable_in_graph_with_insertion (rho) + val rho_related = RegFlow.reachable_in_graph_with_insertion rho (*val _ = Profile.profileOff();*) fun rho_points_into lrv = List.find is_visited lrv in debug1(rho_related,liveset); @@ -419,17 +420,51 @@ structure AtInf : AT_INF = fun mu_to_scheme_and_place (tau:RType.Type, rho_opt : place option) : sigma * place option = (RType.type_to_scheme tau, rho_opt) + (* traverse a list and apply the supplied function to each element and the other elements *) + fun traverse f nil acc = nil + | traverse f (x::xs) acc = f (x,List.revAppend (acc,xs)) :: traverse f xs (x::acc) + (********************************) (* sma0 traverses the program *) (* and inserts storage modes *) 
(********************************) + (* For primitives and for calls to simple functions declared non-locally, we use + a modular scheme for assigning storage modes: + If a region argument is aliased with another argument or a region in the + type of a live variable, the region is passed attop. Otherwise: + (1) if the region is LETREGION-bound, it is passed atbot + (2) if the region is LETREC-bound, it is passed sat + *) + fun sma_modular_call sme actuals = + let fun f (actual as (rho,_),others) = + case which_at sme actual of + actual' as ATTOP _ => actual' + | actual' => + let val other_rhos = map (fn (r,_) => r) others + in case SME.retrieve_regvar_env(rho,#1 sme) of + SME.LETREGION_BOUND => (* leaf *) + if List.exists (equal_places rho) other_rhos + then ATTOP rho + else actual' + | SME.LETREC_BOUND => + let val all_other_rhos = map RegFlow.reachable_in_graph_with_insertion other_rhos + val rho_related = RegFlow.reachable_in_graph_with_insertion rho + val () = List.app visit rho_related + val b = List.exists (List.exists is_visited) all_other_rhos + in List.app unvisit rho_related + ; (if b then ATTOP rho else actual') + end + end handle SME.RegvarEnv => ATTOP rho + in traverse f actuals nil + end + fun sma0 (pgm0 as PGM{expression=trip, - export_datbinds, - import_vars, - export_vars, - export_basis, - export_Psi}: (place * LLV.liveset, place*mul, qmularefset ref)LambdaPgm) + export_datbinds, + import_vars, + export_vars, + export_basis, + export_Psi}: (place * LLV.liveset, place*mul, qmularefset ref)LambdaPgm) : (place at, place*mul, unit)LambdaPgm = let fun sma_trip sme (TR(e, metaType, ateffects, mulef_r)) = let fun sma_sw sme (SWITCH(tr,choices,opt)) = @@ -441,8 +476,21 @@ structure AtInf : AT_INF = val e' = (case e of VAR{lvar,il,plain_arreffs,fix_bound,rhos_actuals=ref actuals,other} => - let val actuals' = map (which_at sme) actuals (* also liveset here*) - in VAR{lvar=lvar,il=il,plain_arreffs=plain_arreffs, + let val actuals' = + if 
SME.is_local_lvar_env (lvar,#2 sme) then + map (which_at sme) actuals (* also liveset here*) + else + case #2 il of + [_] => (* SIMPLE: single arrow effect, function is defined elsewhere. *) + sma_modular_call sme actuals + | _ => ( (if debug_which_at() + then log ("NOT SIMPLE - gives ATTOP for all regargs: " ^ Lvars.pr_lvar lvar + ^ "; len(actuals) = " ^ Int.toString (length actuals) + ^ "; len(eps) = " ^ Int.toString (length (#2 il)) + ^ "\n") + else ()) + ; map (fn (rho, _) => ATTOP rho) actuals) + in VAR{lvar=lvar,il=il,plain_arreffs=plain_arreffs, fix_bound=fix_bound,rhos_actuals=ref actuals',other=()} end | INTEGER(n, t, alloc) => INTEGER(n, t, Option.map (which_at sme) alloc) @@ -546,12 +594,14 @@ structure AtInf : AT_INF = EQUAL ({mu_of_arg1=mu_of_arg1, mu_of_arg2=mu_of_arg2}, (* no need for analysis *) sma_trip sme tr1,sma_trip sme tr2) | CCALL ({name, mu_result, rhos_for_result}, trs) => - CCALL ({name = name, mu_result = mu_result, - rhos_for_result = - map (fn ((rho, liveset), i_opt) => - (which_at sme (rho, liveset), i_opt)) - rhos_for_result}, - map (sma_trip sme) trs) + let val (actuals, iopts) = ListPair.unzip rhos_for_result + val actuals' = sma_modular_call sme actuals + val rhos_for_result' = ListPair.zipEq (actuals',iopts) + handle _ => die "ccall.zip" + in CCALL ({name = name, mu_result = mu_result, + rhos_for_result = rhos_for_result'}, + map (sma_trip sme) trs) + end | BLOCKF64 (alloc, trs) => BLOCKF64(which_at sme alloc,map (sma_trip sme) trs) | SCRATCHMEM (n,alloc) => SCRATCHMEM(n,which_at sme alloc) | EXPORT(i,tr) => EXPORT(i,sma_trip sme tr) diff --git a/src/Compiler/Regions/DropRegions.sml b/src/Compiler/Regions/DropRegions.sml index 9b205b873..fa2793cee 100644 --- a/src/Compiler/Regions/DropRegions.sml +++ b/src/Compiler/Regions/DropRegions.sml @@ -1,10 +1,9 @@ -structure DropRegions: DROP_REGIONS = +structure DropRegions : DROP_REGIONS = struct structure PP = PrettyPrint structure Eff = Effect structure RSE = RegionStatEnv - structure 
LvarMap = Lvars.Map open MulExp AtInf @@ -61,7 +60,7 @@ structure DropRegions: DROP_REGIONS = fun visit_put_rhos [] = () | visit_put_rhos (arreff::arreffs) = let fun visit_eval_effect effect = if Eff.is_put effect then visit(Eff.rho_of effect) else () - val _ = List.app visit_eval_effect (Eff.represents arreff) + val _ = List.app visit_eval_effect (Eff.represents_no_gets arreff) in visit_put_rhos arreffs end fun unvisit_bot_rhos [] = () @@ -125,13 +124,11 @@ structure DropRegions: DROP_REGIONS = val export_env = ref empty - - (* ----------------------------------------------------------------- * Environment for Region Variables * ----------------------------------------------------------------- *) - structure PlaceMap = Eff.PlaceOrEffectMap + structure PlaceMap = Eff.Map datatype regenv_res = DROPIT | KEEP | LETREGION_INF (*to disable global regions*) type place = RType.place diff --git a/src/Compiler/Regions/EFFECT.sig b/src/Compiler/Regions/EFFECT.sig index 3e399c78e..651a47b85 100644 --- a/src/Compiler/Regions/EFFECT.sig +++ b/src/Compiler/Regions/EFFECT.sig @@ -247,10 +247,11 @@ signature EFFECT = sig val topsort : effect list -> effect list val subgraph : effect list -> effect list - val eval_phis : effect list -> effect list (* returns all nodes in graph *) + val eval_phis : effect list -> effect list (* returns all nodes in graph (for ReML) *) val check_nodes : {allnodes:effect list, letregions:effect list} -> unit (* check ReML constraints *) - val represents : effect -> effect list + val represents_no_gets : effect -> effect list + val represents_with_gets : effect -> effect list val reset_cone : cone -> unit val reset : unit -> unit (* reset list of effect updates; done once pr module *) @@ -265,5 +266,5 @@ signature EFFECT = sig val layoutCone : cone -> StringTree (* sets and clears visited field *) val layoutEtas : effect list -> StringTree list (* sets and clears visited field *) - structure PlaceOrEffectMap : MONO_FINMAP where type dom = effect + 
structure Map : MONO_FINMAP where type dom = effect end diff --git a/src/Compiler/Regions/EffVarEnv.sml b/src/Compiler/Regions/EffVarEnv.sml deleted file mode 100644 index d427d34e3..000000000 --- a/src/Compiler/Regions/EffVarEnv.sml +++ /dev/null @@ -1,5 +0,0 @@ -structure EffVarEnv= - OrderFinMap(struct - type t = Effect.effect - val lt = Effect.lt_eps_or_rho - end) diff --git a/src/Compiler/Regions/Effect.sml b/src/Compiler/Regions/Effect.sml index 9871185fc..c8f9efc92 100644 --- a/src/Compiler/Regions/Effect.sml +++ b/src/Compiler/Regions/Effect.sml @@ -1219,12 +1219,12 @@ struct ; Lf unique_nodes end - structure PlaceOrEffectMap = + structure Map = OrderFinMap(struct type t = effect val lt = lt_eps_or_rho end) - structure Increments = PlaceOrEffectMap + structure Increments = Map val globalIncs : delta_phi Increments.map ref = ref Increments.empty @@ -2206,7 +2206,7 @@ struct (* Notice: We also check ReML constraints on atomic effects during this phase *) - structure MultiMerge = + structure MultiMerge : sig val multimerge : effect list list -> effect list end = struct (* A multi-way merge can be implemented by keeping a heap of list of elements to be sorted. 
The lists in the heap @@ -2224,7 +2224,6 @@ struct structure Heap = Heap(structure HeapInfo = HI) - fun merge (ae1, ae2) = ae1 fun eq (ae1, ae2) = eq_effect(ae1, ae2) fun makeHeap ll = @@ -2242,14 +2241,14 @@ struct else case Heap.delete_min h of (l1 as (x1::xs1), h1) => if eq(min,x1) then - if Heap.is_empty h1 then merge(min,x1)::xs1 - else merge_against(merge(min,x1), insert(xs1, h1)) + if Heap.is_empty h1 then min::xs1 + else merge_against(min, insert(xs1, h1)) else - if Heap.is_empty h1 then min :: l1 + if Heap.is_empty h1 then min::l1 else min :: merge_against(x1, insert(xs1, h1)) | _ => die "merge_against" - fun merge_all h = + fun merge_all h = if Heap.is_empty h then [] else case Heap.delete_min h of (x1::xs1, h1) => merge_against(x1, insert(xs1,h1)) @@ -2474,7 +2473,7 @@ struct [] ) | PUT => [n] - | GET => [] + | GET => [n] | MUT => [n] | _ => (say "bottom_up_eval: unexpected node(1): " ; say_eps n; say "\n"; @@ -2503,7 +2502,7 @@ struct result end) | PUT => [n] - | GET => [] + | GET => [n] | MUT => [n] | RHO _ => [] ) @@ -2559,13 +2558,22 @@ struct List.app (check_node letregions) allnodes handle ? as Report.DeepError _ => raise ? 
- fun represents eps = + fun represents_no_gets eps = + case G.find_info eps of + EPS{represents = SOME l, ...} => + List.filter (fn e => not(is_exn e) andalso not(is_mut e) andalso not(is_get e)) l + | _ => (say "No info for eps\n"; + say_eps eps; + die ("represents")) + + fun represents_with_gets eps = case G.find_info eps of EPS{represents = SOME l, ...} => List.filter (fn e => not(is_exn e) andalso not(is_mut e)) l | _ => (say "No info for eps\n"; say_eps eps; die ("represents")) + end (* diff --git a/src/Compiler/Regions/LocallyLiveVariables.sml b/src/Compiler/Regions/LocallyLiveVariables.sml index b7646e6c8..caf458ddc 100644 --- a/src/Compiler/Regions/LocallyLiveVariables.sml +++ b/src/Compiler/Regions/LocallyLiveVariables.sml @@ -66,7 +66,6 @@ struct fun findLvar pred (liveset as (lvarset,_)) = Lvarset.findLvar pred lvarset - fun norm lvarset = lvarset fun fromList lvars = Lvarset.lvarsetof(lvars) @@ -75,10 +74,9 @@ struct (*******************************************) type liveset = lvarset * Excon.excon list - fun norm_liveset (lvarset, excons) = (norm lvarset, excons) fun layout_liveset (liveset) = - case norm_liveset liveset of + case liveset of (lvarset, excons) => PrettyPrint.NODE{start = "{", finish = "}", indent =1, childsep = PrettyPrint.RIGHT",", children = map (PrettyPrint.LEAF o Lvars.pr_lvar) (Lvarset.members lvarset) @ @@ -223,11 +221,11 @@ struct VAR{lvar,...} => (cp_triv_exp e, (singleton lvar, [])) | INTEGER(i,t,NONE) => (cp_triv_exp e, empty_liveset) - | INTEGER(i,t,SOME a) => (INTEGER(i,t,SOME(a, norm_liveset liveset)), empty_liveset) + | INTEGER(i,t,SOME a) => (INTEGER(i,t,SOME(a, liveset)), empty_liveset) | WORD(i,t,NONE) => (cp_triv_exp e, empty_liveset) - | WORD(i,t,SOME a) => (WORD(i,t,SOME(a, norm_liveset liveset)), empty_liveset) - | STRING(s,place) => (STRING(s, (place, norm_liveset liveset)), empty_liveset) - | REAL(r,place) => (REAL(r, (place, norm_liveset liveset)), empty_liveset) + | WORD(i,t,SOME a) => (WORD(i,t,SOME(a, 
liveset)), empty_liveset) + | STRING(s,place) => (STRING(s, (place, liveset)), empty_liveset) + | REAL(r,place) => (REAL(r, (place, liveset)), empty_liveset) | F64 r => (cp_triv_exp e, empty_liveset) | UB_RECORD(trs) => let val children = map (fn tr => llv(tr, liveset)) trs @@ -241,7 +239,7 @@ struct delete_lvars(freeInBody, map #1 pat) in (FN{pat=pat,body = body',free = free, - alloc = (p, norm_liveset(union_llv(liveset, for_closure)))}, + alloc = (p, union_llv(liveset, for_closure))}, for_closure) end | LETREGION{B,rhos,body} => @@ -267,7 +265,7 @@ struct val localFree = diff_llv(union_llv(freeInRhs, freeInScope), boundByLhs) in - (FIX{free =free, shared_clos = (rho, norm_liveset(union_llv(localFree, liveset))), + (FIX{free =free, shared_clos = (rho, union_llv(localFree, liveset)), functions = map(fn({lvar,occ,tyvars,rhos,epss,Type,rhos_formals, bound_but_never_written_into, @@ -285,9 +283,8 @@ struct rhos_actuals, other},meta,phi,psi), tr2) => (* equation 23 and 24 in popl 96 paper *) let - val liveset = norm_liveset liveset val (tr2',live_tr2) = llv(tr2, liveset) - val liveset_fx = norm_liveset(union_llv(live_tr2,add_lvar(liveset, f))) (* see equation 24 *) + val liveset_fx = union_llv(live_tr2,add_lvar(liveset, f)) (* see equation 24 *) in (APP(ck,sr,TR(VAR{lvar=f,il=il,plain_arreffs=plain_arreffs, fix_bound=true, (* see (24) *) @@ -315,7 +312,6 @@ struct (* non-empty list of actual regions: has to be primitive lvar *) (case Lvars.primitive lvar of SOME _ => let - val liveset = norm_liveset liveset val (tr2',live_tr2) = llv(tr2, liveset) in (APP(ck,sr,TR(VAR{lvar=lvar,il=il,plain_arreffs=plain_arreffs, @@ -336,7 +332,7 @@ struct | EXCEPTION(excon,b,mu,rho,tr1) => let val (tr1',freeInScope) = llv(tr1, liveset) in - (EXCEPTION(excon,b,mu,(rho,norm_liveset(liveset)),tr1'), + (EXCEPTION(excon,b,mu,(rho,liveset),tr1'), delete_excon(freeInScope, excon)) end @@ -393,14 +389,14 @@ struct end | CON0{con,il,aux_regions,alloc=NONE} => - let val livehere = norm_liveset 
liveset + let val livehere = liveset in (CON0{con=con,il=il,aux_regions= map (fn rho => (rho,livehere)) aux_regions, alloc = NONE}, empty_liveset) end | CON0{con,il,aux_regions,alloc=SOME alloc} => - let val livehere = norm_liveset liveset + let val livehere = liveset in (CON0{con=con,il=il,aux_regions= map (fn rho => (rho,livehere)) aux_regions, alloc = SOME(alloc,livehere)}, @@ -409,14 +405,14 @@ struct | CON1({con,il,alloc=NONE},tr1) => (* tr1 is trivial *) let val (tr1',freeInArgs) = llv(tr1, liveset) - val livehere = norm_liveset(union_llv(liveset, freeInArgs)) + val livehere = union_llv(liveset, freeInArgs) in (CON1({con=con,il=il,alloc=NONE}, tr1'), freeInArgs) end | CON1({con,il,alloc=SOME alloc},tr1) => (* tr1 is trivial *) let val (tr1',freeInArgs) = llv(tr1, liveset) - val livehere = norm_liveset(union_llv(liveset, freeInArgs)) + val livehere = union_llv(liveset, freeInArgs) in (CON1({con=con,il=il,alloc=SOME(alloc,livehere)}, tr1'), freeInArgs) @@ -429,7 +425,7 @@ struct | EXCON(excon,NONE) => (EXCON(excon,NONE), empty_liveset) | EXCON(excon,SOME(rho,tr1)) => (* tr1 trivial *) let val (tr1', free_tr1) = llv(tr1, liveset) - in (EXCON(excon,SOME((rho, norm_liveset(union_llv(liveset,free_tr1))),tr1')), + in (EXCON(excon,SOME((rho, union_llv(liveset,free_tr1)),tr1')), add_excon(free_tr1,excon)) end | DEEXCON(excon,tr1) => (* tr1 trivial *) @@ -440,7 +436,7 @@ struct | RECORD(SOME rho, trs) => (* elements of trs trivial *) let val children = map (fn tr => llv(tr, liveset)) trs val freeInArgs = union_many(map #2 children) - in (RECORD(SOME(rho,norm_liveset(union_llv(freeInArgs, liveset))), map #1 children), + in (RECORD(SOME(rho,union_llv(freeInArgs, liveset)), map #1 children), freeInArgs) end | RECORD(NONE, nil) => (RECORD(NONE, nil), empty_liveset) @@ -456,7 +452,7 @@ struct end | REF(rho,tr1) => (* tr1 trivial *) let val (tr1', free_tr1) = llv(tr1, liveset) - in (REF((rho,norm_liveset(union_llv(free_tr1, liveset))), tr1'), + in 
(REF((rho,union_llv(free_tr1, liveset)), tr1'), free_tr1) end | ASSIGN(tr1,tr2) => (* tr1 and tr2 trivial *) @@ -482,7 +478,7 @@ struct let val children = map (fn tr => llv(tr, liveset)) trs val freeInChildren = union_many(map #2 children) - val liveset_here = norm_liveset(union_llv(freeInChildren, liveset)) + val liveset_here = union_llv(freeInChildren, liveset) in (CCALL ({name = name, mu_result = mu_result, rhos_for_result = @@ -496,17 +492,16 @@ struct let val children = map (fn tr => llv(tr, liveset)) trs val freeInArgs = union_many(map #2 children) in - (BLOCKF64((rho,norm_liveset(union_llv(freeInArgs, liveset))), map #1 children), + (BLOCKF64((rho,union_llv(freeInArgs, liveset)), map #1 children), freeInArgs) end - | SCRATCHMEM(n,a) => (SCRATCHMEM(n,(a, norm_liveset liveset)), empty_liveset) + | SCRATCHMEM(n,a) => (SCRATCHMEM(n,(a, liveset)), empty_liveset) | EXPORT(i,tr1) => let val (tr1', free_in_tr1) = llv(tr1,liveset) in (EXPORT(i,tr1'), free_in_tr1) end | RESET_REGIONS({force,regions_for_resetting,...}, tr1) => (* tr1 is trivial *) let - val liveset = norm_liveset liveset val (tr1', free_tr1) = llv(tr1, liveset) in (RESET_REGIONS({force=force, diff --git a/src/Compiler/Regions/Mul.sml b/src/Compiler/Regions/Mul.sml index b7c54ba68..c07569856 100644 --- a/src/Compiler/Regions/Mul.sml +++ b/src/Compiler/Regions/Mul.sml @@ -1,5 +1,5 @@ -structure Mul: MUL = +structure Mul : MUL = struct structure Eff = Effect structure Lam = LambdaExp @@ -7,7 +7,7 @@ struct structure RSE = RegionStatEnv structure PP = PrettyPrint structure QM_EffVarEnv = - QuasiEnv(structure OFinMap = EffVarEnv + QuasiEnv(structure OFinMap = Eff.Map val key = Effect.key_of_eps_or_rho val eq = Effect.eq_effect) @@ -99,7 +99,7 @@ struct val empty_mularefmap = GlobalEffVarEnv.empty val initial_mularefmap = let val _ = Eff.eval_phis [Eff.toplevel_arreff] - val mulef = map (fn ae => (ae,INF)) (Eff.represents Eff.toplevel_arreff) + val mulef = map (fn ae => (ae,INF)) (Eff.represents_no_gets 
Eff.toplevel_arreff) val mularef = (Eff.toplevel_arreff,mulef) in GlobalEffVarEnv.add(Eff.toplevel_arreff, ref mularef, GlobalEffVarEnv.empty) end diff --git a/src/Compiler/Regions/MulInf.sml b/src/Compiler/Regions/MulInf.sml index 9479ea41d..81eef351e 100644 --- a/src/Compiler/Regions/MulInf.sml +++ b/src/Compiler/Regions/MulInf.sml @@ -584,7 +584,7 @@ struct (* Psi records multiplicities for effect variables that are * bound locally within the program unit or are exported from * the program unit. Psi is a quasi-map (i.e., partly imperative)*) - let val Phi = map (fn eps => (eps, Eff.represents eps)) + let val Phi = map (fn eps => (eps, Eff.represents_no_gets eps)) ( (*Eff.toplevel_arreff :: ;mael 2004-03-31*) (List.filter Eff.is_arrow_effect effects)) val _ = if test then say " made Phi, now constructing the map Psi..." else () in makezero_Phi Phi diff --git a/src/Compiler/Regions/PhysSizeInf.sml b/src/Compiler/Regions/PhysSizeInf.sml index c3f6b01fe..0a6e65cac 100644 --- a/src/Compiler/Regions/PhysSizeInf.sml +++ b/src/Compiler/Regions/PhysSizeInf.sml @@ -1,7 +1,7 @@ structure PhysSizeInf: PHYS_SIZE_INF = struct - structure RegvarFinMap = EffVarEnv + structure RegvarFinMap = Effect.Map structure PP = PrettyPrint structure LvarMap = Lvars.Map diff --git a/src/Compiler/Regions/RegFlow.sml b/src/Compiler/Regions/RegFlow.sml index 9119e75be..8a4f27806 100644 --- a/src/Compiler/Regions/RegFlow.sml +++ b/src/Compiler/Regions/RegFlow.sml @@ -1,6 +1,6 @@ (* Region Flow Analysis: first pass of Storage Mode Analysis *) -structure RegFlow: REG_FLOW = +structure RegFlow : REG_FLOW = struct structure Eff = Effect structure PP = PrettyPrint @@ -17,14 +17,13 @@ struct type exp = (place, place*mul, qmularefset ref)MulExp.LambdaExp type trip = (place, place*mul, qmularefset ref)MulExp.trip - (* ---------------------------------------------------------------------- *) (* General Abbreviations *) (* ---------------------------------------------------------------------- *) fun log 
s = TextIO.output(!Flags.log,s ^ "\n") - fun device(s) = TextIO.output(!Flags.log, s) - fun dump(t) = PrettyPrint.outputTree(device, t, !Flags.colwidth) + fun device s = TextIO.output(!Flags.log, s) + fun dump t = PrettyPrint.outputTree(device, t, !Flags.colwidth) fun die errmsg = Crash.impossible ("RegFlow." ^ errmsg) fun unimplemented x = Crash.unimplemented ("RegFlow." ^ x) @@ -41,31 +40,28 @@ struct (* Utility functions *) (* ---------------------------------------------------------------------- *) - fun footnote(x,y) = x + fun footnote (x,y) = x infix footnote fun noSome x errmsg = - case x of - NONE => die errmsg - | SOME y => y - + case x of + NONE => die errmsg + | SOME y => y fun equal_places' p q = Eff.eq_effect(p,q) - (* ---------------------------------------------------------------------- *) (* Region-flow Graphs *) (* ---------------------------------------------------------------------- *) type nodeVal = effect - fun eq_nodeVal(p1, p2) = Eff.eq_effect(p1,p2) + fun eq_nodeVal (p1,p2) = Eff.eq_effect(p1,p2) - fun key_of_node(nodeVal) = Eff.key_of_eps_or_rho nodeVal + fun key_of_node nodeVal = Eff.key_of_eps_or_rho nodeVal fun pp_nodeVal p = PP.flatten1(Eff.layout_effect p) - exception Find fun find [] x = raise Find | find ((x',y)::rest) x = if eq_nodeVal(x,x') then y else find rest x @@ -74,22 +70,20 @@ struct datatype graph = NODE of nodeVal * visited ref * graph list ref - fun reachable(n) = - let - fun reachable(NODE(p,v,ref L), acc) = - if !v then acc - else - (v := true; - reachable_edges(L, p:: acc)) - and reachable_edges ([],acc) = acc - | reachable_edges (n::rest,acc) = reachable_edges(rest, reachable(n,acc)) - - fun revisit(NODE(p,v,ref L)) = - if !v then (v:= false; List.app revisit L) - else () + fun nodeVal (NODE (p,_,_)) = p - in - reachable (n, []) footnote revisit n + fun reachable n = + let fun reachable (NODE(p,v,ref L), acc) = + if !v then acc + else (v := true; + reachable_edges(L, p::acc)) + and reachable_edges ([],acc) = acc + | 
reachable_edges (n::rest,acc) = reachable_edges(rest, reachable(n,acc)) + + fun revisit (NODE(p,v,ref L)) = + if !v then (v:= false; List.app revisit L) + else () + in reachable (n, []) footnote revisit n end fun eq_graph (NODE(p1,v1,r1))(NODE(p2,v2,r2)) = @@ -99,90 +93,68 @@ struct val regmap_size = 1000 - abstype regmap = REGMAP of (nodeVal * graph)list Array.array + abstype regmap = REGMAP of graph list Array.array (* hash table from keys (nodeVal mod regmap_size) to buckets of nodes with the same hash key *) with - fun array_of(ref(REGMAP a)) = a + fun array_of (ref(REGMAP a)) = a val R = ref(REGMAP(Array.array(regmap_size, []))) fun lookup_assoc p [] = NONE - | lookup_assoc p ((p', graph)::rest) = - if eq_nodeVal(p,p') then SOME graph - else lookup_assoc p rest + | lookup_assoc p (g::rest) = + if eq_nodeVal(p,nodeVal g) then SOME g + else lookup_assoc p rest - fun lookup_R p : graph option= - (* lookup p first in the binary tree and then in the association list *) + fun lookup_R p : graph option = + (* lookup p first in the array and then in the association list *) lookup_assoc p (Array.sub(array_of R, key_of_node p mod regmap_size)) - fun new_graph(p) = NODE(p, ref false, ref[]) + fun new_graph p = NODE(p, ref false, ref[]) fun lookup_R_with_insert (p: nodeVal) = let val i = key_of_node p mod regmap_size val l = Array.sub(array_of R, i) - in - (case lookup_assoc p l of - SOME g => g - | NONE => (*insert (p, new_graph p) in association list *) - let val g = new_graph(p) - in Array.update(array_of R, i, (p,g)::l); - g - end) + in case lookup_assoc p l of + SOME g => g + | NONE => (*insert (p, new_graph p) in association list *) + let val g = new_graph p + in Array.update(array_of R, i, g::l) + ; g + end end + (* add_node_iter p: add p to graph, if it has not been added already*) fun add_node_iter p = (lookup_R_with_insert p; ()) - fun add_edge_graph_iter(p: nodeVal, (g as NODE(p',_,_)): graph) = - case lookup_R p of - SOME (NODE(_,_, r' as (ref subG))) => - 
r':= (if (List.exists (eq_graph g) subG) then subG - else - ((*log ("adding edge from " ^ (pp_nodeVal p) ^ " to " - ^ (pp_nodeVal p') ^ "\n");*) - g::subG)) - | NONE => - Crash.impossible ("add_edge_graph_iter: can't find node " ^ pp_nodeVal p) + fun add_edge_graph_iter (p: nodeVal, (g as NODE(p',_,_)): graph) = + case lookup_R p of + SOME (NODE(_,_, r' as ref subG)) => + r':= (if (List.exists (eq_graph g) subG) then subG + else + ((*log ("adding edge from " ^ (pp_nodeVal p) ^ " to " + ^ (pp_nodeVal p') ^ "\n");*) + g::subG)) + | NONE => + Crash.impossible ("add_edge_graph_iter: can't find node " ^ pp_nodeVal p) (* add edge from node labelled by p, which must exist, to node labelled q (which may be created) *) - fun add_edge_iter(p: nodeVal, q: nodeVal) = - add_edge_graph_iter(p, lookup_R_with_insert q) - - (* connecting a region variable to a global region variable - with the same runtime type *) + fun add_edge_iter (p: nodeVal, q: nodeVal) = + add_edge_graph_iter(p, lookup_R_with_insert q) - fun connect_to_global rho : unit= - case Eff.get_place_ty rho of - SOME Eff.STRING_RT => add_edge_iter(rho,Eff.toplevel_region_withtype_string) - | SOME Eff.PAIR_RT => add_edge_iter(rho,Eff.toplevel_region_withtype_pair) - | SOME Eff.ARRAY_RT => add_edge_iter(rho,Eff.toplevel_region_withtype_array) - | SOME Eff.REF_RT => add_edge_iter(rho,Eff.toplevel_region_withtype_ref) - | SOME Eff.TRIPLE_RT => add_edge_iter(rho,Eff.toplevel_region_withtype_triple) - | SOME Eff.TOP_RT => add_edge_iter(rho,Eff.toplevel_region_withtype_top) - | SOME Eff.BOT_RT => (add_edge_iter(rho,Eff.toplevel_region_withtype_bot); - add_edge_iter(rho,Eff.toplevel_region_withtype_string); - add_edge_iter(rho,Eff.toplevel_region_withtype_pair); - add_edge_iter(rho,Eff.toplevel_region_withtype_array); - add_edge_iter(rho,Eff.toplevel_region_withtype_ref); - add_edge_iter(rho,Eff.toplevel_region_withtype_triple); - add_edge_iter(rho,Eff.toplevel_region_withtype_top)) - | NONE => die "connect_to_global" - - - 
fun init_regmap() = R:= REGMAP(Array.array(regmap_size, [])) + fun init_regmap () = R := REGMAP(Array.array(regmap_size, [])) (* find the places that are reachable from the place p *) fun reachable_in_graph_with_insertion p = foldl (fn (nodeVal, acc: place list) => - if Eff.is_rho nodeVal then nodeVal :: acc else acc) - [] - (reachable(lookup_R_with_insert p)) + if Eff.is_rho nodeVal then nodeVal :: acc else acc) + [] + (reachable(lookup_R_with_insert p)) (* Find the places in the graph reachable from any place or arrow effect in the list ps *) - fun reachable_with_insertion ps = let fun reachable (node as NODE(p,v,ref L), acc) = if !v then acc @@ -195,10 +167,10 @@ struct loop (pr, reachable(lookup_R_with_insert p, acc)) val reachableNodes = loop (ps, []) in - foldl (fn (NODE(nodeVal, v, _), acc : place list) => - (v := false; - if Eff.is_rho nodeVal then nodeVal :: acc else acc)) - [] reachableNodes + foldl (fn (NODE(nodeVal, v, _), acc : place list) => + (v := false; + if Eff.is_rho nodeVal then nodeVal :: acc else acc)) + [] reachableNodes end end (*abstype*) @@ -207,20 +179,22 @@ struct (* Creating a Region Flow Graph *) (* ---------------------------------------------------------------------- *) - fun insert(arreff): unit = (* assuming arreff = eps.phi, insert(arreff) makes + fun insert arreff : unit = (* assuming arreff = eps.phi, insert(arreff) makes an edge from eps to every region and effect variable which occurs free in phi *) let - val children = Eff.represents arreff + val children = Eff.represents_with_gets arreff handle ex => die ("insert " ^ pp_nodeVal arreff) + val children = map (fn e => if Eff.is_put e orelse Eff.is_get e orelse Eff.is_mut e then Eff.rho_of e else e) + children in (* make sure arreff is inserted *) - add_node_iter(arreff); + add_node_iter arreff; List.app (fn child => - if Eff.is_rho child orelse - Eff.is_arrow_effect child - then add_edge_iter(arreff, child) - else ()) children + if Eff.is_rho child orelse + Eff.is_arrow_effect 
child + then add_edge_iter(arreff, child) + else ()) children end local @@ -232,7 +206,7 @@ struct exception FRAME_NOT_FOUND - fun find(TR(e,_,_,_)) = find_exp e + fun find (TR(e,_,_,_)) = find_exp e and find_exp e = let fun find_sw(SWITCH(_,branches,otherwise)) = @@ -261,10 +235,7 @@ struct fun mk_graph0 trip = let - val exported = find trip handle FRAME_NOT_FOUND => die "frame not found" - fun is_exported lvar = List.exists (fn lvar_frame => Lvars.eq(lvar, lvar_frame)) exported - - fun mk_graph_exp(e: exp): unit = + fun mk_graph_exp (e: exp): unit = case e of FIX {free, shared_clos, functions, scope} => let @@ -273,16 +244,8 @@ struct bound_but_never_written_into, other,bind} = let - val _ = List.app insert formal_arreffs - (*val _ = log("lvar = " ^ Lvars.pr_lvar lvar ^ ":" ^ Int.toString(length formal_regvars)) *) - - (* region-polymorphic functions which are exported must have their formal - region parameters connected to global regions with the same runtime type. - This is necessary for soundness of the analysis across program units. 
- *) - fun deal_with_one_instance il = let val (actual_rhos, actual_epss, taus) = RType.un_il il in @@ -300,13 +263,9 @@ struct handle BasisCompat.ListPair.UnequalLengths => die "deal_with_one_instance (2)"); List.app insert actual_epss - end - in List.app add_node_iter formal_regvars; - if is_exported lvar then List.app connect_to_global formal_regvars - else (); List.app add_node_iter formal_arreffs; List.app deal_with_one_instance instances; mk_graph bind diff --git a/src/Compiler/Regions/RegionStatEnv.sml b/src/Compiler/Regions/RegionStatEnv.sml index f54521dd5..80ad9352f 100644 --- a/src/Compiler/Regions/RegionStatEnv.sml +++ b/src/Compiler/Regions/RegionStatEnv.sml @@ -330,7 +330,7 @@ structure RegionStatEnv: REGION_STAT_ENV = dump(E.layout_effect_deep(node)); die "mkConeToplevel.closure.node not arrow effect or get/put effect") end) - acc (E.represents rho_eps) (* very nasty bug fixed; the two arguments to foldL were the wrong way around; mads *) + acc (E.represents_no_gets rho_eps) (* very nasty bug fixed; the two arguments to foldL were the wrong way around; mads *) else if E.is_rho rho_eps then rho_eps :: acc else acc end) @@ -362,7 +362,7 @@ structure RegionStatEnv: REGION_STAT_ENV = dump(E.layout_effect_deep(node)); die "mkConeToplevel.closure.node not arrow effect or get/put effect") ) - acc (E.represents rho_eps) + acc (E.represents_no_gets rho_eps) else if E.is_rho rho_eps then rho_eps :: acc else acc ) diff --git a/src/Compiler/regions.mlb b/src/Compiler/regions.mlb index 8bf03b594..4af353e2a 100644 --- a/src/Compiler/regions.mlb +++ b/src/Compiler/regions.mlb @@ -46,7 +46,6 @@ in in Regions/RegionExp.sml local open Pickle in Regions/RegionStatEnv.sml end end - Regions/EffVarEnv.sml ../Common/QUASI_ENV.sml local open Pickle in ../Common/QuasiEnv.sml end local open Edlib Pickle CompilerObjects in Regions/Mul.sml end diff --git a/src/Runtime/IO.c b/src/Runtime/IO.c index 580743aa8..398ab2215 100644 --- a/src/Runtime/IO.c +++ b/src/Runtime/IO.c @@ 
-136,10 +136,7 @@ REG_POLY_FUN_HDR(inputStream, Region rd, uintptr_t is1, size_t n) is = (FILE *)untag_scalar(is1); - if ( is_inf_and_atbot(rd) ) - { - resetRegion(rd); - } + maybeResetRegion(rd); // i = fread(buf,1,n,is); // return REG_POLY_CALL(convertBinStringToML, rd, i, buf); @@ -626,13 +623,8 @@ REG_POLY_FUN_HDR(sml_poll, Region listR, Region tupR, Context ctx, uintptr_t pde pollfds[i].revents = 0; } - if ( is_inf_and_atbot(listR) ) { - resetRegion(listR); - } - - if ( is_inf_and_atbot(tupR) ) { - resetRegion(tupR); - } + maybeResetRegion(listR); + maybeResetRegion(tupR); int res = poll(pollfds, n, tm); diff --git a/src/Runtime/Math.c b/src/Runtime/Math.c index c380da470..b8d49ac22 100644 --- a/src/Runtime/Math.c +++ b/src/Runtime/Math.c @@ -875,6 +875,7 @@ static void mkSMLMinus(char * s) { String REG_POLY_FUN_HDR(stringOfFloat, Region rAddr, size_t arg) { + maybeResetRegion(rAddr); char buf[64]; sprintf(buf, "%.12g", get_d(arg)); mkSMLMinus(buf); @@ -892,6 +893,7 @@ REG_POLY_FUN_HDR(stringOfFloat, Region rAddr, size_t arg) String REG_POLY_FUN_HDR(generalStringOfFloat, Region rAddr, String format, size_t f) { + maybeResetRegion(rAddr); size_t size = snprintf(NULL, 0, format->data, get_d(f)) + 1; char *buf = malloc(size); snprintf(buf, size, format->data, get_d(f)); diff --git a/src/Runtime/Posix.c b/src/Runtime/Posix.c index 1fae8aed6..9641c3593 100644 --- a/src/Runtime/Posix.c +++ b/src/Runtime/Posix.c @@ -351,10 +351,7 @@ REG_POLY_FUN_HDR(sml_readVec,uintptr_t pair, Region sr, int fd, int n1) String s; mkTagPairML(pair); n = convertIntToC(n1); - if ( is_inf_and_atbot(sr) ) - { - resetRegion(sr); - } + maybeResetRegion(sr); s = REG_POLY_CALL(allocStringC, sr, n+1); char *p = s->data; p[n] = '\0'; @@ -723,13 +720,15 @@ sml_findsignal(char *s) static String REG_POLY_FUN_HDR(sml_PosixName, Region rs, size_t e, struct syserr_entry arr[], size_t amount) { - size_t i = 0, j, k,n; - j = amount; + maybeResetRegion(rs); + if ( amount == 0 ) { return NULL; } + size_t 
i = 0; + size_t j = amount - 1; e = convertIntToC(e); while (i <= j) { - k = i + (j-i) / 2; - n = arr[k].number - e; + size_t k = i + (j-i) / 2; + int n = arr[k].number - e; if (n == 0) { return REG_POLY_CALL(convertStringToML, rs, arr[k].name); @@ -759,13 +758,12 @@ REG_POLY_FUN_HDR(sml_getgrgid, uintptr_t triple, Region nameR, Region memberList { uintptr_t res; uintptr_t *list, *pair; - char *b; struct group gbuf, *gbuf2; char **members; mkTagTripleML(triple); gid_t gid = (gid_t) convertIntToC(g); s = convertIntToC(s) + 1; - b = (char *) malloc(s); + char* b = (char *) malloc(s); if (!b) { res = errno; @@ -805,13 +803,12 @@ REG_POLY_FUN_HDR(sml_getgrnam, uintptr_t triple, Region memberListR, Region memb { uintptr_t res; uintptr_t *list, *pair; - char *b; struct group gbuf, *gbuf2; char **members; char *name = nameML->data; mkTagTripleML(triple); s = convertIntToC(s) + 1; - b = (char *) malloc(s); + char* b = (char *) malloc(s); if (!b) { res = errno; diff --git a/src/Runtime/Region.c b/src/Runtime/Region.c index b632cb7d9..e0654235a 100644 --- a/src/Runtime/Region.c +++ b/src/Runtime/Region.c @@ -26,10 +26,7 @@ RegionPageMap* regionPageMapInsert(RegionPageMap* regionPageMap, uintptr_t addr) { - int index; - RegionPageMapHashList* newElem; - - newElem = (RegionPageMapHashList*)malloc(sizeof(RegionPageMapHashList)); + RegionPageMapHashList* newElem = (RegionPageMapHashList*)malloc(sizeof(RegionPageMapHashList)); if ( newElem == NULL ) { die("regionPageMapInsert error"); } @@ -37,7 +34,7 @@ regionPageMapInsert(RegionPageMap* regionPageMap, uintptr_t addr) newElem->n = 1; newElem->addr = addr; - index = hashRegionPageIndex(addr); + int index = hashRegionPageIndex(addr); newElem->next = regionPageMap[index]; regionPageMap[index] = newElem; @@ -49,8 +46,7 @@ regionPageMapInsert(RegionPageMap* regionPageMap, uintptr_t addr) void regionPageMapZero(RegionPageMap* regionPageMap) { - int i; - for ( i = 0 ; i < REGION_PAGE_MAP_HASH_TABLE_SIZE ; i++ ) + for ( int i = 0 ; i < 
REGION_PAGE_MAP_HASH_TABLE_SIZE ; i++ ) { regionPageMap[i] = NULL; } @@ -59,9 +55,7 @@ regionPageMapZero(RegionPageMap* regionPageMap) RegionPageMap* regionPageMapNew(void) { - RegionPageMap* regionPageMap; - - regionPageMap = (RegionPageMap*)malloc(sizeof(long*) * REGION_PAGE_MAP_HASH_TABLE_SIZE); + RegionPageMap* regionPageMap = (RegionPageMap*)malloc(sizeof(long*) * REGION_PAGE_MAP_HASH_TABLE_SIZE); if ( regionPageMap == NULL ) { die("Unable to allocate memory for RegionPageMapHashTable"); } @@ -73,8 +67,7 @@ regionPageMapNew(void) RegionPageMap* regionPageMapIncr(RegionPageMap* regionPageMap, uintptr_t addr) { - RegionPageMapHashList* p; - for ( p = regionPageMap[hashRegionPageIndex(addr)]; p != NULL ; p = p->next ) + for ( RegionPageMapHashList* p = regionPageMap[hashRegionPageIndex(addr)]; p != NULL ; p = p->next ) { if ( p->addr == addr ) { @@ -88,8 +81,7 @@ regionPageMapIncr(RegionPageMap* regionPageMap, uintptr_t addr) uintptr_t regionPageMapLookup(RegionPageMap* regionPageMap, uintptr_t addr) { - RegionPageMapHashList* p; - for ( p = regionPageMap[hashRegionPageIndex(addr)]; p != NULL ; p = p->next ) + for ( RegionPageMapHashList* p = regionPageMap[hashRegionPageIndex(addr)]; p != NULL ; p = p->next ) { if ( p->addr == addr ) { @@ -102,15 +94,12 @@ regionPageMapLookup(RegionPageMap* regionPageMap, uintptr_t addr) void regionPageMapClear(RegionPageMap* regionPageMap) { - int i; - RegionPageMapHashList *p, *n; - - for ( i = 0 ; i < REGION_PAGE_MAP_HASH_TABLE_SIZE ; i++ ) + for ( int i = 0 ; i < REGION_PAGE_MAP_HASH_TABLE_SIZE ; i++ ) { - p = regionPageMap[i]; + RegionPageMapHashList *p = regionPageMap[i]; while ( p ) { - n = p->next; + RegionPageMapHashList *n = p->next; free(p); p = n; } @@ -219,15 +208,13 @@ void printTopRegInfo() { void pp_gen(Gen *gen) { - Rp* rp; - fprintf(stderr,"\n[Gen g%d at addr: %p, fp:%p, a:%p, b:%p\n", (is_gen_1(*gen)?1:0), gen, gen->fp, gen->a, rpBoundary(gen->a)); - for (rp = clear_fp(gen->fp) ; rp ; rp = 
clear_tospace_bit(rp->n)) { + for (Rp* rp = clear_fp(gen->fp) ; rp ; rp = clear_tospace_bit(rp->n)) { #ifdef ENABLE_GEN_GC fprintf(stderr," Rp %p, next:%p, colorPtr:%p, data: %p, rp+1: %p\n", rp, @@ -294,12 +281,11 @@ void printRegionStack() { inline size_t NoOfPagesInGen(Gen *gen) { - size_t i; - Rp *rp; + size_t i = 0; debug(printf("[NoOfPagesInGen...")); - for ( i = 0, rp = clear_fp(gen->fp) ; rp ; rp = clear_tospace_bit(rp->n) ) + for ( Rp* rp = clear_fp(gen->fp) ; rp ; rp = clear_tospace_bit(rp->n) ) i++; debug(printf("]\n")); @@ -342,12 +328,11 @@ printFreeList() size_t size_free_list() { - Rp *rp; size_t i=0; LOCK_LOCK(FREELISTMUTEX); - for ( rp = global_freelist ; rp ; rp = rp-> n ) + for ( Rp* rp = global_freelist ; rp ; rp = rp-> n ) i++; LOCK_UNLOCK(FREELISTMUTEX); @@ -382,8 +367,7 @@ alloc_new_page(Gen *gen) debug(printf("[alloc_new_page: gen: %p", gen);) #ifdef PROFILING - Ro *r; - r = get_ro_from_gen(*gen); + Ro *r = get_ro_from_gen(*gen); #endif /* PROFILING */ #ifdef PROFILING @@ -633,10 +617,6 @@ void free_lobjs(Lobjs* lobjs) * When profiling we also use this function. * *----------------------------------------------------------------------*/ void deallocateRegion(Context ctx) { -#ifdef PROFILING - int i; -#endif - debug(printf("[deallocateRegion... 
top region: %p\n", TOP_REGION)); CHECK_CTX("deallocateRegion"); @@ -645,7 +625,7 @@ void deallocateRegion(Context ctx) { callsOfDeallocateRegionInf++; regionDescUseInf -= (sizeRo-sizeRoProf); regionDescUseProfInf -= sizeRoProf; - i = NoOfPagesInRegion(TOP_REGION); + int i = NoOfPagesInRegion(TOP_REGION); noOfPages -= i; allocNowInf -= TOP_REGION->allocNow; allocProfNowInf -= TOP_REGION->allocProfNow; @@ -678,7 +658,6 @@ void deallocateRegion(Context ctx) { TOP_REGION = TOP_REGION->p; debug(printf("]\n")); - return; } @@ -686,26 +665,33 @@ inline static Lobjs * alloc_lobjs(int n) { Lobjs* lobjs; #ifdef ENABLE_GC - char *p; size_t r; - size_t sz_bytes; - sz_bytes = sizeof(uintptr_t)*n + sizeof(Lobjs) + sizeof(Rp); /* ensure alignment on Rp boundaries */ - p = malloc(sz_bytes); + size_t sz_bytes = sizeof(uintptr_t)*n + sizeof(Lobjs) + sizeof(Rp); /* ensure alignment on Rp boundaries */ + char* p = malloc(sz_bytes); if ( p == NULL ) - die("alloc_lobjs: malloc returned NULL"); - if ( (r = (size_t)p % sizeof(Rp)) ) { - lobjs = (Lobjs*)(p + sizeof(Rp) - r); - } else { - lobjs = (Lobjs*)p; - } + { + die("alloc_lobjs: malloc returned NULL"); + } + if ( (r = (size_t)p % sizeof(Rp)) ) + { + lobjs = (Lobjs*)(p + sizeof(Rp) - r); + } + else + { + lobjs = (Lobjs*)p; + } //fprintf(stderr, "Allocated large obj: p=%p; r=%x; lobjs=%p; last_byte=%p; sz_bytes=%d\n", p, r, lobjs, p + sz_bytes, sz_bytes); if ( ! is_rp_aligned((size_t)lobjs) ) - die("alloc_lobjs: large object is not properly aligned."); + { + die("alloc_lobjs: large object is not properly aligned."); + } lobjs->orig = p; #else lobjs = (Lobjs*)malloc(sizeof(uintptr_t)*n + sizeof(Lobjs)); if ( lobjs == NULL ) - die("alloc_lobjs: malloc returned NULL"); + { + die("alloc_lobjs: malloc returned NULL"); + } #endif /* ENABLE_GC */ return lobjs; } @@ -716,9 +702,6 @@ alloc_lobjs(int n) { * The free list has to be empty. 
* *----------------------------------------------------------------------*/ void callSbrk() { - Rp *np, *old_free_list; - char *sb; - size_t temp; #ifdef PROFILING callsOfSbrk++; @@ -729,7 +712,7 @@ void callSbrk() { /* For GC we require alignments according to the size of region pages! */ - sb = malloc(BYTES_ALLOC_BY_SBRK + sizeof(Rp) + sizeof(Rp) ); + char* sb = malloc(BYTES_ALLOC_BY_SBRK + sizeof(Rp) + sizeof(Rp) ); if ( sb == NULL ) { perror("I could not allocate more memory; either no more memory is\navailable or the memory subsystem is detectively corrupted\n"); @@ -737,6 +720,7 @@ void callSbrk() { } /* alignment (martin) */ + size_t temp; if (( temp = (size_t)(((uintptr_t)sb) % sizeof(Rp) ))) { sb = sb + sizeof(Rp) - temp; } @@ -749,8 +733,8 @@ void callSbrk() { die("SBRK region page is not properly aligned."); } - old_free_list = global_freelist; - np = (Rp *) sb; + Rp* old_free_list = global_freelist; + Rp* np = (Rp *) sb; global_freelist = np; rp_total++; @@ -1033,7 +1017,6 @@ void resetGen(Gen *gen) #endif /* ENABLE_GC */ MAYBE_DEFINE_CONTEXT; - (last_rp_of_gen(gen))->n = FREELIST; FREELIST = (clear_fp(gen->fp))->n; (clear_fp(gen->fp))->n = NULL; @@ -1050,19 +1033,12 @@ void resetGen(Gen *gen) Region resetRegion(Region rAdr) { - Ro *r; - -#ifdef PROFILING - int j; -#endif - debug(printf("[resetRegions...")); - - r = clearStatusBits(rAdr); + Ro* r = clearStatusBits(rAdr); #ifdef PROFILING callsOfResetRegion++; - j = NoOfPagesInRegion(r); + int j = NoOfPagesInRegion(r); /* There is always at-least one page in a generation. */ noOfPages -= j-MIN_NO_OF_PAGES_IN_REGION; @@ -1079,7 +1055,6 @@ resetRegion(Region rAdr) #endif /* ENABLE_GEN_GC */ free_lobjs(r->lobjs); - r->lobjs = NULL; #ifdef PROFILING @@ -1088,10 +1063,22 @@ resetRegion(Region rAdr) #endif debug(printf("]\n")); - return rAdr; /* We preserve rAdr and the status bits. 
*/ } +// ---------------------------------------------------------------- +// maybeResetRegion(r): +// Reset region r if the inf-bit and the atbot-bit is set +// ---------------------------------------------------------------- +inline void +maybeResetRegion(Region r) { + if ( is_inf_and_atbot(r) ) + { + resetRegion(r); + } +} + + /*-------------------------------------------------------------------------* * deallocateRegionsUntil: * * It is called with rAddr=sp, which do not necessarily point at a region * diff --git a/src/Runtime/Region.h b/src/Runtime/Region.h index 2c2080d44..4eb7a69db 100644 --- a/src/Runtime/Region.h +++ b/src/Runtime/Region.h @@ -469,6 +469,7 @@ Region allocTripleRegionInfiniteProfilingMaybeUnTag(Context ctx, Region r, size_ #endif /* ENABLE_GC */ Region resetRegion(Region r); +void maybeResetRegion(Region r); size_t NoOfPagesInRegion(Region r); size_t NoOfPagesInGen(Gen* gen); diff --git a/src/Runtime/String.c b/src/Runtime/String.c index 2e474987d..7b198ebf0 100644 --- a/src/Runtime/String.c +++ b/src/Runtime/String.c @@ -105,46 +105,41 @@ String REG_POLY_FUN_HDR(allocStringML, Region rAddr, size_t sizeML) { size_t sizeC = convertIntToC(sizeML); - String strPtr; - // maybe reset region - if ( is_inf_and_atbot(rAddr) ) - { - resetRegion(rAddr); - } - - strPtr = REG_POLY_CALL(allocString, rAddr, sizeC); + maybeResetRegion(rAddr); + String strPtr = REG_POLY_CALL(allocString, rAddr, sizeC); return strPtr; } String REG_POLY_FUN_HDR(allocStringC, Region rAddr, size_t sizeC) { - String strPtr; - strPtr = REG_POLY_CALL(allocString, rAddr, sizeC); + String strPtr = REG_POLY_CALL(allocString, rAddr, sizeC); return strPtr; } String -REG_POLY_FUN_HDR(concatStringML, Region rAddr, String str1, String str2) +REG_POLY_FUN_HDR(concatStringML, Region r, String s1, String s2) { - String res; - char *s, *p; - size_t i, sz; - - // resetting not possible due to possible aliasing - - debug(printf("[enter concatStringML (rAddr=%p,str1=%p,str2=%p)]\n", 
rAddr,str1,str2);) - sz = sizeStringDefine(str1) + sizeStringDefine(str2); - res = REG_POLY_CALL(allocString, rAddr, sz); - p = res->data; - s = str1->data; - for ( i = 0; i < sizeStringDefine(str1); i++) + debug(printf("[enter concatStringML (r=%p,s1=%p,s2=%p)]\n",r,s1,s2);) + + // Notice that _modular storage mode analysis_ ensures that if the atbot-bit + // is set, there is no aliasing between the result region and the region + // holding the argument strings... + maybeResetRegion(r); + + size_t sz1 = sizeStringDefine(s1); + size_t sz2 = sizeStringDefine(s2); + size_t sz = sz1 + sz2; + String res = REG_POLY_CALL(allocString, r, sz); + char* p = res->data; + char* s = s1->data; + for ( size_t i = 0; i < sz1; i++) { *p++ = *s++; } - s = str2->data; - for ( i = 0; i < sizeStringDefine(str2); i++) + s = s2->data; + for ( size_t i = 0; i < sz2; i++) { *p++ = *s++; } @@ -157,26 +152,17 @@ REG_POLY_FUN_HDR(concatStringML, Region rAddr, String str1, String str2) String REG_POLY_FUN_HDR(implodeCharsML, Region rAddr, uintptr_t xs) { - String res; - size_t length = 0; - size_t ys; - char *p; - - // maybe reset region - if ( is_inf_and_atbot(rAddr) ) - { - resetRegion(rAddr); - } + maybeResetRegion(rAddr); // calculate length of string - for ( ys = xs; isCONS(ys); ys = tl(ys) ) + size_t len = 0; + for ( size_t ys = xs; isCONS(ys); ys = tl(ys) ) { - length++; + len++; } - - res = REG_POLY_CALL(allocString, rAddr, length); - p = res->data; - for ( ys = xs; isCONS(ys); ys = tl(ys) ) + String res = REG_POLY_CALL(allocString, rAddr, len); + char* p = res->data; + for ( size_t ys = xs; isCONS(ys); ys = tl(ys) ) { *p++ = (unsigned char) convertIntToC (hd(ys)); } @@ -190,27 +176,26 @@ REG_POLY_FUN_HDR(implodeCharsML, Region rAddr, uintptr_t xs) String REG_POLY_FUN_HDR(implodeStringML, Region rAddr, uintptr_t xs) { - String res; - size_t sz=0; - size_t ys; - char *p; + // Notice that _modular storage mode analysis_ ensures that if the atbot-bit + // is set, there is no aliasing 
between the result region and the region + // holding the argument strings... + maybeResetRegion(rAddr); + + size_t sz = 0; // calculate string length and allocate - for ( ys = xs; isCONS(ys); ys = tl(ys) ) + for ( size_t ys = xs; isCONS(ys); ys = tl(ys) ) { sz += sizeStringDefine(hd(ys)); } - res = REG_POLY_CALL(allocString, rAddr, sz); + String res = REG_POLY_CALL(allocString, rAddr, sz); - p = res->data; - for ( ys = xs; isCONS(ys); ys = tl(ys) ) + char* p = res->data; + for ( size_t ys = xs; isCONS(ys); ys = tl(ys) ) { - String sd; - size_t i; - char *s; - sd = (String)hd(ys); - s = sd->data; - for ( i = 0; i < sizeStringDefine(sd); i++ ) + String sd = (String)hd(ys); + char* s = sd->data; + for ( size_t i = 0; i < sizeStringDefine(sd); i++ ) { *p++ = *s++; } @@ -325,53 +310,6 @@ REG_POLY_FUN_HDR(exnNameML, Region rAddr, uintptr_t e) return REG_POLY_CALL(convertStringToML, rAddr, ml_s->data); } -/* explodeStringML(rAddr, str): convert a string to a char list. - * A list is kept in one region, pointed to by rAddr. 
*/ - -uintptr_t * -REG_POLY_FUN_HDR(explodeStringML, Region rAddr, String str) -{ - uintptr_t *res, *consPtr, *pair, *tpair; - size_t i, sz; - char *p; - - sz = sizeStringDefine(str); - if (sz == 0) - { - makeNIL(res); - return res; - } - - // save first char such that we can return a pointer to it - p = str->data; - -#ifdef PROFILING - allocPairMLProf(rAddr, pair, pPoint); -#else - allocPairML(rAddr, pair); -#endif - - first(pair) = convertIntToML (*p); - makeCONS(pair, consPtr); - res = consPtr; - for ( i = 1 ; i < sz; i++ ) - { - #ifdef PROFILING - allocPairMLProf(rAddr, tpair, pPoint); - #else - allocPairML(rAddr, tpair); - #endif - - first(tpair) = convertIntToML (*p++); - makeCONS(tpair, consPtr); - second(pair) = (size_t)consPtr; - pair = tpair; - } - makeNIL(consPtr); - second(pair) = (size_t)consPtr; - return res; -} - // for debugging */ void printNum(ssize_t n) diff --git a/src/Runtime/String.h b/src/Runtime/String.h index ce714693e..70a164455 100644 --- a/src/Runtime/String.h +++ b/src/Runtime/String.h @@ -54,6 +54,5 @@ String REG_POLY_FUN_HDR(implodeStringML, Region rAddr, size_t xs); String REG_POLY_FUN_HDR(convertStringToML, Region rAddr, const char *cStr); String REG_POLY_FUN_HDR(convertBinStringToML, Region rAddr, size_t l, const char *cStr); String REG_POLY_FUN_HDR(exnNameML, Region rAddr, uintptr_t e); -size_t * REG_POLY_FUN_HDR(explodeStringML, Region rAddr2, String str); // no region for the cons cells #endif /* STRING_H */ diff --git a/test/all.tst b/test/all.tst index 4ab46f405..2aded784b 100644 --- a/test/all.tst +++ b/test/all.tst @@ -183,4 +183,5 @@ foldbug.sml seltuptup.sml poll.sml enum-eq.sml -stringconcat.sml noopt (* check transformation of calls to argument-transformed functions *) \ No newline at end of file +stringconcat.sml noopt (* check transformation of calls to argument-transformed functions *) +sma.sml noopt (* storage mode error - issue #208 *) \ No newline at end of file diff --git a/test/sma.sml b/test/sma.sml new file 
mode 100644 index 000000000..b34c654e2 --- /dev/null +++ b/test/sma.sml @@ -0,0 +1,18 @@ +(* Compile with mlkit -Pcee -no_cfold -maximum_inline_size 0 -no_gc *) + +local +fun pr s = (print s; print "\n") + +fun f (g: unit -> unit) : string = + let val y : string = implode [#"H", #"i"] + in g() + ; y + end + +fun run () : unit = + pr let val x : string = implode [#"H", #"e", #"j"] + in if false then x else f (fn () => pr x) + end +in +val () = run() +end diff --git a/test/sma.sml.out.ok b/test/sma.sml.out.ok new file mode 100644 index 000000000..155899c79 --- /dev/null +++ b/test/sma.sml.out.ok @@ -0,0 +1,2 @@ +Hej +Hi