diff --git a/AUTHORS.txt b/AUTHORS.txt index f710d6bd71f3f..34b10a8cc76a9 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -13,6 +13,7 @@ Armin Ronacher Austin Seipp Ben Blum Ben Striegel +Benjamin Herr Benjamin Jackman Benjamin Kircher Brendan Eich diff --git a/doc/lib/codemirror-rust.js b/doc/lib/codemirror-rust.js index 900180c5eb5e3..6c05ee252b11d 100644 --- a/doc/lib/codemirror-rust.js +++ b/doc/lib/codemirror-rust.js @@ -5,11 +5,11 @@ CodeMirror.defineMode("rust", function() { "do": "else-style", "ret": "else-style", "fail": "else-style", "break": "atom", "cont": "atom", "const": "let", "resource": "fn", "let": "let", "fn": "fn", "for": "for", "alt": "alt", "iface": "iface", - "impl": "impl", "type": "type", "enum": "enum", "mod": "mod", + "impl": "impl", "type": "type", "enum": "enum", "class": "atom", "mod": "mod", "as": "op", "true": "atom", "false": "atom", "assert": "op", "check": "op", "claim": "op", "extern": "ignore", "unsafe": "ignore", "import": "else-style", "export": "else-style", "copy": "op", "log": "op", "log_err": "op", - "use": "op", "bind": "op", "self": "atom" + "use": "op", "bind": "op", "self": "atom", "new": "atom" }; var typeKeywords = function() { var keywords = {"fn": "fn"}; diff --git a/doc/rust.css b/doc/rust.css index db45c0e5426b8..d68f0b851c25b 100644 --- a/doc/rust.css +++ b/doc/rust.css @@ -57,3 +57,10 @@ h1.title { background-repeat: no-repeat; background-position: right; } + +blockquote { + color: black; + background-color: lavender; + margin: 1em; + padding: 0.5em 1em 0.5em 1em; +} \ No newline at end of file diff --git a/doc/rust.md b/doc/rust.md index 074913c8bf1e6..f002393b71ace 100644 --- a/doc/rust.md +++ b/doc/rust.md @@ -209,9 +209,9 @@ import export use mod The keywords in [source files](#source-files) are the following strings: ~~~~~~~~ {.keyword} -alt assert +alt again assert break -check claim class const cont copy +check claim class const copy drop else enum export extern fail false fn for @@ -2034,19 +2034,19 @@ 
break_expr : "break" ; Executing a `break` expression immediately terminates the innermost loop enclosing it. It is only permitted in the body of a loop. -### Continue expressions +### Again expressions ~~~~~~~~{.ebnf .gram} -break_expr : "cont" ; +again_expr : "again" ; ~~~~~~~~ -Evaluating a `cont` expression immediately terminates the current iteration of +Evaluating an `again` expression immediately terminates the current iteration of the innermost loop enclosing it, returning control to the loop *head*. In the case of a `while` loop, the head is the conditional expression controlling the -loop. In the case of a `for` loop, the head is the vector-element increment -controlling the loop. +loop. In the case of a `for` loop, the head is the call-expression controlling +the loop. -A `cont` expression is only permitted in the body of a loop. +An `again` expression is only permitted in the body of a loop. ### For expressions diff --git a/doc/tutorial.md b/doc/tutorial.md index 51805f4feea5d..c15135b998f73 100644 --- a/doc/tutorial.md +++ b/doc/tutorial.md @@ -11,7 +11,7 @@ comparisons to other languages in the C family. The tutorial covers the whole language, though not with the depth and precision of the [language reference](rust.html). -## Language Overview +## Language overview Rust is a systems programming language with a focus on type safety, memory safety, concurrency and performance. It is intended for writing @@ -38,7 +38,7 @@ high-level features include: * Generics - Functions and types can be parameterized over generic types with optional type constraints -## First Impressions +## First impressions As a curly-brace language in the tradition of C, C++, and JavaScript, Rust looks a lot like other languages you may be familiar with. @@ -348,9 +348,6 @@ Rust identifiers must start with an alphabetic character or an underscore, and after that may contain any alphanumeric character, and more underscores. 
-***Note:*** The parser doesn't currently recognize non-ascii alphabetic -characters. This is a bug that will eventually be fixed. - The double-colon (`::`) is used as a module separator, so `io::println` means 'the thing named `println` in the module named `io`'. @@ -383,6 +380,14 @@ fn main() { } ~~~~ +Local variables may shadow earlier declarations, causing the +previous variable to go out of scope. + +~~~~ +let my_favorite_value: float = 57.8; +let my_favorite_value: int = my_favorite_value as int; +~~~~ + ## Types The `-> bool` in the `is_four` example is the way a function's return @@ -473,9 +478,7 @@ a type error. Read about [single-variant enums](#single_variant_enum) further on if you need to create a type name that's not just a synonym. -## Literals - -### Numeric literals +## Numeric literals Integers can be written in decimal (`144`), hexadecimal (`0x90`), and binary (`0b10010000`) base. @@ -534,7 +537,7 @@ and `f64` can be used to create literals of a specific type. The suffix `f` can be used to write `float` literals without a dot or exponent: `3f`. -### Other literals +## Other literals The nil literal is written just like the type: `()`. The keywords `true` and `false` produce the boolean literals. @@ -783,7 +786,7 @@ a specific value, are not allowed. `while` produces a loop that runs as long as its given condition (which must have type `bool`) evaluates to true. Inside a loop, the -keyword `break` can be used to abort the loop, and `cont` can be used +keyword `break` can be used to abort the loop, and `again` can be used to abort the current iteration and continue with the next. ~~~~ @@ -822,6 +825,21 @@ handle the failure, allowing the program to continue running. to access a vector out of bounds, or running a pattern match with no matching clauses, both result in the equivalent of a `fail`. 
+## Assertions + +The keyword `assert`, followed by an expression with boolean type, +will check that the given expression results in `true`, and cause a +failure otherwise. It is typically used to double-check things that +*should* hold at a certain point in a program. `assert` statements are +always active; there is no way to build Rust code with assertions +disabled. + +~~~~ +let mut x = 100; +while (x > 10) { x -= 10; } +assert x == 10; +~~~~ + ## Logging Rust has a built-in logging mechanism, using the `log` statement. @@ -864,113 +882,61 @@ and will log the formatted string: Because the macros `#debug`, `#warn`, and `#error` expand to calls to `log`, their arguments are also lazily evaluated. -## Assertions +# Functions -The keyword `assert`, followed by an expression with boolean type, -will check that the given expression results in `true`, and cause a -failure otherwise. It is typically used to double-check things that -*should* hold at a certain point in a program. `assert` statements are -always active; there is no way to build Rust code with assertions -disabled. +Like all other static declarations, such as `type`, functions can be +declared both at the top level and inside other functions (or modules, +which we'll come back to [later](#modules-and-crates)). + +We've already seen several function definitions. They are introduced +with the `fn` keyword, the types of arguments are specified following +colons and the return type follows the arrow. ~~~~ -let mut x = 100; -while (x > 10) { x -= 10; } -assert x == 10; +fn int_to_str(i: int) -> str { + ret "tube sock"; +} ~~~~ -# The Rust Memory Model - -At this junction let's take a detour to explain the concepts involved -in Rust's memory model. Rust has a very particular approach to -memory management that plays a significant role in shaping the "feel" -of the language. Understanding the memory landscape will illuminate -several of Rust's unique features as we encounter them. 
- -Rust has three competing goals that inform its view of memory: - -* Memory safety - memory that is managed by and is accessible to - the Rust language must be guaranteed to be valid. Under normal - circumstances it is impossible for Rust to trigger a segmentation - fault or leak memory -* Performance - high-performance low-level code tends to employ - a number of allocation strategies. low-performance high-level - code often uses a single, GC-based, heap allocation strategy -* Concurrency - Rust maintain memory safety guarantees even - for code running in parallel - -## How performance considerations influence the memory model - -Many languages that ofter the kinds of memory safety guarentees that -Rust does have a single allocation strategy: objects live on the heap, -live for as long as they are needed, and are periodically garbage -collected. This is very straightforword both conceptually and in -implementation, but has very significant costs. Such languages tend to -aggressively pursue ways to ameliorate allocation costs (think the -Java virtual machine). Rust supports this strategy with _shared -boxes_, memory allocated on the heap that may be referred to (shared) -by multiple variables. - -In comparison, languages like C++ offer a very precise control over -where objects are allocated. In particular, it is common to put -them directly on the stack, avoiding expensive heap allocation. In -Rust this is possible as well, and the compiler will use a clever -lifetime analysis to ensure that no variable can refer to stack -objects after they are destroyed. - -## How concurrency considerations influence the memory model - -Memory safety in a concurrent environment tends to mean avoiding race -conditions between two threads of execution accessing the same -memory. Even high-level languages frequently avoid solving this -problem, requiring programmers to correctly employ locking to unsure -their program is free of races. 
- -Rust starts from the position that memory simply cannot be shared -between tasks. Experience in other languages has proven that isolating -each tasks' heap from each other is a reliable strategy and one that -is easy for programmers to reason about. Having isolated heaps -additionally means that garbage collection must only be done -per-heap. Rust never 'stops the world' to garbage collect memory. - -If Rust tasks have completely isolated heaps then that seems to imply -that any data transferred between them must be copied. While this -is a fine and useful way to implement communication between tasks, -it is also very inefficient for large data structures. - -Because of this Rust also introduces a global "exchange heap". Objects -allocated here have _ownership semantics_, meaning that there is only -a single variable that refers to them. For this reason they are -refered to as _unique boxes_. All tasks may allocate objects on this -heap, then _move_ those allocations to other tasks, avoiding expensive -copies. +The `ret` keyword immediately returns from the body of a function. It +is optionally followed by an expression to return. A function can +also return a value by having its top level block produce an +expression. -## What to be aware of +~~~~ +# const copernicus: int = 0; +fn int_to_str(i: int) -> str { + if i == copernicus { + ret "tube sock"; + } else { + ret "violin"; + } +} +~~~~ -Rust has three "realms" in which objects can be allocated: the stack, -the local heap, and the exchange heap. These realms have corresponding -pointer types: the borrowed pointer (`&T`), the shared pointer (`@T`), -and the unique pointer (`~T`). These three sigils will appear -repeatedly as we explore the language. Learning the appropriate role -of each is key to using Rust effectively. 
+~~~~ +# const copernicus: int = 0; +fn int_to_str(i: int) -> str { + if i == copernicus { "tube sock" } + else { "violin" } +} +~~~~ -# Functions +Functions that do not return a value are said to return nil, `()`, +and both the return type and the return value may be omitted from +the definition. The following two functions are equivalent. -Like all other static declarations, such as `type`, functions can be -declared both at the top level and inside other functions (or modules, -which we'll come back to in moment). +~~~~ +fn do_nothing_the_hard_way() -> () { ret (); } -The `ret` keyword immediately returns from a function. It is -optionally followed by an expression to return. In functions that -return `()`, the returned expression can be left off. A function can -also return a value by having its top level block produce an -expression (by omitting the final semicolon). +fn do_nothing_the_easy_way() { } +~~~~ Some functions (such as the C function `exit`) never return normally. In Rust, these are annotated with the pseudo-return type '`!`': ~~~~ -fn dead_end() -> ! { fail; } +fn dead_end() -> ! { fail } ~~~~ This helps the compiler avoid spurious error messages. For example, @@ -987,627 +953,844 @@ let dir = if can_go_left() { left } else { dead_end(); }; ~~~~ -## Closures +# Basic datatypes -Named functions, like those in the previous section, may not refer -to local variables decalared outside the function - they do not -close over their environment. For example you couldn't write the -following: +The core datatypes of Rust are structural records, enums (tagged +unions, algebraic data types), and tuples. They are immutable +by default. -~~~~ {.ignore} -let foo = 10; +~~~~ +type point = {x: float, y: float}; -fn bar() -> int { - ret foo; // `bar` cannot refer to `foo` +enum shape { + circle(point, float), + rectangle(point, point) } ~~~~ -Rust also supports _closures_, functions that can access variables in -the enclosing scope. 
+## Records -~~~~ -# import println = io::println; -fn call_closure_with_ten(b: fn(int)) { b(10); } +Rust record types are written `{field1: T1, field2: T2 [, ...]}`, +where `T1`, `T2`, ... denote types. Record literals are written in +the same way, but with expressions instead of types. They are quite +similar to C structs, and even laid out the same way in memory (so you +can read from a Rust struct in C, and vice-versa). The dot operator is +used to access record fields (`mypoint.x`). -let captured_var = 20; -let closure = |arg| println(#fmt("captured_var=%d, arg=%d", captured_var, arg)); +Fields that you want to mutate must be explicitly marked `mut`. -call_closure_with_ten(closure); +~~~~ +type stack = {content: ~[int], mut head: uint}; ~~~~ -The types of the arguments are generally omitted, as is the return -type, because the compiler can almost always infer them. In the rare -case where the compiler needs assistance though, the arguments and -return types may be annotated. +With such a type, you can do `mystack.head += 1u`. If `mut` were +omitted from the type, such an assignment would result in a type +error. + +To create a new record based on the value of an existing record +you construct it using the `with` keyword: ~~~~ -# type mygoodness = fn(str) -> str; type what_the = int; -let bloop = |well, oh: mygoodness| -> what_the { fail oh(well) }; +let oldpoint = {x: 10f, y: 20f}; +let newpoint = {x: 0f with oldpoint}; +assert newpoint == {x: 0f, y: 20f}; ~~~~ -There are several forms of closure, each with its own role. The most -common, called a _stack closure_, has type `fn&` and can directly -access local variables in the enclosing scope. +This will create a new record, copying all the fields from `oldpoint` +into it, except for the ones that are explicitly set in the literal. + +Rust record types are *structural*. This means that `{x: float, y: +float}` is not just a way to define a new type, but is the actual name +of the type. 
Record types can be used without first defining them. If +module A defines `type point = {x: float, y: float}`, and module B, +without knowing anything about A, defines a function that returns an +`{x: float, y: float}`, you can use that return value as a `point` in +module A. (Remember that `type` defines an additional name for a type, +not an actual new type.) + +## Record patterns + +Records can be destructured in `alt` patterns. The basic syntax is +`{fieldname: pattern, ...}`, but the pattern for a field can be +omitted as a shorthand for simply binding the variable with the same +name as the field. ~~~~ -let mut max = 0; -[1, 2, 3].map(|x| if x > max { max = x }); +# let mypoint = {x: 0f, y: 0f}; +alt mypoint { + {x: 0f, y: y_name} { /* Provide sub-patterns for fields */ } + {x, y} { /* Simply bind the fields */ } +} ~~~~ -Stack closures are very efficient because their environment is -allocated on the call stack and refers by pointer to captured -locals. To ensure that stack closures never outlive the local -variables to which they refer, they can only be used in argument -position and cannot be stored in structures nor returned from -functions. Despite the limitations stack closures are used -pervasively in Rust code. +The field names of a record do not have to appear in a pattern in the +same order they appear in the type. When you are not interested in all +the fields of a record, a record pattern may end with `, _` (as in +`{field1, _}`) to indicate that you're ignoring all other fields. -### Boxed closures +## Enums -When you need to store a closure in a data structure, a stack closure -will not do, since the compiler will refuse to let you store it. For -this purpose, Rust provides a type of closure that has an arbitrary -lifetime, written `fn@` (boxed closure, analogous to the `@` pointer -type described in the next section). +Enums are datatypes that have several alternate representations. 
For +example, consider the type shown earlier: -A boxed closure does not directly access its environment, but merely -copies out the values that it closes over into a private data -structure. This means that it can not assign to these variables, and -will not 'see' updates to them. +~~~~ +# type point = {x: float, y: float}; +enum shape { + circle(point, float), + rectangle(point, point) +} +~~~~ -This code creates a closure that adds a given string to its argument, -returns it from a function, and then calls it: +A value of this type is either a circle, in which case it contains a +point record and a float, or a rectangle, in which case it contains +two point records. The run-time representation of such a value +includes an identifier of the actual form that it holds, much like the +'tagged union' pattern in C, but with better ergonomics. -~~~~ -use std; +The above declaration will define a type `shape` that can be used to +refer to such shapes, and two functions, `circle` and `rectangle`, +which can be used to construct values of the type (taking arguments of +the specified types). So `circle({x: 0f, y: 0f}, 10f)` is the way to +create a new circle. -fn mk_appender(suffix: str) -> fn@(str) -> str { - ret fn@(s: str) -> str { s + suffix }; -} +Enum variants need not have type parameters. This, for example, is +equivalent to a C enum: -fn main() { - let shout = mk_appender("!"); - io::println(shout("hey ho, let's go")); +~~~~ +enum direction { + north, + east, + south, + west } ~~~~ -This example uses the long closure syntax, `fn@(s: str) ...`, -making the fact that we are declaring a box closure explicit. In -practice boxed closures are usually defined with the short closure -syntax introduced earlier, in which case the compiler will infer -the type of closure. Thus our boxed closure example could also -be written: +This will define `north`, `east`, `south`, and `west` as constants, +all of which have type `direction`. 
+ +When an enum is C-like, that is, when none of the variants have +parameters, it is possible to explicitly set the discriminator values +to an integer value: ~~~~ -fn mk_appender(suffix: str) -> fn@(str) -> str { - ret |s| s + suffix; +enum color { + red = 0xff0000, + green = 0x00ff00, + blue = 0x0000ff } ~~~~ -### Unique closures - -Unique closures, written `fn~` in analogy to the `~` pointer type (see -next section), hold on to things that can safely be sent between -processes. They copy the values they close over, much like boxed -closures, but they also 'own' them—meaning no other code can access -them. Unique closures are used in concurrent code, particularly -for spawning [tasks](#tasks). +If an explicit discriminator is not specified for a variant, the value +defaults to the value of the previous variant plus one. If the first +variant does not have a discriminator, it defaults to 0. For example, +the value of `north` is 0, `east` is 1, etc. -### Closure compatibility +When an enum is C-like the `as` cast operator can be used to get the +discriminator's value. -A nice property of Rust closures is that you can pass any kind of -closure (as long as the arguments and return types match) to functions -that expect a `fn()`. Thus, when writing a higher-order function that -wants to do nothing with its function argument beyond calling it, you -should almost always specify the type of that argument as `fn()`, so -that callers have the flexibility to pass whatever they want. + + +There is a special case for enums with a single variant. These are +used to define new types in such a way that the new name is not just a +synonym for an existing type, but its own distinct type. 
If you say: ~~~~ -fn call_twice(f: fn()) { f(); f(); } -call_twice(|| { "I am an inferred stack closure"; } ); -call_twice(fn&() { "I am also a stack closure"; } ); -call_twice(fn@() { "I am a boxed closure"; }); -call_twice(fn~() { "I am a unique closure"; }); -fn bare_function() { "I am a plain function"; } -call_twice(bare_function); +enum gizmo_id = int; ~~~~ -### Do syntax - -Closures in Rust are frequently used in combination with higher-order -functions to simulate control structures like `if` and -`loop`. Consider this function that iterates over a vector of -integers, applying an operator to each: +That is a shorthand for this: ~~~~ -fn each(v: ~[int], op: fn(int)) { - let mut n = 0; - while n < v.len() { - op(v[n]); - n += 1; - } -} +enum gizmo_id { gizmo_id(int) } ~~~~ -As a caller, if we use a closure to provide the final operator -argument, we can write it in a way that has a pleasant, block-like -structure. +Enum types like this can have their content extracted with the +dereference (`*`) unary operator: ~~~~ -# fn each(v: ~[int], op: fn(int)) {} -# fn do_some_work(i: int) { } -each(~[1, 2, 3], |n| { - #debug("%i", n); - do_some_work(n); -}); +# enum gizmo_id = int; +let my_gizmo_id = gizmo_id(10); +let id_int: int = *my_gizmo_id; ~~~~ -This is such a useful pattern that Rust has a special form of function -call that can be written more like a built-in control structure: +## Enum patterns + +For enum types with multiple variants, destructuring is the only way to +get at their contents. 
All variant constructors can be used as +patterns, as in this definition of `area`: ~~~~ -# fn each(v: ~[int], op: fn(int)) {} -# fn do_some_work(i: int) { } -do each(~[1, 2, 3]) |n| { - #debug("%i", n); - do_some_work(n); +# type point = {x: float, y: float}; +# enum shape { circle(point, float), rectangle(point, point) } +fn area(sh: shape) -> float { + alt sh { + circle(_, size) { float::consts::pi * size * size } + rectangle({x, y}, {x: x2, y: y2}) { (x2 - x) * (y2 - y) } + } } ~~~~ -The call is prefixed with the keyword `do` and, instead of writing the -final closure inside the argument list it is moved outside of the -parenthesis where it looks visually more like a typical block of -code. The `do` expression is purely syntactic sugar for a call that -takes a final closure argument. - -`do` is often used for task spawning. +Another example, matching nullary enum variants: ~~~~ -import task::spawn; - -do spawn() || { - #debug("I'm a task, whatever"); +# type point = {x: float, y: float}; +# enum direction { north, east, south, west } +fn point_from_direction(dir: direction) -> point { + alt dir { + north { {x: 0f, y: 1f} } + east { {x: 1f, y: 0f} } + south { {x: 0f, y: -1f} } + west { {x: -1f, y: 0f} } + } } ~~~~ -That's nice, but look at all those bars and parentheses - that's two empty -argument lists back to back. Wouldn't it be great if they weren't -there? +## Tuples + +Tuples in Rust behave exactly like records, except that their fields +do not have names (and can thus not be accessed with dot notation). +Tuples can have any arity except for 0 or 1 (though you may consider +nil, `()`, as the empty tuple if you like). ~~~~ -# import task::spawn; -do spawn { - #debug("Kablam!"); +let mytup: (int, int, float) = (10, 20, 30.0); +alt mytup { + (a, b, c) { log(info, a + b + (c as int)); } } ~~~~ -Empty argument lists can be omitted from `do` expressions. 
+# The Rust Memory Model -### For loops +At this juncture let's take a detour to explain the concepts involved +in Rust's memory model. Rust has a very particular approach to +memory management that plays a significant role in shaping the "feel" +of the language. Understanding the memory landscape will illuminate +several of Rust's unique features as we encounter them. -Most iteration in Rust is done with `for` loops. Like `do`, -`for` is a nice syntax for doing control flow with closures. -Additionally, within a `for` loop, `break`, `cont`, and `ret` -work just as they do with `while` and `loop`. +Rust has three competing goals that inform its view of memory: -Consider again our `each` function, this time improved to -break early when the iteratee returns `false`: +* Memory safety - memory that is managed by and is accessible to + the Rust language must be guaranteed to be valid. Under normal + circumstances it is impossible for Rust to trigger a segmentation + fault or leak memory +* Performance - high-performance low-level code tends to employ + a number of allocation strategies. Low-performance high-level + code often uses a single, GC-based, heap allocation strategy +* Concurrency - Rust must maintain memory safety guarantees even + for code running in parallel + +## How performance considerations influence the memory model + +Many languages that offer the kinds of memory safety guarantees that +Rust does have a single allocation strategy: objects live on the heap, +live for as long as they are needed, and are periodically garbage +collected. This is very straightforward both conceptually and in +implementation, but has very significant costs. Such languages tend to +aggressively pursue ways to ameliorate allocation costs (think the +Java virtual machine). Rust supports this strategy with _shared +boxes_, memory allocated on the heap that may be referred to (shared) +by multiple variables. 
+ +In comparison, languages like C++ offer a very precise control over +where objects are allocated. In particular, it is common to put +them directly on the stack, avoiding expensive heap allocation. In +Rust this is possible as well, and the compiler will use a clever +lifetime analysis to ensure that no variable can refer to stack +objects after they are destroyed. + +## How concurrency considerations influence the memory model + +Memory safety in a concurrent environment tends to mean avoiding race +conditions between two threads of execution accessing the same +memory. Even high-level languages frequently avoid solving this +problem, requiring programmers to correctly employ locking to ensure +their program is free of races. + +Rust starts from the position that memory simply cannot be shared +between tasks. Experience in other languages has proven that isolating +each task's heap from the others is a reliable strategy and one that +is easy for programmers to reason about. Having isolated heaps +additionally means that garbage collection must only be done +per-heap. Rust never 'stops the world' to garbage collect memory. + +If Rust tasks have completely isolated heaps then that seems to imply +that any data transferred between them must be copied. While this +is a fine and useful way to implement communication between tasks, +it is also very inefficient for large data structures. + +Because of this Rust also introduces a global "exchange heap". Objects +allocated here have _ownership semantics_, meaning that there is only +a single variable that refers to them. For this reason they are +referred to as _unique boxes_. All tasks may allocate objects on this +heap, then transfer ownership of those allocations to other tasks, +avoiding expensive copies. + +## What to be aware of + +Rust has three "realms" in which objects can be allocated: the stack, +the local heap, and the exchange heap. 
These realms have corresponding +pointer types: the borrowed pointer (`&T`), the shared pointer (`@T`), +and the unique pointer (`~T`). These three sigils will appear +repeatedly as we explore the language. Learning the appropriate role +of each is key to using Rust effectively. + +# Boxes and pointers + +In contrast to a lot of modern languages, aggregate types like records +and enums are not represented as pointers to allocated memory. They +are, like in C and C++, represented directly. This means that if you +`let x = {x: 1f, y: 1f};`, you are creating a record on the stack. If +you then copy it into a data structure, the whole record is copied, +not just a pointer. + +For small records like `point`, this is usually more efficient than +allocating memory and going through a pointer. But for big records, or +records with mutable fields, it can be useful to have a single copy on +the heap, and refer to that through a pointer. + +Rust supports several types of pointers. The safe pointer types are +`@T` for shared boxes allocated on the local heap, `~T`, for +uniquely-owned boxes allocated on the exchange heap, and `&T`, for +borrowed pointers, which may point to any memory, and whose lifetimes +are governed by the call stack. + +Rust also has an unsafe pointer, written `*T`, which is a completely +unchecked pointer type only used in unsafe code (and thus, in typical +Rust code, very rarely). + +All pointer types can be dereferenced with the `*` unary operator. + +## Shared boxes + +Shared boxes are pointers to heap-allocated, reference counted memory. +A cycle collector ensures that circular references do not result in +memory leaks. + +> ***Note:*** We will in the future switch to garbage collection, +> rather than reference counting, for shared boxes. + +Creating a shared box is done by simply applying the unary `@` +operator to an expression. The result of the expression will be boxed, +resulting in a box of the right type. 
Copying a shared box, as happens +during assignment, only copies a pointer, never the contents of the +box. ~~~~ -fn each(v: ~[int], op: fn(int) -> bool) { - let mut n = 0; - while n < v.len() { - if !op(v[n]) { - break; - } - n += 1; - } -} +let x: @int = @10; // New box, refcount of 1 +let y = x; // Copy the pointer, increase refcount +// When x and y go out of scope, refcount goes to 0, box is freed ~~~~ -And using this function to iterate over a vector: +Shared boxes never cross task boundaries. + +## Unique boxes + +In contrast to shared boxes, unique boxes have a single owner and thus +two unique boxes may not refer to the same memory. All unique boxes +across all tasks are allocated on a single _exchange heap_, where +their uniquely owned nature allows them to be passed between tasks. + +Because unique boxes are uniquely owned, copying them involves allocating +a new unique box and duplicating the contents. Copying unique boxes +is expensive so the compiler will complain if you do. ~~~~ -# import each = vec::each; -# import println = io::println; -each(~[2, 4, 8, 5, 16], |n| { - if n % 2 != 0 { - println("found odd number!"); - false - } else { true } -}); +let x = ~10; +let y = x; // error: copying a non-implicitly copyable type ~~~~ -With `for`, functions like `each` can be treated more -like builtin looping structures. When calling `each` -in a `for` loop, instead of returning `false` to break -out of the loop, you just write `break`. To continue -to the next iteration, write `cont`. +If you really want to copy a unique box you must say so explicitly. 
~~~~ -# import each = vec::each; -# import println = io::println; -for each(~[2, 4, 8, 5, 16]) |n| { - if n % 2 != 0 { - println("found odd number!"); - break; - } -} +let x = ~10; +let y = copy x; ~~~~ -As an added bonus, you can use the `ret` keyword, which is not -normally allowed in closures, in a block that appears as the body of a -`for` loop — this will cause a return to happen from the outer -function, not just the loop body. +This is where the 'move' (`<-`) operator comes in. It is similar to +`=`, but it de-initializes its source. Thus, the unique box can move +from `x` to `y`, without violating the constraint that it only has a +single owner (if you used assignment instead of the move operator, the +box would, in principle, be copied). ~~~~ -# import each = vec::each; -fn contains(v: ~[int], elt: int) -> bool { - for each(v) |x| { - if (x == elt) { ret true; } - } - false -} +let x = ~10; +let y <- x; ~~~~ -`for` syntax only works with stack closures. +> ***Note:*** this discussion of copying vs moving does not account +> for the "last use" rules that automatically promote copy operations +> to moves. This is an evolving area of the language that will +> continue to change. +Unique boxes, when they do not contain any shared boxes, can be sent +to other tasks. The sending task will give up ownership of the box, +and won't be able to access it afterwards. The receiving task will +become the sole owner of the box. -# Datatypes +## Borrowed pointers -Rust datatypes are, by default, immutable. The core datatypes of Rust -are structural records and 'enums' (tagged unions, algebraic data -types). +Rust borrowed pointers are a general purpose reference/pointer type, +similar to the C++ reference type, but guaranteed to point to valid +memory. In contrast to unique pointers, where the holder of a unique +pointer is the owner of the pointed-to memory, borrowed pointers never +imply ownership. 
Pointers may be borrowed from any type, in which case +the pointer is guaranteed not to outlive the value it points to. ~~~~ -type point = {x: float, y: float}; -enum shape { - circle(point, float), - rectangle(point, point) -} -let my_shape = circle({x: 0.0, y: 0.0}, 10.0); +# fn work_with_foo_by_pointer(f: &str) { } +let foo = "foo"; +work_with_foo_by_pointer(&foo); ~~~~ -## Records +The following shows an example of what is _not_ possible with borrowed +pointers. If you were able to write this then the pointer to `foo` +would outlive `foo` itself. -Rust record types are written `{field1: T1, field2: T2 [, ...]}`, -where `T1`, `T2`, ... denote types. Record literals are written in -the same way, but with expressions instead of types. They are quite -similar to C structs, and even laid out the same way in memory (so you -can read from a Rust struct in C, and vice-versa). +~~~~ {.ignore} +let foo_ptr; +{ + let foo = "foo"; + foo_ptr = &foo; +} +~~~~ -The dot operator is used to access record fields (`mypoint.x`). +> ***Note:*** borrowed pointers are a new addition to the language. +> They are not used extensively yet but are expected to become the +> pointer type used in many common situations, in particular for +> by-reference argument passing. Rust's current solution for passing +> arguments by reference is [argument modes](#argument-passing). -Fields that you want to mutate must be explicitly marked as such. For -example... +## Mutability + +All pointer types have a mutable variant, written `@mut T` or `~mut +T`. Given such a pointer, you can write to its contents by combining +the dereference operator with a mutating action. ~~~~ -type stack = {content: ~[int], mut head: uint}; +fn increase_contents(pt: @mut int) { + *pt += 1; +} ~~~~ -With such a type, you can do `mystack.head += 1u`. If `mut` were -omitted from the type, such an assignment would result in a type -error. 
+# Vectors -To 'update' an immutable record, you use functional record update -syntax, by ending a record literal with the keyword `with`: +Vectors represent a section of memory that contains some number +of values. Like other types in Rust, vectors can be stored on +the stack, the local heap, or the exchange heap. + +~~~ +enum crayon { + almond, antique_brass, apricot, + aquamarine, asparagus, atomic_tangerine, + banana_mania, beaver, bittersweet +} + +// A stack vector of crayons +let stack_crayons: &[crayon] = &[almond, antique_brass, apricot]; +// A local heap (shared) vector of crayons +let local_crayons: @[crayon] = @[aquamarine, asparagus, atomic_tangerine]; +// An exchange heap (unique) vector of crayons +let exchange_crayons: ~[crayon] = ~[banana_mania, beaver, bittersweet]; +~~~ + +> ***Note:*** Until recently Rust only had unique vectors, using the +> unadorned `[]` syntax for literals. This syntax is still supported +> but is deprecated. In the future it will probably represent some +> "reasonable default" vector type. +> +> Unique vectors are the currently-recommended vector type for general +> use as they are the most tested and well-supported by existing +> libraries. There will be a gradual shift toward using more +> stack and local vectors in the coming releases. + +Vector literals are enclosed in square brackets and dereferencing is +also done with square brackets (zero-based): ~~~~ -let oldpoint = {x: 10f, y: 20f}; -let newpoint = {x: 0f with oldpoint}; -assert newpoint == {x: 0f, y: 20f}; +# enum crayon { almond, antique_brass, apricot, +# aquamarine, asparagus, atomic_tangerine, +# banana_mania, beaver, bittersweet }; +# fn draw_crying_puppy(c: crayon) { } + +let crayons = ~[banana_mania, beaver, bittersweet]; +if crayons[0] == bittersweet { draw_crying_puppy(crayons[0]); } ~~~~ -This will create a new struct, copying all the fields from `oldpoint` -into it, except for the ones that are explicitly set in the literal.
+By default, vectors are immutable—you can not replace their elements. +The type written as `~[mut T]` is a vector with mutable +elements. Mutable vector literals are written `~[mut]` (empty) or `~[mut +1, 2, 3]` (with elements). -Rust record types are *structural*. This means that `{x: float, y: -float}` is not just a way to define a new type, but is the actual name -of the type. Record types can be used without first defining them. If -module A defines `type point = {x: float, y: float}`, and module B, -without knowing anything about A, defines a function that returns an -`{x: float, y: float}`, you can use that return value as a `point` in -module A. (Remember that `type` defines an additional name for a type, -not an actual new type.) +~~~~ +# enum crayon { almond, antique_brass, apricot, +# aquamarine, asparagus, atomic_tangerine, +# banana_mania, beaver, bittersweet }; -## Record patterns +let crayons = ~[mut banana_mania, beaver, bittersweet]; +crayons[0] = atomic_tangerine; +~~~~ -Records can be destructured in `alt` patterns. The basic syntax is -`{fieldname: pattern, ...}`, but the pattern for a field can be -omitted as a shorthand for simply binding the variable with the same -name as the field. +The `+` operator means concatenation when applied to vector types. ~~~~ -# let mypoint = {x: 0f, y: 0f}; -alt mypoint { - {x: 0f, y: y_name} { /* Provide sub-patterns for fields */ } - {x, y} { /* Simply bind the fields */ } -} +# enum crayon { almond, antique_brass, apricot, +# aquamarine, asparagus, atomic_tangerine, +# banana_mania, beaver, bittersweet }; + +let my_crayons = ~[almond, antique_brass, apricot]; +let your_crayons = ~[banana_mania, beaver, bittersweet]; + +let our_crayons = my_crayons + your_crayons; ~~~~ -The field names of a record do not have to appear in a pattern in the -same order they appear in the type. 
When you are not interested in all -the fields of a record, a record pattern may end with `, _` (as in -`{field1, _}`) to indicate that you're ignoring all other fields. +The `+=` operator also works as expected, provided the assignee +lives in a mutable slot. -## Enums +~~~~ +# enum crayon { almond, antique_brass, apricot, +# aquamarine, asparagus, atomic_tangerine, +# banana_mania, beaver, bittersweet }; -Enums are datatypes that have several different representations. For -example, the type shown earlier: +let mut my_crayons = ~[almond, antique_brass, apricot]; +let your_crayons = ~[banana_mania, beaver, bittersweet]; +my_crayons += your_crayons; ~~~~ -# type point = {x: float, y: float}; -enum shape { - circle(point, float), - rectangle(point, point) -} + +## Strings + +The `str` type in Rust is represented exactly the same way as a unique +vector of immutable bytes (`~[u8]`). This sequence of bytes is +interpreted as an UTF-8 encoded sequence of characters. This has the +advantage that UTF-8 encoded I/O (which should really be the default +for modern systems) is very fast, and that strings have, for most +intents and purposes, a nicely compact representation. It has the +disadvantage that you only get constant-time access by byte, not by +character. + +~~~~ +let huh = "what?"; +let que: u8 = huh[4]; // indexing a string returns a `u8` +assert que == '?' as u8; ~~~~ -A value of this type is either a circle, in which case it contains a -point record and a float, or a rectangle, in which case it contains -two point records. The run-time representation of such a value -includes an identifier of the actual form that it holds, much like the -'tagged union' pattern in C, but with better ergonomics. +A lot of algorithms don't need constant-time indexed access (they +iterate over all characters, which `str::chars` helps with), and +for those that do, many don't need actual characters, and can operate +on bytes. 
For algorithms that do really need to index by character, +there are core library functions available. -The above declaration will define a type `shape` that can be used to -refer to such shapes, and two functions, `circle` and `rectangle`, -which can be used to construct values of the type (taking arguments of -the specified types). So `circle({x: 0f, y: 0f}, 10f)` is the way to -create a new circle. +> ***Note:*** like vectors, strings will soon be allocatable in +> the local heap and on the stack, in addition to the exchange heap. -Enum variants do not have to have parameters. This, for example, is -equivalent to a C enum: +## Vector and string methods -~~~~ -enum direction { - north, - east, - south, - west +Both vectors and strings support a number of useful +[methods](#implementation). While we haven't covered methods yet, +most vector functionality is provided by methods, so let's have a +brief look at a few common ones. + +~~~ +# import io::println; +# enum crayon { +# almond, antique_brass, apricot, +# aquamarine, asparagus, atomic_tangerine, +# banana_mania, beaver, bittersweet +# } +# fn unwrap_crayon(c: crayon) -> int { 0 } +# fn eat_crayon_wax(i: int) { } +# fn store_crayon_in_nasal_cavity(i: uint, c: crayon) { } +# fn crayon_to_str(c: crayon) -> str { "" } + +let crayons = ~[almond, antique_brass, apricot]; + +// Check the length of the vector +assert crayons.len() == 3; +assert !crayons.is_empty(); + +// Iterate over a vector +for crayons.each |crayon| { + let delicious_crayon_wax = unwrap_crayon(crayon); + eat_crayon_wax(delicious_crayon_wax); } -~~~~ -This will define `north`, `east`, `south`, and `west` as constants, -all of which have type `direction`. 
+// Map vector elements +let crayon_names = crayons.map(crayon_to_str); +let favorite_crayon_name = crayon_names[0]; -When an enum is C-like, that is, when none of the variants have -parameters, it is possible to explicitly set the discriminator values -to an integer value: +// Remove whitespace from before and after the string +let new_favorite_crayon_name = favorite_crayon_name.trim(); -~~~~ -enum color { - red = 0xff0000, - green = 0x00ff00, - blue = 0x0000ff +if favorite_crayon_name.len() > 5 { + // Create a substring + println(favorite_crayon_name.substr(0, 5)); +} +~~~ + +# Closures + +Named functions, like those we've seen so far, may not refer to local +variables declared outside the function - they do not "close over +their environment". For example you couldn't write the following: + +~~~~ {.ignore} +let foo = 10; + +fn bar() -> int { + ret foo; // `bar` cannot refer to `foo` } ~~~~ -If an explicit discriminator is not specified for a variant, the value -defaults to the value of the previous variant plus one. If the first -variant does not have a discriminator, it defaults to 0. For example, -the value of `north` is 0, `east` is 1, etc. +Rust also supports _closures_, functions that can access variables in +the enclosing scope. -When an enum is C-like the `as` cast operator can be used to get the -discriminator's value. +~~~~ +# import println = io::println; +fn call_closure_with_ten(b: fn(int)) { b(10); } + +let captured_var = 20; +let closure = |arg| println(#fmt("captured_var=%d, arg=%d", captured_var, arg)); - +call_closure_with_ten(closure); +~~~~ -There is a special case for enums with a single variant. These are -used to define new types in such a way that the new name is not just a -synonym for an existing type, but its own distinct type. If you say: +Closures begin with the argument list between bars and are followed by +a single expression.
The types of the arguments are generally omitted, +as is the return type, because the compiler can almost always infer +them. In the rare case where the compiler needs assistance though, the +arguments and return types may be annotated. ~~~~ -enum gizmo_id = int; +# type mygoodness = fn(str) -> str; type what_the = int; +let bloop = |well, oh: mygoodness| -> what_the { fail oh(well) }; ~~~~ -That is a shorthand for this: +There are several forms of closure, each with its own role. The most +common, called a _stack closure_, has type `fn&` and can directly +access local variables in the enclosing scope. ~~~~ -enum gizmo_id { gizmo_id(int) } +let mut max = 0; +[1, 2, 3].map(|x| if x > max { max = x }); ~~~~ -Enum types like this can have their content extracted with the -dereference (`*`) unary operator: +Stack closures are very efficient because their environment is +allocated on the call stack and refers by pointer to captured +locals. To ensure that stack closures never outlive the local +variables to which they refer, they can only be used in argument +position and cannot be stored in structures nor returned from +functions. Despite the limitations stack closures are used +pervasively in Rust code. -~~~~ -# enum gizmo_id = int; -let my_gizmo_id = gizmo_id(10); -let id_int: int = *my_gizmo_id; -~~~~ +## Shared closures -## Enum patterns +When you need to store a closure in a data structure, a stack closure +will not do, since the compiler will refuse to let you store it. For +this purpose, Rust provides a type of closure that has an arbitrary +lifetime, written `fn@` (boxed closure, analogous to the `@` pointer +type described earlier). -For enum types with multiple variants, destructuring is the only way to -get at their contents.
All variant constructors can be used as -patterns, as in this definition of `area`: +A boxed closure does not directly access its environment, but merely +copies out the values that it closes over into a private data +structure. This means that it can not assign to these variables, and +will not 'see' updates to them. + +This code creates a closure that adds a given string to its argument, +returns it from a function, and then calls it: ~~~~ -# type point = {x: float, y: float}; -# enum shape { circle(point, float), rectangle(point, point) } -fn area(sh: shape) -> float { - alt sh { - circle(_, size) { float::consts::pi * size * size } - rectangle({x, y}, {x: x2, y: y2}) { (x2 - x) * (y2 - y) } - } -} -~~~~ +use std; -Another example, matching nullary enum variants: +fn mk_appender(suffix: str) -> fn@(str) -> str { + ret fn@(s: str) -> str { s + suffix }; +} -~~~~ -# type point = {x: float, y: float}; -# enum direction { north, east, south, west } -fn point_from_direction(dir: direction) -> point { - alt dir { - north { {x: 0f, y: 1f} } - east { {x: 1f, y: 0f} } - south { {x: 0f, y: -1f} } - west { {x: -1f, y: 0f} } - } +fn main() { + let shout = mk_appender("!"); + io::println(shout("hey ho, let's go")); } ~~~~ -## Tuples - -Tuples in Rust behave exactly like records, except that their fields -do not have names (and can thus not be accessed with dot notation). -Tuples can have any arity except for 0 or 1 (though you may see nil, -`()`, as the empty tuple if you like). +This example uses the long closure syntax, `fn@(s: str) ...`, +making the fact that we are declaring a box closure explicit. In +practice boxed closures are usually defined with the short closure +syntax introduced earlier, in which case the compiler will infer +the type of closure. 
Thus our boxed closure example could also +be written: ~~~~ -let mytup: (int, int, float) = (10, 20, 30.0); -alt mytup { - (a, b, c) { log(info, a + b + (c as int)); } +fn mk_appender(suffix: str) -> fn@(str) -> str { + ret |s| s + suffix; } ~~~~ -## Pointers +## Unique closures -In contrast to a lot of modern languages, record and enum types in -Rust are not represented as pointers to allocated memory. They are, -like in C and C++, represented directly. This means that if you `let x -= {x: 1f, y: 1f};`, you are creating a record on the stack. If you -then copy it into a data structure, the whole record is copied, not -just a pointer. - -For small records like `point`, this is usually more efficient than -allocating memory and going through a pointer. But for big records, or -records with mutable fields, it can be useful to have a single copy on -the heap, and refer to that through a pointer. +Unique closures, written `fn~` in analogy to the `~` pointer type (see +[unique boxes](#unique-boxes)), hold on to things that can safely be sent between +tasks. They copy the values they close over, much like boxed +closures, but they also 'own' them—meaning no other code can access +them. Unique closures are used in concurrent code, particularly +for spawning [tasks](#tasks). -Rust supports several types of pointers. The simplest is the unsafe -pointer, written `*T`, which is a completely unchecked pointer type -only used in unsafe code (and thus, in typical Rust code, very -rarely). The safe pointer types are `@T` for shared, reference-counted -boxes, and `~T`, for uniquely-owned pointers. +## Closure compatibility -All pointer types can be dereferenced with the `*` unary operator. +A nice property of Rust closures is that you can pass any kind of +closure (as long as the arguments and return types match) to functions +that expect a `fn()`.
Thus, when writing a higher-order function that +wants to do nothing with its function argument beyond calling it, you +should almost always specify the type of that argument as `fn()`, so +that callers have the flexibility to pass whatever they want. -### Shared boxes +~~~~ +fn call_twice(f: fn()) { f(); f(); } +call_twice(|| { "I am an inferred stack closure"; } ); +call_twice(fn&() { "I am also a stack closure"; } ); +call_twice(fn@() { "I am a boxed closure"; }); +call_twice(fn~() { "I am a unique closure"; }); +fn bare_function() { "I am a plain function"; } +call_twice(bare_function); +~~~~ -Shared boxes are pointers to heap-allocated, reference counted memory. -A cycle collector ensures that circular references do not result in -memory leaks. +## Do syntax -Creating a shared box is done by simply applying the unary `@` -operator to an expression. The result of the expression will be boxed, -resulting in a box of the right type. For example: +Closures in Rust are frequently used in combination with higher-order +functions to simulate control structures like `if` and +`loop`. Consider this function that iterates over a vector of +integers, applying an operator to each: ~~~~ -let x = @10; // New box, refcount of 1 -let y = x; // Copy the pointer, increase refcount -// When x and y go out of scope, refcount goes to 0, box is freed +fn each(v: ~[int], op: fn(int)) { + let mut n = 0; + while n < v.len() { + op(v[n]); + n += 1; + } +} ~~~~ -***Note:*** We may in the future switch to garbage collection, rather -than reference counting, for shared boxes. - -Shared boxes never cross task boundaries. +As a caller, if we use a closure to provide the final operator +argument, we can write it in a way that has a pleasant, block-like +structure. 
-### Unique boxes +~~~~ +# fn each(v: ~[int], op: fn(int)) {} +# fn do_some_work(i: int) { } +each(~[1, 2, 3], |n| { + #debug("%i", n); + do_some_work(n); +}); +~~~~ -In contrast to shared boxes, unique boxes are not reference counted. -Instead, it is statically guaranteed that only a single owner of the -box exists at any time. +This is such a useful pattern that Rust has a special form of function +call that can be written more like a built-in control structure: ~~~~ -let x = ~10; -let y <- x; +# fn each(v: ~[int], op: fn(int)) {} +# fn do_some_work(i: int) { } +do each(~[1, 2, 3]) |n| { + #debug("%i", n); + do_some_work(n); +} ~~~~ -This is where the 'move' (`<-`) operator comes in. It is similar to -`=`, but it de-initializes its source. Thus, the unique box can move -from `x` to `y`, without violating the constraint that it only has a -single owner (if you used assignment instead of the move operator, the -box would, in principle, be copied). +The call is prefixed with the keyword `do` and, instead of writing the +final closure inside the argument list, it is moved outside of the +parentheses, where it looks visually more like a typical block of +code. The `do` expression is purely syntactic sugar for a call that +takes a final closure argument. -Unique boxes, when they do not contain any shared boxes, can be sent -to other tasks. The sending task will give up ownership of the box, -and won't be able to access it afterwards. The receiving task will -become the sole owner of the box. +`do` is often used for task spawning. -### Mutability +~~~~ +import task::spawn; -All pointer types have a mutable variant, written `@mut T` or `~mut -T`. Given such a pointer, you can write to its contents by combining -the dereference operator with a mutating action. +do spawn() || { + #debug("I'm a task, whatever"); +} +~~~~ + +That's nice, but look at all those bars and parentheses - that's two empty +argument lists back to back. Wouldn't it be great if they weren't +there?
~~~~ -fn increase_contents(pt: @mut int) { - *pt += 1; +# import task::spawn; +do spawn { + #debug("Kablam!"); } ~~~~ -## Vectors +Empty argument lists can be omitted from `do` expressions. -Rust vectors are always heap-allocated and unique. A value of type -`~[T]` is represented by a pointer to a section of heap memory -containing any number of values of type `T`. +## For loops -***Note:*** This uniqueness is turning out to be quite awkward in -practice, and might change in the future. +Most iteration in Rust is done with `for` loops. Like `do`, +`for` is a nice syntax for doing control flow with closures. +Additionally, within a `for` loop, `break`, `again`, and `ret` +work just as they do with `while` and `loop`. -Vector literals are enclosed in square brackets. Dereferencing is done -with square brackets (zero-based): +Consider again our `each` function, this time improved to +break early when the iteratee returns `false`: ~~~~ -let myvec = ~[true, false, true, false]; -if myvec[1] { io::println("boom"); } +fn each(v: ~[int], op: fn(int) -> bool) { + let mut n = 0; + while n < v.len() { + if !op(v[n]) { + break; + } + n += 1; + } +} ~~~~ -By default, vectors are immutable—you can not replace their elements. -The type written as `~[mut T]` is a vector with mutable -elements. Mutable vector literals are written `~[mut]` (empty) or `~[mut -1, 2, 3]` (with elements). - -The `+` operator means concatenation when applied to vector types. -Growing a vector in Rust is not as inefficient as it looks : +And using this function to iterate over a vector: ~~~~ -let mut myvec = ~[], i = 0; -while i < 100 { - myvec += ~[i]; - i += 1; -} +# import each = vec::each; +# import println = io::println; +each(~[2, 4, 8, 5, 16], |n| { + if n % 2 != 0 { + println("found odd number!"); + false + } else { true } +}); ~~~~ -Because a vector is unique, replacing it with a longer one (which is -what `+= ~[i]` does) is indistinguishable from appending to it -in-place. 
Vector representations are optimized to grow -logarithmically, so the above code generates about the same amount of -copying and reallocation as `push` implementations in most other -languages. - -***Note:*** Actually, currently, growing a vector is *exactly* as -inefficient as it looks, since vector `+` has been moved to the -libraries and Rust's operator overloading support is insufficient to -allow this optimization. Try using `vec::push`. - -## Strings - -The `str` type in Rust is represented exactly the same way as a vector -of bytes (`~[u8]`), except that it is guaranteed to have a trailing -null byte (for interoperability with C APIs). +With `for`, functions like `each` can be treated more +like builtin looping structures. When calling `each` +in a `for` loop, instead of returning `false` to break +out of the loop, you just write `break`. To skip ahead +to the next iteration, write `again`. -This sequence of bytes is interpreted as an UTF-8 encoded sequence of -characters. This has the advantage that UTF-8 encoded I/O (which -should really be the default for modern systems) is very fast, and -that strings have, for most intents and purposes, a nicely compact -representation. It has the disadvantage that you only get -constant-time access by byte, not by character. +~~~~ +# import each = vec::each; +# import println = io::println; +for each(~[2, 4, 8, 5, 16]) |n| { + if n % 2 != 0 { + println("found odd number!"); + break; + } +} +~~~~ -A lot of algorithms don't need constant-time indexed access (they -iterate over all characters, which `str::chars` helps with), and -for those that do, many don't need actual characters, and can operate -on bytes. For algorithms that do really need to index by character, -there's the option to convert your string to a character vector (using -`str::chars`). 
+As an added bonus, you can use the `ret` keyword, which is not +normally allowed in closures, in a block that appears as the body of a +`for` loop — this will cause a return to happen from the outer +function, not just the loop body. -Like vectors, strings are always unique. You can wrap them in a shared -box to share them. Unlike vectors, there is no mutable variant of -strings. They are always immutable. +~~~~ +# import each = vec::each; +fn contains(v: ~[int], elt: int) -> bool { + for each(v) |x| { + if (x == elt) { ret true; } + } + false +} +~~~~ -NOTE: Section on resources removed. ToDo: document classes and destructors +`for` syntax only works with stack closures. # Argument passing @@ -1848,8 +2031,8 @@ fn plus1(&&x: int) -> int { x + 1 } vec::map(~[1, 2, 3], plus1); ~~~~ -***Note:***: This is inconvenient, and we are hoping to get rid of this -restriction in the future. +> ***Note:*** This is inconvenient, and we are hoping to get rid of +> this restriction in the future. # Modules and crates @@ -2124,8 +2307,8 @@ object-oriented languages tend to solve with methods and inheritance. For example, writing a function that can operate on multiple types of collections. -***Note:***: This feature is very new, and will need a few extensions to be -applicable to more advanced use cases. +> ***Note:*** This feature is very new, and will need a few extensions to be +> applicable to more advanced use cases. ## Declaration @@ -2565,8 +2748,8 @@ copying it by making use of [unique boxes](#unique-boxes), which allow the sending task to release ownership of a value, so that the receiving task can keep on using it. -***Note:***: As Rust evolves, we expect the task API to grow and change -somewhat. The tutorial documents the API as it exists today. +> ***Note:*** As Rust evolves, we expect the task API to grow and +> change somewhat. The tutorial documents the API as it exists today. 
## Spawning a task diff --git a/src/etc/emacs/rust-mode.el b/src/etc/emacs/rust-mode.el index 86e5f867cbaa3..a9691a836cb67 100644 --- a/src/etc/emacs/rust-mode.el +++ b/src/etc/emacs/rust-mode.el @@ -56,9 +56,9 @@ "trait" "fn" "enum" "iface" "impl")) (puthash word 'def table)) - (dolist (word '("assert" + (dolist (word '("again" "assert" "break" - "check" "claim" "cont" "copy" + "check" "claim" "copy" "do" "drop" "else" "export" "extern" "fail" "for" diff --git a/src/etc/indenter b/src/etc/indenter index f2e41da9b4e44..017cb926981fb 100755 --- a/src/etc/indenter +++ b/src/etc/indenter @@ -4,13 +4,13 @@ use warnings; my $indent = 0; while (<>) { - if (/^rust: ">>/) { + if (/^rust: ~">>/) { $indent += 1; } printf "%03d %s%s", $indent, (" " x $indent), $_; - if (/^rust: "< -" Last Change: 2010 Oct 13 +" Maintainer: Ben Blum +" Last Change: 2012 Jul 06 if version < 600 syntax clear @@ -12,8 +13,8 @@ endif syn keyword rustAssert assert syn match rustAssert "assert\(\w\)*" syn keyword rustKeyword alt as break -syn keyword rustKeyword check claim cont const copy else export extern fail -syn keyword rustKeyword do drop for if impl import in let log +syn keyword rustKeyword check claim cont const copy do drop else export extern fail +syn keyword rustKeyword for if impl import in let log syn keyword rustKeyword loop mod mut new of pure syn keyword rustKeyword ret self to unchecked syn match rustKeyword "unsafe" " Allows also matching unsafe::foo() @@ -30,6 +31,16 @@ syn keyword rustKeyword m32 m64 m128 f80 f16 f128 syn keyword rustType any int uint float char bool u8 u16 u32 u64 f32 syn keyword rustType f64 i8 i16 i32 i64 str +syn keyword rustType option either + +" Types from libc +syn keyword rustType c_float c_double c_void FILE fpos_t +syn keyword rustType DIR dirent +syn keyword rustType c_char c_schar c_uchar +syn keyword rustType c_short c_ushort c_int c_uint c_long c_ulong +syn keyword rustType size_t ptrdiff_t clock_t time_t +syn keyword rustType c_longlong 
c_ulonglong intptr_t uintptr_t +syn keyword rustType off_t dev_t ino_t pid_t mode_t ssize_t syn keyword rustBoolean true false @@ -37,9 +48,19 @@ syn keyword rustConstant some none " option syn keyword rustConstant left right " either syn keyword rustConstant ok err " result syn keyword rustConstant success failure " task -" syn keyword rustConstant cons nil " list +syn keyword rustConstant cons nil " list " syn keyword rustConstant empty node " tree +" Constants from libc +syn keyword rustConstant EXIT_FAILURE EXIT_SUCCESS RAND_MAX +syn keyword rustConstant EOF SEEK_SET SEEK_CUR SEEK_END _IOFBF _IONBF +syn keyword rustConstant _IOLBF BUFSIZ FOPEN_MAX FILENAME_MAX L_tmpnam +syn keyword rustConstant TMP_MAX O_RDONLY O_WRONLY O_RDWR O_APPEND O_CREAT +syn keyword rustConstant O_EXCL O_TRUNC S_IFIFO S_IFCHR S_IFBLK S_IFDIR +syn keyword rustConstant S_IFREG S_IFMT S_IEXEC S_IWRITE S_IREAD S_IRWXU +syn keyword rustConstant S_IXUSR S_IWUSR S_IRUSR F_OK R_OK W_OK X_OK +syn keyword rustConstant STDIN_FILENO STDOUT_FILENO STDERR_FILENO + " If foo::bar changes to foo.bar, change this ("::" to "\."). " If foo::bar changes to Foo::bar, change this (first "\w" to "\u"). 
syn match rustModPath "\w\(\w\)*::[^<]"he=e-3,me=e-3 diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs index e6a7a9fcfe6f8..4faf7a2f48d0c 100644 --- a/src/fuzzer/fuzzer.rs +++ b/src/fuzzer/fuzzer.rs @@ -42,7 +42,7 @@ fn common_exprs() -> ~[ast::expr] { } ~[dse(ast::expr_break), - dse(ast::expr_cont), + dse(ast::expr_again), dse(ast::expr_fail(option::none)), dse(ast::expr_fail(option::some( @dse(ast::expr_lit(@dsl(ast::lit_str(@"boo"))))))), diff --git a/src/libcore/task.rs b/src/libcore/task.rs index 2d2c2660fc880..f41e24c623f70 100644 --- a/src/libcore/task.rs +++ b/src/libcore/task.rs @@ -47,6 +47,7 @@ export unsupervise; export run_listener; export spawn; +export spawn_with; export spawn_listener; export spawn_sched; export try; @@ -338,6 +339,28 @@ fn unsupervise(builder: builder) { }); } +fn run_with(-builder: builder, + +arg: A, + +f: fn~(+A)) { + + /*! + * + * Runs a task, while transferring ownership of one argument to the + * child. + * + * This is useful for transferring ownership of noncopyables to + * another task. + * + */ + + let arg = ~mut some(arg); + do run(builder) { + let mut my_arg = none; + my_arg <-> *arg; + f(option::unwrap(my_arg)) + } +} + fn run_listener(-builder: builder, +f: fn~(comm::port)) -> comm::chan { /*! @@ -381,6 +404,22 @@ fn spawn(+f: fn~()) { run(builder(), f); } +fn spawn_with(+arg: A, +f: fn~(+A)) { + /*! + * Runs a new task, transferring ownership of one argument to the child + * + * Creates a task from a fresh `builder()` and hands `arg` to the + * child's body `f` by calling `run_with`; the parent gives up `arg` + * when the child is spawned. + * + * This is useful for transferring ownership of noncopyables to + * another task; use `run_with` directly when the task needs an + * explicitly configured builder. + */ + + run_with(builder(), arg, f) +} + fn spawn_listener(+f: fn~(comm::port)) -> comm::chan { /*!
* Runs a new task while providing a channel from the parent to the child diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 4c0be731df4ba..1941d809d7b87 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -336,7 +336,7 @@ enum expr_ { expr_addr_of(mutability, @expr), expr_fail(option<@expr>), expr_break, - expr_cont, + expr_again, expr_ret(option<@expr>), expr_log(int, @expr, @expr), diff --git a/src/libsyntax/fold.rs b/src/libsyntax/fold.rs index 75977ba616931..7f83b16b8ab71 100644 --- a/src/libsyntax/fold.rs +++ b/src/libsyntax/fold.rs @@ -464,7 +464,7 @@ fn noop_fold_expr(e: expr_, fld: ast_fold) -> expr_ { } expr_path(pth) { expr_path(fld.fold_path(pth)) } expr_fail(e) { expr_fail(option::map(e, fld.fold_expr)) } - expr_break | expr_cont { copy e } + expr_break | expr_again { copy e } expr_ret(e) { expr_ret(option::map(e, fld.fold_expr)) } expr_log(i, lv, e) { expr_log(i, fld.fold_expr(lv), fld.fold_expr(e)) } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 2f4fe783b4903..c94e2acbb2b44 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -463,7 +463,8 @@ class parser { } mt { ty_uniq(mt) } } - } else if self.token == token::BINOP(token::STAR) { + } else if self.token == token::BINOP(token::STAR) || + self.token == token::BINOP(token::CARET) { self.bump(); ty_ptr(self.parse_mt()) } else if self.token == token::LBRACE { @@ -966,8 +967,9 @@ class parser { } else if self.eat_keyword("break") { ex = expr_break; hi = self.span.hi; - } else if self.eat_keyword("cont") { - ex = expr_cont; + } else if self.eat_keyword("cont") || + self.eat_keyword("again") { + ex = expr_again; hi = self.span.hi; } else if self.eat_keyword("copy") { let e = self.parse_expr(); diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index dbc1e8b34e34d..b3fad7e9ffd94 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -304,8 +304,7 @@ fn 
contextual_keyword_table() -> hashmap { fn restricted_keyword_table() -> hashmap { let words = str_hash(); let keys = ~[ - "alt", - "assert", + "alt", "again", "assert", "break", "check", "claim", "class", "const", "cont", "copy", "do", "drop", diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 5220ecdce9363..4e6b47db1b2ed 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -341,7 +341,7 @@ fn print_type_ex(s: ps, &&ty: @ast::ty, print_colons: bool) { print_type(s, mt.ty); word(s.s, "]"); } - ast::ty_ptr(mt) { word(s.s, "*"); print_mt(s, mt); } + ast::ty_ptr(mt) { word(s.s, "^"); print_mt(s, mt); } ast::ty_rptr(region, mt) { alt region.node { ast::re_anon { word(s.s, "&"); } @@ -1063,7 +1063,7 @@ fn print_expr(s: ps, &&expr: @ast::expr) { } } ast::expr_break { word(s.s, "break"); } - ast::expr_cont { word(s.s, "cont"); } + ast::expr_again { word(s.s, "again"); } ast::expr_ret(result) { word(s.s, "ret"); alt result { diff --git a/src/libsyntax/visit.rs b/src/libsyntax/visit.rs index ed54ad3308b0b..0a7e757bbba89 100644 --- a/src/libsyntax/visit.rs +++ b/src/libsyntax/visit.rs @@ -421,7 +421,7 @@ fn visit_expr(ex: @expr, e: E, v: vt) { expr_path(p) { visit_path(p, e, v); } expr_fail(eo) { visit_expr_opt(eo, e, v); } expr_break { } - expr_cont { } + expr_again { } expr_ret(eo) { visit_expr_opt(eo, e, v); } expr_log(_, lv, x) { v.visit_expr(lv, e, v); diff --git a/src/rustc/metadata/decoder.rs b/src/rustc/metadata/decoder.rs index 3380540786010..13829a188d9bc 100644 --- a/src/rustc/metadata/decoder.rs +++ b/src/rustc/metadata/decoder.rs @@ -288,6 +288,7 @@ fn item_to_def_like(item: ebml::doc, did: ast::def_id, cnum: ast::crate_num) 'u' { dl_def(ast::def_fn(did, ast::unsafe_fn)) } 'f' { dl_def(ast::def_fn(did, ast::impure_fn)) } 'p' { dl_def(ast::def_fn(did, ast::pure_fn)) } + 'F' { dl_def(ast::def_fn(did, ast::extern_fn)) } 'y' { dl_def(ast::def_ty(did)) } 't' { dl_def(ast::def_ty(did)) } 'm' { 
dl_def(ast::def_mod(did)) } diff --git a/src/rustc/metadata/encoder.rs b/src/rustc/metadata/encoder.rs index 46a3cd84be3ba..1e50f1168c7b2 100644 --- a/src/rustc/metadata/encoder.rs +++ b/src/rustc/metadata/encoder.rs @@ -527,7 +527,7 @@ fn purity_fn_family(p: purity) -> char { unsafe_fn { 'u' } pure_fn { 'p' } impure_fn { 'f' } - extern_fn { 'c' } + extern_fn { 'F' } } } diff --git a/src/rustc/middle/borrowck.rs b/src/rustc/middle/borrowck.rs index 38559aec28dd2..ec2002f8f6b18 100644 --- a/src/rustc/middle/borrowck.rs +++ b/src/rustc/middle/borrowck.rs @@ -467,7 +467,9 @@ impl to_str_methods for borrowck_ctxt { cat_special(sk_method) { "method" } cat_special(sk_static_item) { "static item" } cat_special(sk_self) { "self reference" } - cat_special(sk_heap_upvar) { "variable declared in an outer block" } + cat_special(sk_heap_upvar) { + "captured outer variable from within a heap closure" + } cat_rvalue { "non-lvalue" } cat_local(_) { mut_str + " local variable" } cat_binding(_) { "pattern binding" } @@ -475,7 +477,7 @@ impl to_str_methods for borrowck_ctxt { cat_deref(_, _, pk) { #fmt["dereference of %s %s pointer", mut_str, self.pk_to_sigil(pk)] } cat_stack_upvar(_) { - mut_str + " variable declared in an outer block" + "captured " + mut_str + " variable from within a stack closure" } cat_comp(_, comp_field(*)) { mut_str + " field" } cat_comp(_, comp_tuple) { "tuple content" } diff --git a/src/rustc/middle/borrowck/categorization.rs b/src/rustc/middle/borrowck/categorization.rs index deccf0af2b46d..1a2b4d7834eca 100644 --- a/src/rustc/middle/borrowck/categorization.rs +++ b/src/rustc/middle/borrowck/categorization.rs @@ -176,7 +176,7 @@ impl public_methods for borrowck_ctxt { ast::expr_new(*) | ast::expr_binary(*) | ast::expr_while(*) | ast::expr_block(*) | ast::expr_loop(*) | ast::expr_alt(*) | ast::expr_lit(*) | ast::expr_break | ast::expr_mac(*) | - ast::expr_cont | ast::expr_rec(*) { + ast::expr_again | ast::expr_rec(*) { ret self.cat_rvalue(expr, expr_ty); } } 
diff --git a/src/rustc/middle/check_loop.rs b/src/rustc/middle/check_loop.rs index 44fbdaef7ce40..e8e2c57a8b227 100644 --- a/src/rustc/middle/check_loop.rs +++ b/src/rustc/middle/check_loop.rs @@ -33,7 +33,7 @@ fn check_crate(tcx: ty::ctxt, crate: @crate) { tcx.sess.span_err(e.span, "`break` outside of loop"); } } - expr_cont { + expr_again { if !cx.in_loop { tcx.sess.span_err(e.span, "`cont` outside of loop"); } diff --git a/src/rustc/middle/liveness.rs b/src/rustc/middle/liveness.rs index 2f87f6d55de79..50ab6b4b627bf 100644 --- a/src/rustc/middle/liveness.rs +++ b/src/rustc/middle/liveness.rs @@ -470,7 +470,7 @@ fn visit_expr(expr: @expr, &&self: @ir_maps, vt: vt<@ir_maps>) { expr_assert(*) | expr_check(*) | expr_addr_of(*) | expr_copy(*) | expr_loop_body(*) | expr_do_body(*) | expr_cast(*) | expr_unary(*) | expr_fail(*) | - expr_break | expr_cont | expr_lit(_) | expr_ret(*) | + expr_break | expr_again | expr_lit(_) | expr_ret(*) | expr_block(*) | expr_move(*) | expr_assign(*) | expr_swap(*) | expr_assign_op(*) | expr_mac(*) { visit::visit_expr(expr, self, vt); @@ -1009,7 +1009,7 @@ class liveness { self.break_ln } - expr_cont { + expr_again { if !self.cont_ln.is_valid() { self.tcx.sess.span_bug( expr.span, "cont with invalid cont_ln"); @@ -1457,7 +1457,7 @@ fn check_expr(expr: @expr, &&self: @liveness, vt: vt<@liveness>) { expr_assert(*) | expr_check(*) | expr_copy(*) | expr_loop_body(*) | expr_do_body(*) | expr_cast(*) | expr_unary(*) | expr_fail(*) | - expr_ret(*) | expr_break | expr_cont | expr_lit(_) | + expr_ret(*) | expr_break | expr_again | expr_lit(_) | expr_block(*) | expr_swap(*) | expr_mac(*) | expr_addr_of(*) { visit::visit_expr(expr, self, vt); } diff --git a/src/rustc/middle/trans/base.rs b/src/rustc/middle/trans/base.rs index 83a9847c163b7..5534140ae721f 100644 --- a/src/rustc/middle/trans/base.rs +++ b/src/rustc/middle/trans/base.rs @@ -1755,28 +1755,7 @@ fn trans_assign_op(bcx: block, ex: @ast::expr, op: ast::binop, _ {} } - // Special case for 
`+= ~[x]` - alt ty::get(t).struct { - ty::ty_vec(_) { - alt src.node { - ast::expr_vec(args, _) { - ret tvec::trans_append_literal(lhs_res.bcx, - lhs_res.val, t, args); - } - _ { } - } - } - _ { } - } let {bcx, val: rhs_val} = trans_temp_expr(lhs_res.bcx, src); - if ty::type_is_sequence(t) { - alt op { - ast::add { - ret tvec::trans_append(bcx, t, lhs_res.val, rhs_val); - } - _ { } - } - } ret trans_eager_binop(bcx, ex.span, op, Load(bcx, lhs_res.val), t, rhs_val, t, save_in(lhs_res.val)); @@ -2413,16 +2392,18 @@ fn lval_static_fn_inner(bcx: block, fn_id: ast::def_id, id: ast::node_id, ccx, node_id_type(bcx, id)))); } - // FIXME: Need to support extern-ABI functions (#1840) - if fn_id.crate == ast::local_crate { - alt bcx.tcx().def_map.find(id) { - some(ast::def_fn(_, ast::extern_fn)) { + alt ty::get(tpt.ty).struct { + ty::ty_fn(fn_ty) { + alt fn_ty.purity { + ast::extern_fn { // Extern functions are just opaque pointers let val = PointerCast(bcx, val, T_ptr(T_i8())); ret lval_no_env(bcx, val, owned_imm); } - _ { } + _ { /* fall through */ } } + } + _ { /* fall through */ } } ret {bcx: bcx, val: val, kind: owned, env: null_env}; @@ -3665,7 +3646,7 @@ fn trans_expr(bcx: block, e: @ast::expr, dest: dest) -> block { assert dest == ignore; ret trans_break(bcx); } - ast::expr_cont { + ast::expr_again { assert dest == ignore; ret trans_cont(bcx); } diff --git a/src/rustc/middle/trans/tvec.rs b/src/rustc/middle/trans/tvec.rs index d3212cd99e060..f7725cdb0771d 100644 --- a/src/rustc/middle/trans/tvec.rs +++ b/src/rustc/middle/trans/tvec.rs @@ -306,75 +306,6 @@ fn trans_estr(bcx: block, s: @str, vstore: ast::vstore, base::store_in_dest(bcx, c, dest) } -fn trans_append(bcx: block, vec_ty: ty::t, lhsptr: ValueRef, - rhs: ValueRef) -> block { - let _icx = bcx.insn_ctxt("tvec::trans_append"); - // Cast to opaque interior vector types if necessary. 
- let ccx = bcx.ccx(); - let unit_ty = ty::sequence_element_type(ccx.tcx, vec_ty); - let strings = ty::type_is_str(vec_ty); - - let lhs = Load(bcx, lhsptr); - let self_append = ICmp(bcx, lib::llvm::IntEQ, lhs, rhs); - let lfill = get_fill(bcx, get_bodyptr(bcx, lhs)); - let rfill = get_fill(bcx, get_bodyptr(bcx, rhs)); - let mut new_fill = Add(bcx, lfill, rfill); - if strings { new_fill = Sub(bcx, new_fill, C_int(ccx, 1)); } - let opaque_lhs = PointerCast(bcx, lhsptr, - T_ptr(T_ptr(T_i8()))); - Call(bcx, ccx.upcalls.vec_grow, - ~[opaque_lhs, new_fill]); - // Was overwritten if we resized - let lhs = Load(bcx, lhsptr); - let rhs = Select(bcx, self_append, lhs, rhs); - - let lbody = get_bodyptr(bcx, lhs); - - let lhs_data = get_dataptr(bcx, lbody); - let mut lhs_off = lfill; - if strings { lhs_off = Sub(bcx, lhs_off, C_int(ccx, 1)); } - let write_ptr = pointer_add(bcx, lhs_data, lhs_off); - let write_ptr_ptr = do_spill_noroot(bcx, write_ptr); - iter_vec_uniq(bcx, rhs, vec_ty, rfill, |bcx, addr, _ty| { - let write_ptr = Load(bcx, write_ptr_ptr); - let bcx = copy_val(bcx, INIT, write_ptr, - load_if_immediate(bcx, addr, unit_ty), unit_ty); - Store(bcx, InBoundsGEP(bcx, write_ptr, ~[C_int(ccx, 1)]), - write_ptr_ptr); - bcx - }) -} - -fn trans_append_literal(bcx: block, vptrptr: ValueRef, vec_ty: ty::t, - vals: ~[@ast::expr]) -> block { - let _icx = bcx.insn_ctxt("tvec::trans_append_literal"); - let mut bcx = bcx, ccx = bcx.ccx(); - let elt_ty = ty::sequence_element_type(bcx.tcx(), vec_ty); - let elt_llty = type_of::type_of(ccx, elt_ty); - let elt_sz = shape::llsize_of(ccx, elt_llty); - let scratch = base::alloca(bcx, elt_llty); - for vec::each(vals) |val| { - bcx = base::trans_expr_save_in(bcx, val, scratch); - let vptr = get_bodyptr(bcx, Load(bcx, vptrptr)); - let old_fill = get_fill(bcx, vptr); - let new_fill = Add(bcx, old_fill, elt_sz); - let do_grow = ICmp(bcx, lib::llvm::IntUGT, new_fill, - get_alloc(bcx, vptr)); - bcx = do base::with_cond(bcx, do_grow) |bcx| { - 
let pt = PointerCast(bcx, vptrptr, - T_ptr(T_ptr(T_i8()))); - Call(bcx, ccx.upcalls.vec_grow, ~[pt, new_fill]); - bcx - }; - let vptr = get_bodyptr(bcx, Load(bcx, vptrptr)); - set_fill(bcx, vptr, new_fill); - let targetptr = pointer_add(bcx, get_dataptr(bcx, vptr), - old_fill); - call_memmove(bcx, targetptr, scratch, elt_sz); - } - bcx -} - type val_and_ty_fn = fn@(block, ValueRef, ty::t) -> result; type iter_vec_block = fn(block, ValueRef, ty::t) -> block; diff --git a/src/rustc/middle/trans/type_use.rs b/src/rustc/middle/trans/type_use.rs index c8ff5ef4c1649..386bf06ef1434 100644 --- a/src/rustc/middle/trans/type_use.rs +++ b/src/rustc/middle/trans/type_use.rs @@ -242,7 +242,7 @@ fn mark_for_expr(cx: ctx, e: @expr) { }) } expr_alt(_, _, _) | expr_block(_) | expr_if(_, _, _) | - expr_while(_, _) | expr_fail(_) | expr_break | expr_cont | + expr_while(_, _) | expr_fail(_) | expr_break | expr_again | expr_unary(_, _) | expr_lit(_) | expr_assert(_) | expr_check(_, _) | expr_if_check(_, _, _) | expr_mac(_) | expr_addr_of(_, _) | expr_ret(_) | expr_loop(_) | diff --git a/src/rustc/middle/tstate/pre_post_conditions.rs b/src/rustc/middle/tstate/pre_post_conditions.rs index 545f6ad6aa97c..c2c8810ec1bb1 100644 --- a/src/rustc/middle/tstate/pre_post_conditions.rs +++ b/src/rustc/middle/tstate/pre_post_conditions.rs @@ -446,7 +446,7 @@ fn find_pre_post_expr(fcx: fn_ctxt, e: @expr) { join_then_else(fcx, p, conseq, maybe_alt, e.id, if_check); } expr_break { clear_pp(expr_pp(fcx.ccx, e)); } - expr_cont { clear_pp(expr_pp(fcx.ccx, e)); } + expr_again { clear_pp(expr_pp(fcx.ccx, e)); } expr_mac(_) { fcx.ccx.tcx.sess.bug("unexpanded macro"); } } } diff --git a/src/rustc/middle/tstate/states.rs b/src/rustc/middle/tstate/states.rs index adf1efc85626f..f6cf240eeca34 100644 --- a/src/rustc/middle/tstate/states.rs +++ b/src/rustc/middle/tstate/states.rs @@ -498,7 +498,7 @@ fn find_pre_post_state_expr(fcx: fn_ctxt, pres: prestate, e: @expr) -> bool { ret join_then_else(fcx, p, conseq, 
maybe_alt, e.id, if_check, pres); } expr_break { ret pure_exp(fcx.ccx, e.id, pres); } - expr_cont { ret pure_exp(fcx.ccx, e.id, pres); } + expr_again { ret pure_exp(fcx.ccx, e.id, pres); } } } diff --git a/src/rustc/middle/typeck/check.rs b/src/rustc/middle/typeck/check.rs index 647498a97e1d3..a983240489ac8 100644 --- a/src/rustc/middle/typeck/check.rs +++ b/src/rustc/middle/typeck/check.rs @@ -1220,7 +1220,7 @@ fn check_expr_with_unifier(fcx: @fn_ctxt, fcx.write_bot(id); } ast::expr_break { fcx.write_bot(id); bot = true; } - ast::expr_cont { fcx.write_bot(id); bot = true; } + ast::expr_again { fcx.write_bot(id); bot = true; } ast::expr_ret(expr_opt) { bot = true; let ret_ty = alt fcx.indirect_ret_ty { diff --git a/src/rustc/middle/typeck/check/method.rs b/src/rustc/middle/typeck/check/method.rs index 709db8733d584..b91f9778e1c84 100644 --- a/src/rustc/middle/typeck/check/method.rs +++ b/src/rustc/middle/typeck/check/method.rs @@ -77,16 +77,20 @@ class lookup { // loop for impls in scope. Note: I don't love these // semantics, but that's what we had so I am preserving // it. - if self.candidates.len() > 0u { - break; - } + if self.candidates.len() > 0u { break; } + + // now look for impls in scope, but don't look for impls that + // would require doing an implicit borrow of the lhs. + self.add_candidates_from_scope(false); - self.add_candidates_from_scope(); + // if we found anything, stop before trying borrows + if self.candidates.len() > 0u { break; } + + // now look for impls in scope that might require a borrow + self.add_candidates_from_scope(true); // if we found anything, stop before attempting auto-deref. - if self.candidates.len() > 0u { - break; - } + if self.candidates.len() > 0u { break; } // check whether we can autoderef and if so loop around again. 
alt ty::deref(self.tcx(), self.self_ty, false) { @@ -290,7 +294,7 @@ class lookup { */ } - fn add_candidates_from_scope() { + fn add_candidates_from_scope(use_assignability: bool) { let impls_vecs = self.fcx.ccx.impl_map.get(self.expr.id); let mut added_any = false; @@ -306,13 +310,18 @@ class lookup { let {substs: impl_substs, ty: impl_ty} = impl_self_ty(self.fcx, im.did); - // if we can assign the caller to the callee, that's a - // potential match. Collect those in the vector. - let can_assign = self.fcx.can_mk_assignty( - self.self_expr, self.borrow_scope, - self.self_ty, impl_ty); - #debug["can_assign = %?", can_assign]; - alt can_assign { + // Depending on our argument, we find potential + // matches either by checking subtypability or + // type assignability. Collect the matches. + let matches = if use_assignability { + self.fcx.can_mk_assignty( + self.self_expr, self.borrow_scope, + self.self_ty, impl_ty) + } else { + self.fcx.can_mk_subty(self.self_ty, impl_ty) + }; + #debug["matches = %?", matches]; + alt matches { result::err(_) { /* keep looking */ } result::ok(_) { if !self.candidate_impls.contains_key(im.did) { diff --git a/src/rustc/util/common.rs b/src/rustc/util/common.rs index 6a594879d1d41..26b3077210511 100644 --- a/src/rustc/util/common.rs +++ b/src/rustc/util/common.rs @@ -57,7 +57,7 @@ fn loop_query(b: ast::blk, p: fn@(ast::expr_) -> bool) -> bool { fn has_nonlocal_exits(b: ast::blk) -> bool { do loop_query(b) |e| { alt e { - ast::expr_break | ast::expr_cont { true } + ast::expr_break | ast::expr_again { true } _ { false }}} } diff --git a/src/test/auxiliary/extern-crosscrate-source.rs b/src/test/auxiliary/extern-crosscrate-source.rs new file mode 100644 index 0000000000000..55d08a0d0f47d --- /dev/null +++ b/src/test/auxiliary/extern-crosscrate-source.rs @@ -0,0 +1,22 @@ +#[link(name = "externcallback", + vers = "0.1")]; + +#[crate_type = "lib"]; + +extern mod rustrt { + fn rust_dbg_call(cb: *u8, + data: libc::uintptr_t) -> libc::uintptr_t; 
+} + +fn fact(n: uint) -> uint { + #debug("n = %?", n); + rustrt::rust_dbg_call(cb, n) +} + +extern fn cb(data: libc::uintptr_t) -> libc::uintptr_t { + if data == 1u { + data + } else { + fact(data - 1u) * data + } +} diff --git a/src/test/bench/shootout-k-nucleotide-pipes.rs b/src/test/bench/shootout-k-nucleotide-pipes.rs new file mode 100644 index 0000000000000..7212bbc765b1a --- /dev/null +++ b/src/test/bench/shootout-k-nucleotide-pipes.rs @@ -0,0 +1,250 @@ +// xfail-pretty + +// multi tasking k-nucleotide + +import io::reader_util; + +use std; +import std::map; +import std::map::hashmap; +import std::sort; + +import stream::{stream, chan, port}; + +// After a snapshot, this should move into core, or std. +mod stream { + import option::unwrap; + + proto! streamp { + open:send { + data(T) -> open + } + } + + type chan = { mut endp: option> }; + type port = { mut endp: option> }; + + fn stream() -> (chan, port) { + let (c, s) = streamp::init(); + ({ mut endp: some(c) }, { mut endp: some(s) }) + } + + impl chan for chan { + fn send(+x: T) { + let mut endp = none; + endp <-> self.endp; + self.endp = some( + streamp::client::data(unwrap(endp), x)) + } + } + + impl port for port { + fn recv() -> T { + let mut endp = none; + endp <-> self.endp; + let streamp::data(x, endp) = unwrap( + pipes::recv(unwrap(endp))); + self.endp = some(endp); + x + } + } +} + +// given a map, print a sorted version of it +fn sort_and_fmt(mm: hashmap<~[u8], uint>, total: uint) -> str { + fn pct(xx: uint, yy: uint) -> float { + ret (xx as float) * 100f / (yy as float); + } + + fn le_by_val(kv0: (TT,UU), kv1: (TT,UU)) -> bool { + let (_, v0) = kv0; + let (_, v1) = kv1; + ret v0 >= v1; + } + + fn le_by_key(kv0: (TT,UU), kv1: (TT,UU)) -> bool { + let (k0, _) = kv0; + let (k1, _) = kv1; + ret k0 <= k1; + } + + // sort by key, then by value + fn sortKV(orig: ~[(TT,UU)]) -> ~[(TT,UU)] { + ret sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig)); + } + + let mut pairs = ~[]; + + // map -> 
[(k,%)] + mm.each(fn&(key: ~[u8], val: uint) -> bool { + vec::push(pairs, (key, pct(val, total))); + ret true; + }); + + let pairs_sorted = sortKV(pairs); + + let mut buffer = ""; + + pairs_sorted.each(fn&(kv: (~[u8], float)) -> bool unsafe { + let (k,v) = kv; + buffer += (#fmt["%s %0.3f\n", str::to_upper(str::unsafe::from_bytes(k)), v]); + ret true; + }); + + ret buffer; +} + +// given a map, search for the frequency of a pattern +fn find(mm: hashmap<~[u8], uint>, key: str) -> uint { + alt mm.find(str::bytes(str::to_lower(key))) { + option::none { ret 0u; } + option::some(num) { ret num; } + } +} + +// given a map, increment the counter for a key +fn update_freq(mm: hashmap<~[u8], uint>, key: &[u8]) { + let key = vec::slice(key, 0, key.len()); + alt mm.find(key) { + option::none { mm.insert(key, 1u ); } + option::some(val) { mm.insert(key, 1u + val); } + } +} + +// given a ~[u8], for each window call a function +// i.e., for "hello" and windows of size four, +// run it("hell") and it("ello"), then return "llo" +fn windows_with_carry(bb: ~[const u8], nn: uint, + it: fn(window: &[u8])) -> ~[u8] { + let mut ii = 0u; + + let len = vec::len(bb); + while ii < len - (nn - 1u) { + it(vec::view(bb, ii, ii+nn)); + ii += 1u; + } + + ret vec::slice(bb, len - (nn - 1u), len); +} + +fn make_sequence_processor(sz: uint, from_parent: stream::port<~[u8]>, + to_parent: stream::chan) { + + let freqs: hashmap<~[u8], uint> = map::bytes_hash(); + let mut carry: ~[u8] = ~[]; + let mut total: uint = 0u; + + let mut line: ~[u8]; + + loop { + + line = from_parent.recv(); + if line == ~[] { break; } + + carry = windows_with_carry(carry + line, sz, |window| { + update_freq(freqs, window); + total += 1u; + }); + } + + let buffer = alt sz { + 1u { sort_and_fmt(freqs, total) } + 2u { sort_and_fmt(freqs, total) } + 3u { #fmt["%u\t%s", find(freqs, "GGT"), "GGT"] } + 4u { #fmt["%u\t%s", find(freqs, "GGTA"), "GGTA"] } + 6u { #fmt["%u\t%s", find(freqs, "GGTATT"), "GGTATT"] } + 12u { #fmt["%u\t%s", 
find(freqs, "GGTATTTTAATT"), "GGTATTTTAATT"] } + 18u { #fmt["%u\t%s", find(freqs, "GGTATTTTAATTTATAGT"), "GGTATTTTAATTTATAGT"] } + _ { "" } + }; + + //comm::send(to_parent, #fmt["yay{%u}", sz]); + to_parent.send(buffer); +} + +// given a FASTA file on stdin, process sequence THREE +fn main(args: ~[str]) { + let rdr = if os::getenv("RUST_BENCH").is_some() { + // FIXME: Using this compile-time env variable is a crummy way to + // get to this massive data set, but #include_bin chokes on it (#2598) + let path = path::connect( + #env("CFG_SRC_DIR"), + "src/test/bench/shootout-k-nucleotide.data" + ); + result::get(io::file_reader(path)) + } else { + io::stdin() + }; + + + + // initialize each sequence sorter + let sizes = ~[1u,2u,3u,4u,6u,12u,18u]; + let streams = vec::map(sizes, |_sz| some(stream())); + let streams = vec::to_mut(streams); + let mut from_child = ~[]; + let to_child = vec::mapi(sizes, |ii, sz| { + let mut stream = none; + stream <-> streams[ii]; + let (to_parent_, from_child_) = option::unwrap(stream); + + vec::push(from_child, from_child_); + + let (to_child, from_parent) = stream::stream(); + + do task::spawn_with(from_parent) |from_parent| { + make_sequence_processor(sz, from_parent, to_parent_); + }; + + to_child + }); + + + // latch stores true after we've started + // reading the sequence of interest + let mut proc_mode = false; + + while !rdr.eof() { + let line: str = rdr.read_line(); + + if str::len(line) == 0u { cont; } + + alt (line[0], proc_mode) { + + // start processing if this is the one + ('>' as u8, false) { + alt str::find_str_from(line, "THREE", 1u) { + option::some(_) { proc_mode = true; } + option::none { } + } + } + + // break our processing + ('>' as u8, true) { break; } + + // process the sequence for k-mers + (_, true) { + let line_bytes = str::bytes(line); + + for sizes.eachi |ii, _sz| { + let mut lb = line_bytes; + to_child[ii].send(lb); + } + } + + // whatever + _ { } + } + } + + // finish... 
+ for sizes.eachi |ii, _sz| { + to_child[ii].send(~[]); + } + + // now fetch and print result messages + for sizes.eachi |ii, _sz| { + io::println(from_child[ii].recv()); + } +} + diff --git a/src/test/run-pass/assignability-iface.rs b/src/test/run-pass/assignability-iface.rs new file mode 100644 index 0000000000000..47cf7535a6e08 --- /dev/null +++ b/src/test/run-pass/assignability-iface.rs @@ -0,0 +1,43 @@ +// Tests that type assignability is used to search for instances when +// making method calls, but only if there aren't any matches without +// it. + +iface iterable { + fn iterate(blk: fn(A) -> bool); +} + +impl vec/& of iterable for &[const A] { + fn iterate(f: fn(A) -> bool) { + vec::each(self, f); + } +} + +impl vec of iterable for ~[const A] { + fn iterate(f: fn(A) -> bool) { + vec::each(self, f); + } +} + +fn length>(x: T) -> uint { + let mut len = 0; + for x.iterate() |_y| { len += 1 } + ret len; +} + +fn main() { + let x = ~[0,1,2,3]; + // Call a method + for x.iterate() |y| { assert x[y] == y; } + // Call a parameterized function + assert length(x) == vec::len(x); + // Call a parameterized function, with type arguments that require + // a borrow + assert length::(x) == vec::len(x); + + // Now try it with a type that *needs* to be borrowed + let z = [0,1,2,3]/_; + // Call a method + for z.iterate() |y| { assert z[y] == y; } + // Call a parameterized function + assert length::(z) == vec::len(z); +} diff --git a/src/test/run-pass/extern-crosscrate.rs b/src/test/run-pass/extern-crosscrate.rs new file mode 100644 index 0000000000000..f3338475912ec --- /dev/null +++ b/src/test/run-pass/extern-crosscrate.rs @@ -0,0 +1,14 @@ +//aux-build:extern-crosscrate-source.rs + +use externcallback(vers = "0.1"); + +fn fact(n: uint) -> uint { + #debug("n = %?", n); + externcallback::rustrt::rust_dbg_call(externcallback::cb, n) +} + +fn main() { + let result = fact(10u); + #debug("result = %?", result); + assert result == 3628800u; +} diff --git 
a/src/test/run-pass/pipe-bank-proto.rs b/src/test/run-pass/pipe-bank-proto.rs new file mode 100644 index 0000000000000..e6a4a011b30d2 --- /dev/null +++ b/src/test/run-pass/pipe-bank-proto.rs @@ -0,0 +1,70 @@ +// xfail-pretty + +// An example of the bank protocol from eholk's blog post. +// +// http://theincredibleholk.wordpress.com/2012/07/06/rusty-pipes/ + +import pipes::recv; + +type username = str; +type password = str; +type money = float; +type amount = float; + +proto! bank { + login:send { + login(username, password) -> login_response + } + + login_response:recv { + ok -> connected, + invalid -> login + } + + connected:send { + deposit(money) -> connected, + withdrawal(amount) -> withdrawal_response + } + + withdrawal_response:recv { + money(money) -> connected, + insufficient_funds -> connected + } +} + +fn macros() { + #macro[ + [#move[x], + unsafe { let y <- *ptr::addr_of(x); y }] + ]; +} + +fn bank_client(+bank: bank::client::login) { + import bank::*; + + let bank = client::login(bank, "theincredibleholk", "1234"); + let bank = alt recv(bank) { + some(ok(connected)) { + #move(connected) + } + some(invalid(_)) { fail "login unsuccessful" } + none { fail "bank closed the connection" } + }; + + let bank = client::deposit(bank, 100.00); + let bank = client::withdrawal(bank, 50.00); + alt recv(bank) { + some(money(m, _)) { + io::println("Yay! I got money!"); + } + some(insufficient_funds(_)) { + fail "someone stole my money" + } + none { + fail "bank closed the connection" + } + } +} + +fn main() { +} \ No newline at end of file