diff -Npur linux-2.6-block/Documentation/tp_smapi.txt linux-2.6-block-custom/Documentation/tp_smapi.txt
--- linux-2.6-block/Documentation/tp_smapi.txt	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/Documentation/tp_smapi.txt	2008-09-26 19:45:38.659791957 +0900
@@ -0,0 +1,265 @@
+tp_smapi version 0.37
+IBM ThinkPad hardware functions driver
+
+Author:  Shem Multinymous <multinymous@gmail.com>
+Project: http://sourceforge.net/projects/tpctl
+Wiki:    http://thinkwiki.org/wiki/tp_smapi
+List:    linux-thinkpad@linux-thinkpad.org
+         (http://mailman.linux-thinkpad.org/mailman/listinfo/linux-thinkpad)
+
+Description
+-----------
+
+ThinkPad laptops include a proprietary interface called SMAPI BIOS 
+(System Management Application Program Interface) which provides some
+hardware control functionality that is not accessible by other means.
+
+This driver exposes some features of the SMAPI BIOS through a sysfs 
+interface. It is suitable for newer models, on which SMAPI is invoked 
+through IO port writes. Older models use a different SMAPI interface; 
+for those, try the "thinkpad" module from the "tpctl" package.
+
+WARNING: 
+This driver uses undocumented features and direct hardware access.
+It thus cannot be guaranteed to work, and may cause arbitrary damage
+(especially on models it wasn't tested on).
+
+
+Module parameters
+-----------------
+
+tp_smapi module: 
+  debug=1 enables verbose dmesg output.
+
+
+Usage
+-----
+
+Control of battery charging thresholds (in percents of current full charge
+capacity):
+
+# echo 40 > /sys/devices/platform/smapi/BAT0/start_charge_thresh
+# echo 70 > /sys/devices/platform/smapi/BAT0/stop_charge_thresh
+# cat /sys/devices/platform/smapi/BAT0/*_charge_thresh
+
+    (This is useful since Li-Ion batteries wear out much faster at very 
+     high or low charge levels. The driver will also keep the thresholds
+     across suspend-to-disk with AC disconnected; this isn't done 
+     automatically by the hardware.)
+
+Inhibiting battery charging for 17 minutes (overrides thresholds):
+
+# echo 17 > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes
+# echo 0  > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes  # stop
+# cat /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes
+
+    (This can be used to control which battery is charged when using an 
+     Ultrabay battery.)
+
+Forcing battery discharging even if AC power available:
+
+# echo 1 > /sys/devices/platform/smapi/BAT0/force_discharge  # start discharge
+# echo 0 > /sys/devices/platform/smapi/BAT0/force_discharge  # stop discharge
+# cat /sys/devices/platform/smapi/BAT0/force_discharge
+
+    (This can be used to control which battery is discharged when using an
+     Ultrabay battery.)
+
+Misc read-only battery status attributes (see note about HDAPS below):
+
+/sys/devices/platform/smapi/BAT0/installed   # 0 or 1
+/sys/devices/platform/smapi/BAT0/state       # idle/charging/discharging
+/sys/devices/platform/smapi/BAT0/cycle_count # integer counter
+/sys/devices/platform/smapi/BAT0/current_now # instantaneous current
+/sys/devices/platform/smapi/BAT0/current_avg # last minute average
+/sys/devices/platform/smapi/BAT0/power_now   # instantaneous power
+/sys/devices/platform/smapi/BAT0/power_avg   # last minute average
+/sys/devices/platform/smapi/BAT0/last_full_capacity         # in mWh
+/sys/devices/platform/smapi/BAT0/remaining_percent          # remaining percent of energy
+/sys/devices/platform/smapi/BAT0/remaining_running_time     # in minutes, by last minute average power
+/sys/devices/platform/smapi/BAT0/remaining_running_time_now # in minutes, by instantaneous power
+/sys/devices/platform/smapi/BAT0/remaining_charging_time    # in minutes
+/sys/devices/platform/smapi/BAT0/remaining_capacity         # in mWh
+/sys/devices/platform/smapi/BAT0/design_capacity            # in mWh
+/sys/devices/platform/smapi/BAT0/voltage           # in mV
+/sys/devices/platform/smapi/BAT0/design_voltage    # in mV
+/sys/devices/platform/smapi/BAT0/charging_max_current  # max charging current
+/sys/devices/platform/smapi/BAT0/charging_max_voltage  # max charging voltage
+/sys/devices/platform/smapi/BAT0/group{0,1,2,3}_voltage # see below
+/sys/devices/platform/smapi/BAT0/manufacturer      # string
+/sys/devices/platform/smapi/BAT0/model             # string
+/sys/devices/platform/smapi/BAT0/barcoding         # string
+/sys/devices/platform/smapi/BAT0/chemistry         # string
+/sys/devices/platform/smapi/BAT0/serial            # integer
+/sys/devices/platform/smapi/BAT0/manufacture_date  # YYYY-MM-DD
+/sys/devices/platform/smapi/BAT0/first_use_date    # YYYY-MM-DD
+/sys/devices/platform/smapi/BAT0/temperature  # in milli-Celsius
+/sys/devices/platform/smapi/BAT0/dump         # see below
+/sys/devices/platform/smapi/ac_connected      # 0 or 1
+
+The BAT0/group{0,1,2,3}_voltage attribute refers to the separate cell groups
+in each battery. For example, on the ThinkPad 600, X3x, T4x and R5x models,
+the battery contains 3 cell groups in series, where each group consists of 2
+or 3 cells connected in parallel. The voltage of each group is given by these
+attributes, and their sum (roughly) equals the "voltage" attribute.
+(The effective performance of the battery is determined by the weakest group,
+i.e., the one whose voltage changes most rapidly during dis/charging.)
+
+The "BAT0/dump" attribute gives a hex dump of the raw status data, which
+contains additional data not in the above (if you can figure it out). Some
+unused values are autodetected and replaced by "--".
+
+In all of the above, replace BAT0 with BAT1 to address the 2nd battery (e.g.
+in the UltraBay).
+
+
+Raw SMAPI calls:
+
+/sys/devices/platform/smapi/smapi_request
+This performs raw SMAPI calls. It uses a bad interface that cannot handle
+multiple simultaneous access. Don't touch it, it's for development only.
+If you did touch it, you would do something like
+# echo '211a 100 0 0' > /sys/devices/platform/smapi/smapi_request
+# cat /sys/devices/platform/smapi/smapi_request
+and notice that in the output "211a 34b b2 0 0 0 'OK'", the "4b" in the 2nd
+value, converted to decimal is 75: the current charge stop threshold.
+
+
+Model-specific status
+---------------------
+
+Works (at least partially) on the following ThinkPad models:
+* A30
+* G41
+* R40, R50p, R51, R52
+* T23, T40, T40p, T41, T41p, T42, T42p, T43, T43p, T60
+* X24, X31, X32, X40, X41, X60
+* Z60t, Z61m
+
+Not all functions are available on all models; for detailed status, see:
+  http://thinkwiki.org/wiki/tp_smapi
+
+Please report success/failure by e-mail or on the Wiki. 
+If you get a "not implemented" or "not supported" message, your laptop 
+probably just can't do that (at least not via the SMAPI BIOS).
+For negative reports, follow the bug reporting guidelines below.
+If you send me the necessary technical data (i.e., SMAPI function 
+interfaces), I will support additional models.
+
+
+Additional HDAPS features
+-------------------------
+
+The modified hdaps driver has several improvements on the one in mainline
+(beyond resolving the conflict with thinkpad_ec and tp_smapi):
+
+- Fixes reliability and improves support for recent ThinkPad models
+  (especially *60 and newer). Unlike the mainline driver, the modified hdaps
+  correctly follows the Embedded Controller communication protocol.
+
+- Extends the "invert" parameter to cover all possible axis orientations.
+  The possible values are as follows.
+  Let X,Y denote the hardware readouts.
+  Let R denote the laptop's roll (tilt left/right).
+  Let P denote the laptop's pitch (tilt forward/backward).
+    invert=0:   R= X  P= Y   (same as mainline)
+    invert=1:   R=-X  P=-Y   (same as mainline)
+    invert=2:   R=-X  P= Y   (new)
+    invert=3:   R= X  P=-Y   (new)
+    invert=4:   R= Y  P= X   (new)
+    invert=5:   R=-Y  P=-X   (new)
+    invert=6:   R=-Y  P= X   (new)
+    invert=7:   R= Y  P=-X   (new)
+  It's probably easiest to just try all 8 possibilities and see which yields
+  correct results (e.g., in the hdaps-gl visualisation).
+
+- Adds a whitelist which automatically sets the correct axis orientation for
+  some models. If the value for your model is wrong or missing, you can override
+  it using the "invert" parameter. Please also update the tables at
+  http://www.thinkwiki.org/wiki/tp_smapi and
+  http://www.thinkwiki.org/wiki/List_of_DMI_IDs
+  and submit a patch for the whitelist in hdaps.c.
+
+- Provides new attributes:
+  /sys/devices/platform/hdaps/sampling_rate:
+    This determines the frequency at which the host queries the embedded
+    controller for accelerometer data (and informs the hdaps input devices).
+    Default=50.
+  /sys/devices/platform/hdaps/oversampling_ratio:
+    When set to X, the embedded controller is told to do physical accelerometer
+    measurements at a rate that is X times higher than the rate at which
+    the driver reads those measurements (i.e., X*sampling_rate). This
+    makes the readouts from the embedded controller more fresh, and is also
+    useful for the running average filter (see next). Default=5
+  /sys/devices/platform/hdaps/running_avg_filter_order:
+    When set to X, reported readouts will be the average of the last X physical
+    accelerometer measurements. Current firmware allows 1<=X<=8. Setting to a
+    high value decreases readout fluctuations. The averaging is handled by the
+    embedded controller, so no CPU resources are used. Higher values make the
+    readouts smoother, since it averages out both sensor noise (good) and abrupt
+    changes (bad). Default=2.
+  /sys/devices/platform/hdaps/fake_data_mode:
+    If set to 1, enables a test mode where the physical accelerometer readouts
+    are replaced with an incrementing counter. This is useful for checking the
+    regularity of the sampling interval and driver<->userspace communication.
+
+- Provides a second input device, which publishes the raw accelerometer
+  measurements (without the fuzzing needed for joystick emulation). This input
+  device can be matched by a udev rule such as the following (all on one line):
+    KERNEL=="event[0-9]*", ATTRS{phys}=="hdaps/input1",
+    ATTRS{modalias}=="input:b0019v1014p5054e4801-*",
+    SYMLINK+="input/hdaps/accelerometer-event"
+
+A new version of the hdapsd userspace daemon, which uses the input device
+interface instead of polling sysfs, is available separately. Using this reduces
+the total interrupts per second generated by hdaps+hdapsd (on tickless kernels)
+to 50, down from a value that fluctuates between 50 and 100. Set the
+sampling_rate sysfs attribute to a lower value to further reduce interrupts,
+at the expense of response latency.
+
+Licensing note: all my changes to the HDAPS driver are licensed under the
+GPL version 2 or, at your option and to the extent allowed by derivation from
+prior works, any later version. My version of hdaps is derived work from the
+mainline version, which at the time of writing is available only under
+GPL version 2.
+
+Bug reporting
+-------------
+
+Mail <multinymous@gmail.com>. Please include:
+* Details about your model,
+* Relevant "dmesg" output. Make sure thinkpad_ec and tp_smapi are loaded with
+  the "debug=1" parameter (e.g., use "make load HDAPS=1 DEBUG=1").
+* Output of "dmidecode | grep -C5 Product"
+* Does the failed functionality work under Windows?
+
+
+More about SMAPI
+----------------
+
+For hints about what may be possible via the SMAPI BIOS and how, see:
+
+* IBM Technical Reference Manual for the ThinkPad 770
+  (http://www-307.ibm.com/pc/support/site.wss/document.do?lndocid=PFAN-3TUQQD)
+* Exported symbols in PWRMGRIF.DLL or TPPWRW32.DLL (e.g., use "objdump -x").
+* drivers/char/mwave/smapi.c in the Linux kernel tree.
+* The "thinkpad" SMAPI module (http://tpctl.sourceforge.net).
+* The SMAPI_* constants in tp_smapi.c (some of these are presently unused).
+
+Note that in the above Technical Reference and in the "thinkpad" module,
+SMAPI is invoked through a function call to some physical address. However,
+the interface used by tp_smapi and the above mwave driver, and apparently
+required by newer ThinkPads, is different: you set the parameters up in the
+CPU's registers and write to ports 0xB2 (the APM control port) and 0x4F; this
+triggers an SMI (System Management Interrupt), causing the CPU to enter
+SMM (System Management Mode) and run the BIOS firmware; the results are 
+returned in the CPU's registers. It is not clear what is the relation between 
+the two variants of SMAPI, though the assignment of error codes seems to be 
+similar.
+
+In addition, the embedded controller on ThinkPad laptops has a non-standard 
+interface at IO ports 0x1600-0x161F (mapped to LPC channel 3 of the H8S chip).
+The interface provides various system management services (currently known: 
+battery information and accelerometer readouts). For more information see the
+thinkpad_ec module and the H8S hardware documentation:
+http://documentation.renesas.com/eng/products/mpumcu/rej09b0300_2140bhm.pdf
diff -Npur linux-2.6-block/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c linux-2.6-block-custom/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
--- linux-2.6-block/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c	2008-09-27 16:12:11.702893594 +0900
+++ linux-2.6-block-custom/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c	2008-09-26 21:22:13.947958120 +0900
@@ -25,6 +25,10 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
+/* This file has been patched with Linux PHC: http://phc.athousandnights.de
+ * Patch version: linux-phc-0.3.1-kernel-vanilla-2.6.26.patch
+ */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -58,6 +62,10 @@ enum {
 
 #define INTEL_MSR_RANGE		(0xffff)
 #define CPUID_6_ECX_APERFMPERF_CAPABILITY	(0x1)
+#define INTEL_MSR_VID_MASK	(0x00ff)	/* low byte: voltage id (VID) */
+#define INTEL_MSR_FID_MASK	(0xff00)	/* high byte: frequency id (FID) */
+#define INTEL_MSR_FID_SHIFT	(0x8)	/* shift that moves the FID down to bit 0 */
+#define PHC_VERSION_STRING	"0.3.2:1"	/* printed by the phc_version attribute */
 
 struct acpi_cpufreq_data {
 	struct acpi_processor_performance *acpi_data;
@@ -65,6 +73,7 @@ struct acpi_cpufreq_data {
 	unsigned int max_freq;
 	unsigned int resume;
 	unsigned int cpu_feature;
+	acpi_integer *original_controls;
 };
 
 static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
@@ -104,13 +113,14 @@ static unsigned extract_io(u32 value, st
 static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
 {
 	int i;
+	u32 fid;
 	struct acpi_processor_performance *perf;
 
-	msr &= INTEL_MSR_RANGE;
+	fid = msr & INTEL_MSR_FID_MASK;
 	perf = data->acpi_data;
 
 	for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
-		if (msr == perf->states[data->freq_table[i].index].status)
+		if (fid == (perf->states[data->freq_table[i].index].status & INTEL_MSR_FID_MASK))
 			return data->freq_table[i].frequency;
 	}
 	return data->freq_table[0].frequency;
@@ -742,6 +752,8 @@ static int acpi_cpufreq_cpu_exit(struct 
 		per_cpu(drv_data, policy->cpu) = NULL;
 		acpi_processor_unregister_performance(data->acpi_data,
 						      policy->cpu);
+		if (data->original_controls)
+			kfree(data->original_controls);
 		kfree(data);
 	}
 
@@ -759,8 +771,473 @@ static int acpi_cpufreq_resume(struct cp
 	return 0;
 }
 
+
+
+
+/* sysfs interface to change operating points voltages */
+
+static unsigned int extract_fid_from_control(unsigned int control)
+{
+	return (control >> INTEL_MSR_FID_SHIFT) & (INTEL_MSR_FID_MASK >> INTEL_MSR_FID_SHIFT); /* FID bits, moved down to bit 0 */
+}
+
+static unsigned int extract_vid_from_control(unsigned int control)
+{
+	return control & INTEL_MSR_VID_MASK; /* keep only the bits selected by the VID mask */
+}
+
+
+/*
+ * Tell whether the controls of this CPU may be inspected and changed: the
+ * driver data, its ACPI performance data and its frequency table must all be
+ * present, and cpu_feature must be SYSTEM_INTEL_MSR_CAPABLE.
+ */
+static bool check_cpu_control_capability(struct acpi_cpufreq_data *data)
+{
+	if (unlikely(!data || !data->acpi_data || !data->freq_table))
+		return false;
+
+	/* any access method other than the Intel MSR one is refused */
+	return data->cpu_feature == SYSTEM_INTEL_MSR_CAPABLE;
+}
+
+
+/*
+ * On first use, copy the control value of every performance state into
+ * data->original_controls (freed in acpi_cpufreq_cpu_exit()), so the phc_*
+ * sysfs writers can compare new values against these saved defaults.
+ * Returns 0 on success or if already done, -ENOMEM if allocation fails.
+ */
+static ssize_t check_origial_table (struct acpi_cpufreq_data *data)
+{
+	struct acpi_processor_performance *acpi_data = data->acpi_data;
+	unsigned int state_index;
+
+	if (data->original_controls != NULL)
+		return 0;	/* backup already taken */
+
+	data->original_controls = kcalloc(acpi_data->state_count,
+	                                  sizeof(acpi_integer), GFP_KERNEL);
+	if (data->original_controls == NULL) {
+		printk(KERN_ERR "failed to allocate memory for original control values\n");
+		return -ENOMEM;
+	}
+	for (state_index = 0; state_index < acpi_data->state_count; state_index++)
+		data->original_controls[state_index] = acpi_data->states[state_index].control;
+	return 0;
+}
+
+/*
+ * sysfs read handler for "phc_vids": for every entry of the frequency table,
+ * prints the voltage id held in the current control value of its performance
+ * state, each followed by a space, then a closing newline.
+ */
+static ssize_t show_freq_attr_vids(struct cpufreq_policy *policy, char *buf)
+{
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct cpufreq_frequency_table *entry;
+	ssize_t len = 0;
+
+	/* bail out unless the CPU's controls can be changed */
+	if (!check_cpu_control_capability(data))
+		return -ENODEV;
+
+	for (entry = data->freq_table; entry->frequency != CPUFREQ_TABLE_END; entry++) {
+		unsigned int control = data->acpi_data->states[entry->index].control;
+		unsigned int vid = extract_vid_from_control(control);
+
+		len += sprintf(buf + len, "%u ", vid);
+	}
+	len += sprintf(buf + len, "\n");
+
+	return len;
+}
+
+/*
+ * sysfs read handler for "phc_default_vids": for every entry of the frequency
+ * table, prints the voltage id found in the saved original control value.
+ */
+static ssize_t show_freq_attr_default_vids(struct cpufreq_policy *policy, char *buf)
+{
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct cpufreq_frequency_table *entry;
+	ssize_t len = 0;
+	ssize_t err;
+
+	if (!check_cpu_control_capability(data))
+		return -ENODEV;
+
+	/* make sure the original control values have been saved */
+	err = check_origial_table(data);
+	if (err != 0)
+		return err;
+
+	for (entry = data->freq_table; entry->frequency != CPUFREQ_TABLE_END; entry++) {
+		unsigned int vid = extract_vid_from_control(data->original_controls[entry->index]);
+
+		len += sprintf(buf + len, "%u ", vid);
+	}
+	len += sprintf(buf + len, "\n");
+
+	return len;
+}
+
+/*
+ * sysfs read handler for "phc_fids": for every entry of the frequency table,
+ * prints the frequency id held in the current control value of its
+ * performance state, each followed by a space, then a closing newline.
+ */
+static ssize_t show_freq_attr_fids(struct cpufreq_policy *policy, char *buf)
+{
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct cpufreq_frequency_table *entry;
+	ssize_t len = 0;
+
+	/* bail out unless the CPU's controls can be changed */
+	if (!check_cpu_control_capability(data))
+		return -ENODEV;
+
+	for (entry = data->freq_table; entry->frequency != CPUFREQ_TABLE_END; entry++) {
+		unsigned int control = data->acpi_data->states[entry->index].control;
+		unsigned int fid = extract_fid_from_control(control);
+
+		len += sprintf(buf + len, "%u ", fid);
+	}
+	len += sprintf(buf + len, "\n");
+
+	return len;
+}
+
+/*
+ * sysfs read handler for "phc_controls": for every entry of the frequency
+ * table, prints the frequency id and the voltage id held in the current
+ * control value of its performance state as "fid:vid ", then a closing
+ * newline.
+ */
+static ssize_t show_freq_attr_controls(struct cpufreq_policy *policy, char *buf)
+{
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct cpufreq_frequency_table *entry;
+	ssize_t len = 0;
+
+	/* bail out unless the CPU's controls can be changed */
+	if (!check_cpu_control_capability(data))
+		return -ENODEV;
+
+	for (entry = data->freq_table; entry->frequency != CPUFREQ_TABLE_END; entry++) {
+		unsigned int control = data->acpi_data->states[entry->index].control;
+		unsigned int fid = extract_fid_from_control(control);
+		unsigned int vid = extract_vid_from_control(control);
+
+		len += sprintf(buf + len, "%u:%u ", fid, vid);
+	}
+	len += sprintf(buf + len, "\n");
+
+	return len;
+}
+
+/*
+ * sysfs read handler for "phc_default_controls": for every entry of the
+ * frequency table, prints "fid:vid " taken from the saved original control.
+ */
+static ssize_t show_freq_attr_default_controls(struct cpufreq_policy *policy, char *buf)
+{
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct cpufreq_frequency_table *entry;
+	ssize_t len = 0;
+	ssize_t err;
+
+	if (!check_cpu_control_capability(data))
+		return -ENODEV;
+
+	/* make sure the original control values have been saved */
+	err = check_origial_table(data);
+	if (err != 0)
+		return err;
+
+	for (entry = data->freq_table; entry->frequency != CPUFREQ_TABLE_END; entry++) {
+		unsigned int control = data->original_controls[entry->index];
+		unsigned int fid = extract_fid_from_control(control);
+		unsigned int vid = extract_vid_from_control(control);
+
+		len += sprintf(buf + len, "%u:%u ", fid, vid);
+	}
+	len += sprintf(buf + len, "\n");
+
+	return len;
+}
+
+
+/*
+ * sysfs write handler for "phc_vids": takes one decimal voltage id per
+ * frequency table entry, separated by spaces and/or commas, and stores each
+ * one into the control value of the matching performance state.
+ * A vid greater than the saved default is skipped (with a log message), so
+ * users cannot raise the voltage above the original setting.
+ * Returns count on success, -ENODEV/-ENOMEM/-EINVAL on failure.
+ */
+static ssize_t store_freq_attr_vids(struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_processor_performance *acpi_data;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int freq_index;
+	unsigned int state_index;
+	unsigned int new_vid;
+	unsigned int original_vid;
+	unsigned int new_control;
+	unsigned int original_control;
+	const char *curr_buf = buf;
+	char *next_buf;
+	ssize_t retval;
+
+	if (!check_cpu_control_capability(data))
+		return -ENODEV;
+
+	retval = check_origial_table(data);
+	if (0 != retval)
+		return retval;
+
+	acpi_data = data->acpi_data;
+	freq_table = data->freq_table;
+
+	/* read one value from the input for each frequency table entry */
+	for (freq_index = 0; freq_table[freq_index].frequency != CPUFREQ_TABLE_END; freq_index++) {
+		new_vid = simple_strtoul(curr_buf, &next_buf, 10);
+		if (next_buf == curr_buf) {
+			/* nothing parsed: acceptable only at the trailing newline */
+			if ((curr_buf - buf == count - 1) && (*curr_buf == '\n'))
+				break;
+			printk("failed to parse vid value at %i (%s)\n", freq_index, curr_buf);
+			return -EINVAL;
+		}
+
+		state_index = freq_table[freq_index].index;
+		original_control = data->original_controls[state_index];
+		original_vid = original_control & INTEL_MSR_VID_MASK;
+
+		/* only accept values that do not exceed the default vid */
+		if (new_vid <= original_vid) {
+			new_control = (original_control & ~INTEL_MSR_VID_MASK) | new_vid;
+			dprintk("setting control at %i to %x (default is %x)\n",
+			        freq_index, new_control, original_control);
+			acpi_data->states[state_index].control = new_control;
+		} else {
+			printk("skipping vid at %i, %u is greater than default %u\n",
+			       freq_index, new_vid, original_vid);
+		}
+
+		/* skip any run of value separators (spaces or commas) */
+		curr_buf = next_buf;
+		while ((curr_buf - buf < count) && ((*curr_buf == ' ') || (*curr_buf == ','))) {
+			curr_buf++;
+		}
+	}
+
+	/* set new voltage for current frequency */
+	data->resume = 1;
+	acpi_cpufreq_target(policy, get_cur_freq_on_cpu(policy->cpu), CPUFREQ_RELATION_L);
+
+	/* Consume the whole buffer: a short count would make userspace write
+	 * the rest again, and it would be parsed from the first table entry.
+	 */
+	return count;
+}
+
+static ssize_t store_freq_attr_controls(struct cpufreq_policy *policy, const char *buf, size_t count)
+ /* sysfs write handler for "phc_controls": parses "fid:vid" pairs (one ',' or
+  * ' ' between pairs) and, for every state whose current FID matches, stores
+  * the new VID unless it exceeds the saved default. Returns count or -EINVAL
+  * (-ENODEV or the check_origial_table() error before any parsing). */
+{
+	struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu);
+	struct acpi_processor_performance *acpi_data;
+	struct cpufreq_frequency_table *freq_table;
+	const char   *curr_buf;
+	unsigned int  op_count;
+	unsigned int  state_index;
+	int           isok;
+	char         *next_buf;
+	ssize_t       retval;
+	unsigned int  new_vid;
+	unsigned int  original_vid;
+	unsigned int  new_fid;
+	unsigned int  old_fid;
+	unsigned int  original_control;
+	unsigned int  old_control;
+	unsigned int  new_control;
+	int           found;
+
+	if (!check_cpu_control_capability(data)) return -ENODEV;
+
+	retval = check_origial_table(data);
+        if (0 != retval)
+		return retval;
+
+	acpi_data = data->acpi_data;
+	freq_table = data->freq_table;	/* assigned but not used further in this function */
+
+	op_count = 0;
+	curr_buf = buf;
+	next_buf = NULL;
+	isok     = 1;
+	
+	while ( (isok) && (curr_buf != NULL) )
+	{
+		op_count++;	/* 1-based number of the pair being parsed, used in messages */
+		// Parse the decimal FID that starts the pair
+		new_fid = simple_strtoul(curr_buf, &next_buf, 10);
+		if ((next_buf != curr_buf) && (next_buf != NULL))
+		{
+			// The FID must be followed by ':' separating it from the VID
+			curr_buf = next_buf;
+			next_buf = NULL;
+			if (*curr_buf==':')
+			{
+				curr_buf++;
+				// Parse the decimal VID
+				new_vid = simple_strtoul(curr_buf, &next_buf, 10);
+				if ((next_buf != curr_buf) && (next_buf != NULL))
+				{
+					found = 0;
+					for (state_index = 0; state_index < acpi_data->state_count; state_index++) {
+						old_control = acpi_data->states[state_index].control;
+						old_fid = extract_fid_from_control(old_control);
+						if (new_fid == old_fid)
+						{
+							found = 1;
+							original_control = data->original_controls[state_index];
+							original_vid = extract_vid_from_control(original_control);
+							if (new_vid <= original_vid)	/* never go above the saved default VID */
+							{
+								new_control = (original_control & ~INTEL_MSR_VID_MASK) | new_vid;
+								dprintk("setting control at %i to %x (default is %x)\n",
+								        state_index, new_control, original_control);
+								acpi_data->states[state_index].control = new_control;
+
+							} else {
+								printk("skipping vid at %i, %u is greater than default %u\n",
+								       state_index, new_vid, original_vid);
+							}
+						}
+					}
+
+					if (found == 0)
+					{
+						printk("operating point # %u not found (FID = %u)\n", op_count, new_fid);
+						isok = 0;	/* unknown FID: stop parsing and fail with -EINVAL */
+					}
+
+					// Exactly one ',' or ' ' continues with the next pair; anything else ends parsing
+					curr_buf = next_buf;
+					next_buf = NULL;
+					if ((*curr_buf == ',') || (*curr_buf == ' '))
+						curr_buf++;
+					else
+						curr_buf = NULL;	/* terminates the while loop */
+				}
+				else
+				{
+					printk("failed to parse VID of operating point # %u (%s)\n", op_count, curr_buf);
+					isok = 0;
+				}
+			}
+			else
+			{
+				printk("failed to parse operating point # %u (%s)\n", op_count, curr_buf);
+				isok = 0;
+			}
+		}
+		else
+		{
+			printk("failed to parse FID of operating point # %u (%s)\n", op_count, curr_buf);
+			isok = 0;
+		}
+	}
+
+	if (isok)
+	{
+		retval = count;	/* the whole buffer is reported as consumed */
+		/* set new voltage at current frequency */
+		data->resume = 1;
+		acpi_cpufreq_target(policy, get_cur_freq_on_cpu(policy->cpu), CPUFREQ_RELATION_L);
+	}
+	else
+	{
+		retval = -EINVAL;	/* states changed before the error keep their new control values */
+	}
+
+	return retval;
+}
+
+/* sysfs read handler for "phc_version": prints PHC_VERSION_STRING followed by
+ * a newline.
+ */
+static ssize_t show_freq_attr_phc_version(struct cpufreq_policy *policy, char *buf)
+{
+	/* sprintf() returns the number of characters written, i.e. the length */
+	return sprintf(buf, "%s\n", PHC_VERSION_STRING);
+}
+
+
+
+static struct freq_attr cpufreq_freq_attr_phc_version =
+{
+	/* read-only "phc_version": displays phc's version string */
+       .attr = { .name = "phc_version", .mode = 0444, .owner = THIS_MODULE },
+       .show = show_freq_attr_phc_version,
+       .store = NULL,
+};
+
+static struct freq_attr cpufreq_freq_attr_vids =
+{
+	/* read/write "phc_vids": display or set phc's voltage id's for the cpu */
+       .attr = { .name = "phc_vids", .mode = 0644, .owner = THIS_MODULE },
+       .show = show_freq_attr_vids,
+       .store = store_freq_attr_vids,
+};
+
+static struct freq_attr cpufreq_freq_attr_default_vids =
+{
+	/* read-only "phc_default_vids": displays the saved default voltage id's for the cpu */
+       .attr = { .name = "phc_default_vids", .mode = 0444, .owner = THIS_MODULE },
+       .show = show_freq_attr_default_vids,
+       .store = NULL,
+};
+
+static struct freq_attr cpufreq_freq_attr_fids =
+{
+	/* read-only "phc_fids": displays the frequency id's of the current controls for the cpu */
+       .attr = { .name = "phc_fids", .mode = 0444, .owner = THIS_MODULE },
+       .show = show_freq_attr_fids,
+       .store = NULL,
+};
+
+static struct freq_attr cpufreq_freq_attr_controls =
+{
+	/* read/write "phc_controls": display or set the current fid:vid controls for the cpu */
+       .attr = { .name = "phc_controls", .mode = 0644, .owner = THIS_MODULE },
+       .show = show_freq_attr_controls,
+       .store = store_freq_attr_controls,
+};
+
+static struct freq_attr cpufreq_freq_attr_default_controls =
+{
+	/* read-only "phc_default_controls": displays the saved default fid:vid controls for the cpu */
+       .attr = { .name = "phc_default_controls", .mode = 0444, .owner = THIS_MODULE },
+       .show = show_freq_attr_default_controls,
+       .store = NULL,
+};
+
+
 static struct freq_attr *acpi_cpufreq_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
+	&cpufreq_freq_attr_phc_version,
+ 	&cpufreq_freq_attr_scaling_available_freqs,
+	&cpufreq_freq_attr_vids,
+	&cpufreq_freq_attr_default_vids,
+	&cpufreq_freq_attr_fids,
+	&cpufreq_freq_attr_controls,
+	&cpufreq_freq_attr_default_controls,
 	NULL,
 };
 
diff -Npur linux-2.6-block/arch/x86/mm/fault.c linux-2.6-block-custom/arch/x86/mm/fault.c
--- linux-2.6-block/arch/x86/mm/fault.c	2008-09-27 16:12:12.078979941 +0900
+++ linux-2.6-block-custom/arch/x86/mm/fault.c	2008-09-26 19:45:20.362785844 +0900
@@ -26,6 +26,7 @@
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
+#include <linux/suspend.h>
 
 #include <asm/system.h>
 #include <asm/desc.h>
@@ -60,6 +61,11 @@ static inline int kmmio_fault(struct pt_
 	return 0;
 }
 
+#ifdef CONFIG_X86_32
+int toi_faulted;
+EXPORT_SYMBOL_GPL(toi_faulted);
+#endif
+
 static inline int notify_page_fault(struct pt_regs *regs)
 {
 #ifdef CONFIG_KPROBES
@@ -607,6 +613,22 @@ void __kprobes do_page_fault(struct pt_r
 
 	si_code = SEGV_MAPERR;
 
+	/* During a TuxOnIce atomic copy, with DEBUG_SLAB, we will
+	 * get page faults where slab has been unmapped. Map them
+	 * temporarily and set the variable that tells TuxOnIce to
+	 * unmap afterwards.
+	 */
+
+#ifdef CONFIG_DEBUG_PAGEALLOC /* X86_32 only */
+	if (unlikely(toi_running && !toi_faulted)) {
+		struct page *page = NULL;
+		toi_faulted = 1;
+		page = virt_to_page(address);
+		kernel_map_pages(page, 1, 1);
+		return;
+	}
+#endif
+
 	if (notify_page_fault(regs))
 		return;
 	if (unlikely(kmmio_fault(regs, address)))
diff -Npur linux-2.6-block/crypto/Kconfig linux-2.6-block-custom/crypto/Kconfig
--- linux-2.6-block/crypto/Kconfig	2008-09-27 16:12:12.326897414 +0900
+++ linux-2.6-block-custom/crypto/Kconfig	2008-09-26 19:45:20.362785844 +0900
@@ -666,6 +666,14 @@ config CRYPTO_LZO
 	help
 	  This is the LZO algorithm.
 
+config CRYPTO_LZF
+	tristate "LZF compression algorithm"
+	default y
+	select CRYPTO_ALGAPI
+	help
+	  This is the LZF algorithm. It is especially useful for TuxOnIce,
+	  because it achieves good compression quickly.
+
 source "drivers/crypto/Kconfig"
 
 endif	# if CRYPTO
diff -Npur linux-2.6-block/crypto/Makefile linux-2.6-block-custom/crypto/Makefile
--- linux-2.6-block/crypto/Makefile	2008-09-27 16:12:12.326897414 +0900
+++ linux-2.6-block-custom/crypto/Makefile	2008-09-26 19:45:20.362785844 +0900
@@ -67,6 +67,7 @@ obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_
 obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
+obj-$(CONFIG_CRYPTO_LZF) += lzf.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 
diff -Npur linux-2.6-block/crypto/lzf.c linux-2.6-block-custom/crypto/lzf.c
--- linux-2.6-block/crypto/lzf.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/crypto/lzf.c	2008-09-26 20:24:17.711770002 +0900
@@ -0,0 +1,326 @@
+/*
+ * Cryptoapi LZF compression module.
+ *
+ * Copyright (c) 2004-2005 Nigel Cunningham <nigel at tuxonice net>
+ *
+ * based on the deflate.c file:
+ *
+ * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
+ *
+ * and upon the LZF compression module donated to the TuxOnIce project with
+ * the following copyright:
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ * Copyright (c) 2000-2003 Marc Alexander Lehmann <pcg@goof.com>
+ *
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ *
+ *   1.  Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ *
+ *   2.  Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ *   3.  The name of the author may not be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License version 2 (the "GPL"), in which case the
+ * provisions of the GPL are applicable instead of the above. If you wish to
+ * allow the use of your version of this file only under the terms of the
+ * GPL and not to allow others to use your version of this file under the
+ * BSD license, indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by the GPL. If
+ * you do not delete the provisions above, a recipient may use your version
+ * of this file under either the BSD or the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <asm/string.h>
+
+struct lzf_ctx {
+	void *hbuf;
+	unsigned int bufofs;
+};
+
+/*
+ * size of hashtable is (1 << hlog) * sizeof (char *)
+ * decompression is independent of the hash table size
+ * the difference between 15 and 14 is very small
+ * for small blocks (and 14 is also faster).
+ * For a low-memory configuration, use hlog == 13;
+ * For best compression, use 15 or 16.
+ */
+static const int hlog = 13;
+
+/*
+ * don't play with this unless you benchmark!
+ * decompression is not dependent on the hash function
+ * the hashing function might seem strange, just believe me
+ * it works ;)
+ */
+static inline u16 first(const u8 *p)
+{
+	return (p[0] << 8) | p[1];	/* p[0] in the high byte, p[1] in the low */
+}
+
+static inline u32 next(unsigned int v, const u8 *p)
+{
+	return (v << 8) + p[2];	/* keep all of v so idx() hashes 3 bytes, not 2 */
+}
+
+static inline u32 idx(unsigned int h)
+{
+	return ((1 << hlog) - 1) & (h * 3 + ((h ^ (h << 5)) >> (24 - hlog)));
+}
+
+/*
+ * IDX works because it is very similar to a multiplicative hash, e.g.
+ * (h * 57321 >> (3*8 - hlog))
+ * the next one is also quite good, albeit slow ;)
+ * (int)(cos(h & 0xffffff) * 1e6)
+ */
+
+static const int max_lit = (1 <<  5);
+static const int max_off = (1 << 13);
+static const int max_ref = ((1 <<  8) + (1 << 3));
+
+/*
+ * compressed format
+ *
+ * 000LLLLL <L+1>    ; literal
+ * LLLOOOOO oooooooo ; backref L
+ * 111OOOOO LLLLLLLL oooooooo ; backref L+7
+ *
+ */
+
+static void lzf_compress_exit(struct crypto_tfm *tfm)
+{
+	struct lzf_ctx *lzf = crypto_tfm_ctx(tfm);
+
+	/* Release the hash table, if one was ever allocated. */
+	if (lzf->hbuf) {
+		vfree(lzf->hbuf);
+		lzf->hbuf = NULL;
+	}
+}
+
+static int lzf_compress_init(struct crypto_tfm *tfm)
+{
+	struct lzf_ctx *lzf = crypto_tfm_ctx(tfm);
+	const long htab_bytes = (long) ((1 << hlog) * sizeof(char *));
+
+	lzf->hbuf = vmalloc_32(htab_bytes);	/* one pointer per hash slot */
+	if (!lzf->hbuf) {
+		printk(KERN_WARNING "Failed to allocate %ld bytes for lzf workspace\n",
+		       htab_bytes);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int lzf_compress(struct crypto_tfm *tfm, const u8 *in_data,
+		unsigned int in_len, u8 *out_data, unsigned int *out_len)
+{
+	struct lzf_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u8 **htab = ctx->hbuf;
+	const u8 **hslot;
+	const u8 *ip = in_data;
+	u8 *op = out_data;
+	const u8 *in_end = ip + in_len;
+	u8 *out_end = op + *out_len - 3;
+	const u8 *ref;
+	unsigned int hval = first(ip);
+	unsigned long off;
+	int lit = 0;
+
+	/* htab is a pointer: sizeof(htab) would clear one slot, not the table */
+	memset(htab, 0, (1 << hlog) * sizeof(*htab));
+
+	for (;;) {
+		if (ip < in_end - 2) {
+			hval = next(hval, ip);
+			hslot = htab + idx(hval);
+			ref = *hslot;
+			*hslot = ip;
+
+			off = ip - ref - 1;
+			if (off < max_off
+			    && ip + 4 < in_end && ref > in_data
+			    && *(u16 *) ref == *(u16 *) ip && ref[2] == ip[2]
+			    ) {
+				/* match found at *ref++ */
+				unsigned int len = 2;
+				unsigned int maxlen = in_end - ip - len;
+				maxlen = maxlen > max_ref ? max_ref : maxlen;
+
+				do
+					len++;
+				while (len < maxlen && ref[len] == ip[len]);
+
+				if (op + lit + 1 + 3 >= out_end) {
+					*out_len = PAGE_SIZE;
+					return 0;
+				}
+
+				if (lit) {
+					*op++ = lit - 1;
+					lit = -lit;
+					do {
+						*op++ = ip[lit];
+					} while (++lit);
+				}
+
+				len -= 2;
+				ip++;
+
+				if (len < 7) {
+					*op++ = (off >> 8) + (len << 5);
+				} else {
+					*op++ = (off >> 8) + (7 << 5);
+					*op++ = len - 7;
+				}
+
+				*op++ = off;
+
+				ip += len;
+				hval = first(ip);
+				hval = next(hval, ip);
+				htab[idx(hval)] = ip;
+				ip++;
+				continue;
+			}
+		} else if (ip == in_end)
+			break;
+
+		/* one more literal byte we must copy */
+		lit++;
+		ip++;
+
+		if (lit == max_lit) {
+			if (op + 1 + max_lit >= out_end) {
+				*out_len = PAGE_SIZE;
+				return 0;
+			}
+
+			*op++ = max_lit - 1;
+			memcpy(op, ip - max_lit, max_lit);
+			op += max_lit;
+			lit = 0;
+		}
+	}
+
+	if (lit) {
+		if (op + lit + 1 >= out_end) {
+			*out_len = PAGE_SIZE;
+			return 0;
+		}
+
+		*op++ = lit - 1;
+		lit = -lit;
+		do {
+			*op++ = ip[lit];
+		} while (++lit);
+	}
+
+	*out_len = op - out_data;
+	return 0;
+}
+
+static int lzf_decompress(struct crypto_tfm *tfm, const u8 *src,
+		unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	u8 const *ip = src;
+	u8 *op = dst;
+	u8 const *const in_end = ip + slen;
+	u8 *const out_end = op + *dlen;
+
+	*dlen = PAGE_SIZE;	/* value left in place by every early return below */
+	while (op < out_end && ip < in_end) {	/* never read an empty input */
+		unsigned int ctrl = *ip++;
+
+		if (ctrl < (1 << 5)) {
+			/* literal run */
+			ctrl++;
+			/* the run must fit the output and be present in the input */
+			if (op + ctrl > out_end || ip + ctrl > in_end)
+				return 0;
+			memcpy(op, ip, ctrl);
+			op += ctrl;
+			ip += ctrl;
+		} else {	/* back reference */
+			unsigned int len = ctrl >> 5;
+			u8 *ref = op - ((ctrl & 0x1f) << 8) - 1;
+
+			/* an offset byte follows, after a length byte if len == 7 */
+			if (in_end - ip < (len == 7 ? 2 : 1))
+				return 0;
+			if (len == 7)
+				len += *ip++;
+			ref -= *ip++;
+			len += 2;
+
+			if (op + len > out_end || ref < (u8 *) dst)
+				return 0;
+
+			do {
+				*op++ = *ref++;
+			} while (--len);
+		}
+	}
+
+	*dlen = op - (u8 *) dst;
+	return 0;
+}
+
+static struct crypto_alg alg = {
+	.cra_name = "lzf",
+	.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_ctxsize = sizeof(struct lzf_ctx),	/* ctx read via crypto_tfm_ctx() */
+	.cra_module = THIS_MODULE,
+	.cra_list = LIST_HEAD_INIT(alg.cra_list),
+	.cra_init = lzf_compress_init,	/* allocates ctx->hbuf */
+	.cra_exit = lzf_compress_exit,	/* frees ctx->hbuf */
+	.cra_u = { .compress = {
+	.coa_compress = lzf_compress,
+	.coa_decompress = lzf_decompress } }
+};
+
+static int __init init(void)
+{
+	return crypto_register_alg(&alg);	/* result becomes the module_init() result */
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_alg(&alg);	/* any return value is not checked */
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZF Compression Algorithm");
+MODULE_AUTHOR("Marc Alexander Lehmann & Nigel Cunningham");
diff -Npur linux-2.6-block/drivers/hwmon/Kconfig linux-2.6-block-custom/drivers/hwmon/Kconfig
--- linux-2.6-block/drivers/hwmon/Kconfig	2008-09-27 16:12:14.018942631 +0900
+++ linux-2.6-block-custom/drivers/hwmon/Kconfig	2008-09-26 19:45:38.659791957 +0900
@@ -794,6 +794,7 @@ config SENSORS_W83627EHF
 config SENSORS_HDAPS
 	tristate "IBM Hard Drive Active Protection System (hdaps)"
 	depends on INPUT && X86
+	select THINKPAD_EC
 	select INPUT_POLLDEV
 	default n
 	help
diff -Npur linux-2.6-block/drivers/hwmon/hdaps.c linux-2.6-block-custom/drivers/hwmon/hdaps.c
--- linux-2.6-block/drivers/hwmon/hdaps.c	2008-09-27 16:12:14.086944221 +0900
+++ linux-2.6-block-custom/drivers/hwmon/hdaps.c	2008-09-26 19:45:38.659791957 +0900
@@ -28,263 +28,414 @@
 
 #include <linux/delay.h>
 #include <linux/platform_device.h>
-#include <linux/input-polldev.h>
+#include <linux/input.h>
 #include <linux/kernel.h>
-#include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/timer.h>
 #include <linux/dmi.h>
 #include <linux/jiffies.h>
-
-#include <asm/io.h>
-
-#define HDAPS_LOW_PORT		0x1600	/* first port used by hdaps */
-#define HDAPS_NR_PORTS		0x30	/* number of ports: 0x1600 - 0x162f */
-
-#define HDAPS_PORT_STATE	0x1611	/* device state */
-#define HDAPS_PORT_YPOS		0x1612	/* y-axis position */
-#define	HDAPS_PORT_XPOS		0x1614	/* x-axis position */
-#define HDAPS_PORT_TEMP1	0x1616	/* device temperature, in Celsius */
-#define HDAPS_PORT_YVAR		0x1617	/* y-axis variance (what is this?) */
-#define HDAPS_PORT_XVAR		0x1619	/* x-axis variance (what is this?) */
-#define HDAPS_PORT_TEMP2	0x161b	/* device temperature (again?) */
-#define HDAPS_PORT_UNKNOWN	0x161c	/* what is this? */
-#define HDAPS_PORT_KMACT	0x161d	/* keyboard or mouse activity */
-
-#define STATE_FRESH		0x50	/* accelerometer data is fresh */
+#include <linux/thinkpad_ec.h>
+#include <linux/pci_ids.h>
+#include <linux/version.h>
+
+/* Embedded controller accelerometer read command and its result: */
+static const struct thinkpad_ec_row ec_accel_args =
+	{ .mask = 0x0001, .val = {0x11} };
+#define EC_ACCEL_IDX_READOUTS	0x1	/* readouts included in this read */
+					/* First readout, if READOUTS>=1: */
+#define EC_ACCEL_IDX_YPOS1	0x2	/*   y-axis position word */
+#define EC_ACCEL_IDX_XPOS1	0x4	/*   x-axis position word */
+#define EC_ACCEL_IDX_TEMP1	0x6	/*   device temperature in Celsius */
+					/* Second readout, if READOUTS>=2: */
+#define EC_ACCEL_IDX_XPOS2	0x7	/*   y-axis position word */
+#define EC_ACCEL_IDX_YPOS2	0x9	/*   x-axis position word */
+#define EC_ACCEL_IDX_TEMP2	0xb	/*   device temperature in Celsius */
+#define EC_ACCEL_IDX_QUEUED	0xc	/* Number of queued readouts left */
+#define EC_ACCEL_IDX_KMACT	0xd	/* keyboard or mouse activity */
+#define EC_ACCEL_IDX_RETVAL	0xf	/* command return value, good=0x00 */
 
 #define KEYBD_MASK		0x20	/* set if keyboard activity */
 #define MOUSE_MASK		0x40	/* set if mouse activity */
-#define KEYBD_ISSET(n)		(!! (n & KEYBD_MASK))	/* keyboard used? */
-#define MOUSE_ISSET(n)		(!! (n & MOUSE_MASK))	/* mouse used? */
 
-#define INIT_TIMEOUT_MSECS	4000	/* wait up to 4s for device init ... */
-#define INIT_WAIT_MSECS		200	/* ... in 200ms increments */
+#define READ_TIMEOUT_MSECS	100	/* wait this long for device read */
+#define RETRY_MSECS		3	/* retry delay */
 
-#define HDAPS_POLL_INTERVAL	50	/* poll for input every 1/20s (50 ms)*/
 #define HDAPS_INPUT_FUZZ	4	/* input event threshold */
 #define HDAPS_INPUT_FLAT	4
+#define KMACT_REMEMBER_PERIOD   (HZ/10) /* keyboard/mouse persistence */
 
-static struct platform_device *pdev;
-static struct input_polled_dev *hdaps_idev;
-static unsigned int hdaps_invert;
-static u8 km_activity;
-static int rest_x;
-static int rest_y;
-
-static DEFINE_MUTEX(hdaps_mtx);
+/* Input IDs */
+#define HDAPS_INPUT_VENDOR	PCI_VENDOR_ID_IBM
+#define HDAPS_INPUT_PRODUCT	0x5054 /* "TP", shared with thinkpad_acpi */
+#define HDAPS_INPUT_JS_VERSION	0x6801 /* Joystick emulation input device */
+#define HDAPS_INPUT_RAW_VERSION	0x4801 /* Raw accelerometer input device */
+
+/* Axis orientation. */
+/* The unnatural bit-representation of inversions is for backward
+ * compatibility with the "invert=1" module parameter.            */
+#define HDAPS_ORIENT_INVERT_XY  0x01   /* Invert both X and Y axes.       */
+#define HDAPS_ORIENT_INVERT_X   0x02   /* Invert the X axis (uninvert if
+					* already inverted by INVERT_XY). */
+#define HDAPS_ORIENT_SWAP       0x04   /* Swap the axes. The swap occurs
+					* before inverting X or Y.        */
+#define HDAPS_ORIENT_MAX        0x07
+#define HDAPS_ORIENT_UNDEFINED  0xFF   /* Placeholder during initialization */
+#define HDAPS_ORIENT_INVERT_Y   (HDAPS_ORIENT_INVERT_XY | HDAPS_ORIENT_INVERT_X)
 
-/*
- * __get_latch - Get the value from a given port.  Callers must hold hdaps_mtx.
- */
-static inline u8 __get_latch(u16 port)
-{
-	return inb(port) & 0xff;
+static struct timer_list hdaps_timer;
+static struct platform_device *pdev;
+static struct input_dev *hdaps_idev;     /* joystick-like device with fuzz */
+static struct input_dev *hdaps_idev_raw; /* raw hdaps sensor readouts */
+static unsigned int hdaps_invert = HDAPS_ORIENT_UNDEFINED;
+static int needs_calibration;
+
+
+/* Configuration: */
+static int sampling_rate = 50;       /* Sampling rate  */
+static int oversampling_ratio = 5;   /* Ratio between our sampling rate and
+				      * EC accelerometer sampling rate      */
+static int running_avg_filter_order = 2; /* EC running average filter order */
+static int fake_data_mode;           /* Enable EC fake data mode? */
+
+/* Latest state readout: */
+static int pos_x, pos_y;      /* position */
+static int temperature;       /* temperature */
+static int stale_readout = 1; /* last read invalid */
+static int rest_x, rest_y;    /* calibrated rest position */
+
+/* Last time we saw keyboard and mouse activity: */
+static u64 last_keyboard_jiffies = INITIAL_JIFFIES;
+static u64 last_mouse_jiffies = INITIAL_JIFFIES;
+static u64 last_update_jiffies = INITIAL_JIFFIES;
+
+/* input device use count */
+static int hdaps_users;
+static DEFINE_MUTEX(hdaps_users_mtx);
+
+/* Some models require an axis transformation to the standard representation */
+static void transform_axes(int *x, int *y)
+{
+	if (hdaps_invert & HDAPS_ORIENT_SWAP) {
+		int z;
+		z = *x;
+		*x = *y;
+		*y = z;
+	}
+	if (hdaps_invert & HDAPS_ORIENT_INVERT_XY) {
+		*x = -*x;
+		*y = -*y;
+	}
+	if (hdaps_invert & HDAPS_ORIENT_INVERT_X)
+		*x = -*x;
 }
 
-/*
- * __check_latch - Check a port latch for a given value.  Returns zero if the
- * port contains the given value.  Callers must hold hdaps_mtx.
+/**
+ * __hdaps_update - query current state, with locks already acquired
+ * @fast: if nonzero, do one quick attempt without retries.
+ *
+ * Query current accelerometer state and update global state variables.
+ * Also prefetches the next query. Caller must hold controller lock.
  */
-static inline int __check_latch(u16 port, u8 val)
+static int __hdaps_update(int fast)
 {
-	if (__get_latch(port) == val)
-		return 0;
-	return -EINVAL;
+	/* Read data: */
+	struct thinkpad_ec_row data;
+	int ret;
+
+	data.mask = (1 << EC_ACCEL_IDX_READOUTS) | (1 << EC_ACCEL_IDX_KMACT) |
+		    (3 << EC_ACCEL_IDX_YPOS1)    | (3 << EC_ACCEL_IDX_XPOS1) |
+		    (1 << EC_ACCEL_IDX_TEMP1)    | (1 << EC_ACCEL_IDX_RETVAL);
+	if (fast)
+		ret = thinkpad_ec_try_read_row(&ec_accel_args, &data);
+	else
+		ret = thinkpad_ec_read_row(&ec_accel_args, &data);
+	thinkpad_ec_prefetch_row(&ec_accel_args); /* Prefetch even if error */
+	if (ret)
+		return ret;
+
+	/* Check status: */
+	if (data.val[EC_ACCEL_IDX_RETVAL] != 0x00) {
+		printk(KERN_WARNING "hdaps: read RETVAL=0x%02x\n",
+		       data.val[EC_ACCEL_IDX_RETVAL]);
+		return -EIO;
+	}
+
+	if (data.val[EC_ACCEL_IDX_READOUTS] < 1)
+		return -EBUSY; /* no pending readout, try again later */
+
+	/* Parse position data: */
+	pos_x = *(s16 *)(data.val+EC_ACCEL_IDX_XPOS1);
+	pos_y = *(s16 *)(data.val+EC_ACCEL_IDX_YPOS1);
+	transform_axes(&pos_x, &pos_y);
+
+	/* Keyboard and mouse activity status is cleared as soon as it's read,
+	 * so applications will eat each other's events. Thus we remember any
+	 * event for KMACT_REMEMBER_PERIOD jiffies.
+	 */
+	if (data.val[EC_ACCEL_IDX_KMACT] & KEYBD_MASK)
+		last_keyboard_jiffies = get_jiffies_64();
+	if (data.val[EC_ACCEL_IDX_KMACT] & MOUSE_MASK)
+		last_mouse_jiffies = get_jiffies_64();
+
+	temperature = data.val[EC_ACCEL_IDX_TEMP1];
+
+	last_update_jiffies = get_jiffies_64();
+	stale_readout = 0;
+	if (needs_calibration) {
+		rest_x = pos_x;
+		rest_y = pos_y;
+		needs_calibration = 0;
+	}
+
+	return 0;
 }
 
-/*
- * __wait_latch - Wait up to 100us for a port latch to get a certain value,
- * returning zero if the value is obtained.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_update - acquire locks and query current state
+ *
+ * Query current accelerometer state and update global state variables.
+ * Also prefetches the next query.
+ * Retries until timeout if the accelerometer is not in ready status (common).
+ * Does its own locking.
  */
-static int __wait_latch(u16 port, u8 val)
+static int hdaps_update(void)
 {
-	unsigned int i;
+	u64 age = get_jiffies_64() - last_update_jiffies;
+	int total, ret;
+
+	if (!stale_readout && age < (9*HZ)/(10*sampling_rate))
+		return 0; /* already updated recently */
+	for (total = 0; total < READ_TIMEOUT_MSECS; total += RETRY_MSECS) {
+		ret = thinkpad_ec_lock();
+		if (ret)
+			return ret;
+		ret = __hdaps_update(0);
+		thinkpad_ec_unlock();
 
-	for (i = 0; i < 20; i++) {
-		if (!__check_latch(port, val))
+		if (!ret)
 			return 0;
-		udelay(5);
+		if (ret != -EBUSY)
+			break;
+		msleep(RETRY_MSECS);
 	}
-
-	return -EIO;
+	return ret;
 }
 
-/*
- * __device_refresh - request a refresh from the accelerometer.  Does not wait
- * for refresh to complete.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_set_power - enable or disable power to the accelerometer.
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static void __device_refresh(void)
+static int hdaps_set_power(int on)
 {
-	udelay(200);
-	if (inb(0x1604) != STATE_FRESH) {
-		outb(0x11, 0x1610);
-		outb(0x01, 0x161f);
-	}
+	struct thinkpad_ec_row args =
+		{ .mask = 0x0003, .val = {0x14, on?0x01:0x00} };
+	struct thinkpad_ec_row data = { .mask = 0x8000 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return ret;
+	if (data.val[0xF] != 0x00)
+		return -EIO;
+	return 0;
 }
 
-/*
- * __device_refresh_sync - request a synchronous refresh from the
- * accelerometer.  We wait for the refresh to complete.  Returns zero if
- * successful and nonzero on error.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_set_fake_data_mode - enable or disable EC test mode
+ * EC test mode fakes accelerometer data using an incrementing counter.
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static int __device_refresh_sync(void)
+static int hdaps_set_fake_data_mode(int on)
 {
-	__device_refresh();
-	return __wait_latch(0x1604, STATE_FRESH);
+	struct thinkpad_ec_row args =
+		{ .mask = 0x0007, .val = {0x17, 0x83, on?0x01:0x00} };
+	struct thinkpad_ec_row data = { .mask = 0x8000 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return ret;
+	if (data.val[0xF] != 0x00) {
+		printk(KERN_WARNING "failed setting hdaps fake data to %d\n",
+		       on);
+		return -EIO;
+	}
+	printk(KERN_DEBUG "hdaps: fake_data_mode set to %d\n", on);
+	return 0;
 }
 
-/*
- * __device_complete - indicate to the accelerometer that we are done reading
- * data, and then initiate an async refresh.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_set_ec_config - set accelerometer parameters.
+ * @ec_rate: embedded controller sampling rate
+ * @order: embedded controller running average filter order
+ * (Normally we have @ec_rate = sampling_rate * oversampling_ratio.)
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static inline void __device_complete(void)
+static int hdaps_set_ec_config(int ec_rate, int order)
 {
-	inb(0x161f);
-	inb(0x1604);
-	__device_refresh();
+	struct thinkpad_ec_row args = { .mask = 0x000F,
+		.val = {0x10, (u8)ec_rate, (u8)(ec_rate>>8), order} };
+	struct thinkpad_ec_row data = { .mask = 0x8000 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	printk(KERN_DEBUG "hdaps: setting ec_rate=%d, filter_order=%d\n",
+	       ec_rate, order);
+	if (ret)
+		return ret;
+	if (data.val[0xF] == 0x03) {
+		printk(KERN_WARNING "hdaps: config param out of range\n");
+		return -EINVAL;
+	}
+	if (data.val[0xF] == 0x06) {
+		printk(KERN_WARNING "hdaps: config change already pending\n");
+		return -EBUSY;
+	}
+	if (data.val[0xF] != 0x00) {
+		printk(KERN_WARNING "hdaps: config change error, ret=%d\n",
+		      data.val[0xF]);
+		return -EIO;
+	}
+	return 0;
 }
 
-/*
- * hdaps_readb_one - reads a byte from a single I/O port, placing the value in
- * the given pointer.  Returns zero on success or a negative error on failure.
- * Can sleep.
+/**
+ * hdaps_get_ec_config - get accelerometer parameters.
+ * @ec_rate: embedded controller sampling rate
+ * @order: embedded controller running average filter order
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static int hdaps_readb_one(unsigned int port, u8 *val)
+static int hdaps_get_ec_config(int *ec_rate, int *order)
 {
-	int ret;
-
-	mutex_lock(&hdaps_mtx);
-
-	/* do a sync refresh -- we need to be sure that we read fresh data */
-	ret = __device_refresh_sync();
+	const struct thinkpad_ec_row args =
+		{ .mask = 0x0003, .val = {0x17, 0x82} };
+	struct thinkpad_ec_row data = { .mask = 0x801F };
+	int ret = thinkpad_ec_read_row(&args, &data);
 	if (ret)
-		goto out;
-
-	*val = inb(port);
-	__device_complete();
-
-out:
-	mutex_unlock(&hdaps_mtx);
-	return ret;
+		return ret;
+	if (data.val[0xF] != 0x00)
+		return -EIO;
+	if (!(data.val[0x1] & 0x01))
+		return -ENXIO; /* accelerometer polling not enabled */
+	if (data.val[0x1] & 0x02)
+		return -EBUSY; /* config change in progress, retry later */
+	*ec_rate = data.val[0x2] | ((int)(data.val[0x3]) << 8);
+	*order = data.val[0x4];
+	return 0;
 }
 
-/* __hdaps_read_pair - internal lockless helper for hdaps_read_pair(). */
-static int __hdaps_read_pair(unsigned int port1, unsigned int port2,
-			     int *x, int *y)
+/**
+ * hdaps_get_ec_mode - get EC accelerometer mode
+ * Returns zero on success and negative error code on failure.  Can sleep.
+ */
+static int hdaps_get_ec_mode(u8 *mode)
 {
-	/* do a sync refresh -- we need to be sure that we read fresh data */
-	if (__device_refresh_sync())
+	const struct thinkpad_ec_row args =
+		{ .mask = 0x0001, .val = {0x13} };
+	struct thinkpad_ec_row data = { .mask = 0x8002 };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return ret;
+	if (data.val[0xF] != 0x00) {
+		printk(KERN_WARNING
+		       "accelerometer not implemented (0x%02x)\n",
+		       data.val[0xF]);
 		return -EIO;
-
-	*y = inw(port2);
-	*x = inw(port1);
-	km_activity = inb(HDAPS_PORT_KMACT);
-	__device_complete();
-
-	/* if hdaps_invert is set, negate the two values */
-	if (hdaps_invert) {
-		*x = -*x;
-		*y = -*y;
 	}
-
+	*mode = data.val[0x1];
 	return 0;
 }
 
-/*
- * hdaps_read_pair - reads the values from a pair of ports, placing the values
- * in the given pointers.  Returns zero on success.  Can sleep.
+/**
+ * hdaps_check_ec - checks something about the EC.
+ * Follows the clean-room spec for HDAPS; we don't know what it means.
+ * Returns zero on success and negative error code on failure.  Can sleep.
  */
-static int hdaps_read_pair(unsigned int port1, unsigned int port2,
-			   int *val1, int *val2)
+static int hdaps_check_ec(void)
 {
-	int ret;
-
-	mutex_lock(&hdaps_mtx);
-	ret = __hdaps_read_pair(port1, port2, val1, val2);
-	mutex_unlock(&hdaps_mtx);
-
-	return ret;
+	const struct thinkpad_ec_row args =
+		{ .mask = 0x0003, .val = {0x17, 0x81} };
+	struct thinkpad_ec_row data = { .mask = 0x800E };
+	int ret = thinkpad_ec_read_row(&args, &data);
+	if (ret)
+		return  ret;
+	if (!((data.val[0x1] == 0x00 && data.val[0x2] == 0x60) || /* cleanroom spec */
+	      (data.val[0x1] == 0x01 && data.val[0x2] == 0x00)) || /* seen on T61 */
+	    data.val[0x3] != 0x00 || data.val[0xF] != 0x00) {
+		printk(KERN_WARNING
+		       "hdaps_check_ec: bad response (0x%x,0x%x,0x%x,0x%x)\n",
+		       data.val[0x1], data.val[0x2],
+		       data.val[0x3], data.val[0xF]);
+		return -EIO;
+	}
+	return 0;
 }
 
-/*
- * hdaps_device_init - initialize the accelerometer.  Returns zero on success
- * and negative error code on failure.  Can sleep.
+/**
+ * hdaps_device_init - initialize the accelerometer.
+ *
+ * Call several embedded controller functions to test and initialize the
+ * accelerometer.
+ * Returns zero on success and negative error code on failure. Can sleep.
  */
+#define FAILED_INIT(msg) printk(KERN_ERR "hdaps init failed at: %s\n", msg)
 static int hdaps_device_init(void)
 {
-	int total, ret = -ENXIO;
+	int ret;
+	u8 mode;
 
-	mutex_lock(&hdaps_mtx);
+	ret = thinkpad_ec_lock();
+	if (ret)
+		return ret;
 
-	outb(0x13, 0x1610);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
+	if (hdaps_get_ec_mode(&mode))
+		{ FAILED_INIT("hdaps_get_ec_mode failed"); goto bad; }
 
-	/*
-	 * Most ThinkPads return 0x01.
-	 *
-	 * Others--namely the R50p, T41p, and T42p--return 0x03.  These laptops
-	 * have "inverted" axises.
-	 *
-	 * The 0x02 value occurs when the chip has been previously initialized.
-	 */
-	if (__check_latch(0x1611, 0x03) &&
-		     __check_latch(0x1611, 0x02) &&
-		     __check_latch(0x1611, 0x01))
-		goto out;
+	printk(KERN_DEBUG "hdaps: initial mode latch is 0x%02x\n", mode);
+	if (mode == 0x00)
+		{ FAILED_INIT("accelerometer not available"); goto bad; }
 
-	printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x).\n",
-	       __get_latch(0x1611));
+	if (hdaps_check_ec())
+		{ FAILED_INIT("hdaps_check_ec failed"); goto bad; }
 
-	outb(0x17, 0x1610);
-	outb(0x81, 0x1611);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
-	if (__wait_latch(0x1611, 0x00))
-		goto out;
-	if (__wait_latch(0x1612, 0x60))
-		goto out;
-	if (__wait_latch(0x1613, 0x00))
-		goto out;
-	outb(0x14, 0x1610);
-	outb(0x01, 0x1611);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
-	outb(0x10, 0x1610);
-	outb(0xc8, 0x1611);
-	outb(0x00, 0x1612);
-	outb(0x02, 0x1613);
-	outb(0x01, 0x161f);
-	if (__wait_latch(0x161f, 0x00))
-		goto out;
-	if (__device_refresh_sync())
-		goto out;
-	if (__wait_latch(0x1611, 0x00))
-		goto out;
+	if (hdaps_set_power(1))
+		{ FAILED_INIT("hdaps_set_power failed"); goto bad; }
 
-	/* we have done our dance, now let's wait for the applause */
-	for (total = INIT_TIMEOUT_MSECS; total > 0; total -= INIT_WAIT_MSECS) {
-		int x, y;
-
-		/* a read of the device helps push it into action */
-		__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y);
-		if (!__wait_latch(0x1611, 0x02)) {
-			ret = 0;
-			break;
-		}
+	if (hdaps_set_ec_config(sampling_rate*oversampling_ratio,
+				running_avg_filter_order))
+		{ FAILED_INIT("hdaps_set_ec_config failed"); goto bad; }
 
-		msleep(INIT_WAIT_MSECS);
-	}
+	if (hdaps_set_fake_data_mode(fake_data_mode))
+		{ FAILED_INIT("hdaps_set_fake_data_mode failed"); goto bad; }
 
-out:
-	mutex_unlock(&hdaps_mtx);
+	thinkpad_ec_invalidate();
+	udelay(200);
+
+	/* Just prefetch instead of reading, to avoid ~1sec delay on load */
+	ret = thinkpad_ec_prefetch_row(&ec_accel_args);
+	if (ret)
+		{ FAILED_INIT("initial prefetch failed"); goto bad; }
+	goto good;
+bad:
+	thinkpad_ec_invalidate();
+	ret = -ENXIO;
+good:
+	stale_readout = 1;
+	thinkpad_ec_unlock();
 	return ret;
 }
 
+/**
+ * hdaps_device_shutdown - power off the accelerometer
+ * Returns nonzero on failure. Can sleep.
+ */
+static int hdaps_device_shutdown(void)
+{
+	int err = hdaps_set_power(0);
+
+	if (err) {
+		printk(KERN_WARNING "hdaps: cannot power off\n");
+		return err;
+	}
+	err = hdaps_set_ec_config(0, 1);	/* ec_rate 0, filter order 1 */
+	if (err)
+		printk(KERN_WARNING "hdaps: cannot stop EC sampling\n");
+	return err;
+}
 
 /* Device model stuff */
 
@@ -300,13 +451,30 @@ static int hdaps_probe(struct platform_d
 	return 0;
 }
 
+static int hdaps_suspend(struct platform_device *dev, pm_message_t state)
+{
+	/* Don't do hdaps polls until resume re-initializes the sensor. */
+	del_timer_sync(&hdaps_timer);
+	hdaps_device_shutdown(); /* ignore errors, effect is negligible */
+	return 0;
+}
+
 static int hdaps_resume(struct platform_device *dev)
 {
-	return hdaps_device_init();
+	int ret = hdaps_device_init();
+	if (ret)
+		return ret;
+
+	mutex_lock(&hdaps_users_mtx);
+	if (hdaps_users)
+		mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate);
+	mutex_unlock(&hdaps_users_mtx);
+	return 0;
 }
 
 static struct platform_driver hdaps_driver = {
 	.probe = hdaps_probe,
+	.suspend = hdaps_suspend,
 	.resume = hdaps_resume,
 	.driver	= {
 		.name = "hdaps",
@@ -314,30 +482,48 @@ static struct platform_driver hdaps_driv
 	},
 };
 
-/*
- * hdaps_calibrate - Set our "resting" values.  Callers must hold hdaps_mtx.
+/**
+ * hdaps_calibrate - set our "resting" values.
+ * Does its own locking.
  */
 static void hdaps_calibrate(void)
 {
-	__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &rest_x, &rest_y);
+	needs_calibration = 1;
+	hdaps_update();
+	/* If that fails, the mousedev poll will take care of things later. */
 }
 
-static void hdaps_mousedev_poll(struct input_polled_dev *dev)
+/* Timer handler for updating the input device. Runs in softirq context,
+ * so avoid lengthy or blocking operations.
+ */
+static void hdaps_mousedev_poll(unsigned long unused)
 {
-	struct input_dev *input_dev = dev->input;
-	int x, y;
+	int ret;
 
-	mutex_lock(&hdaps_mtx);
+	stale_readout = 1;
 
-	if (__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y))
-		goto out;
-
-	input_report_abs(input_dev, ABS_X, x - rest_x);
-	input_report_abs(input_dev, ABS_Y, y - rest_y);
-	input_sync(input_dev);
+	/* Cannot sleep.  Try nonblockingly.  If we fail, try again later. */
+	if (thinkpad_ec_try_lock())
+		goto keep_active;
+
+	ret = __hdaps_update(1); /* fast update, we're in softirq context */
+	thinkpad_ec_unlock();
+	/* Any of "successful", "not yet ready" and "not prefetched"? */
+	if (ret != 0 && ret != -EBUSY && ret != -ENODATA) {
+		printk(KERN_ERR
+		       "hdaps: poll failed, disabling updates\n");
+		return;
+	}
 
-out:
-	mutex_unlock(&hdaps_mtx);
+keep_active:
+	/* Even if we failed now, pos_x,y may have been updated earlier: */
+	input_report_abs(hdaps_idev, ABS_X, pos_x - rest_x);
+	input_report_abs(hdaps_idev, ABS_Y, pos_y - rest_y);
+	input_sync(hdaps_idev);
+	input_report_abs(hdaps_idev_raw, ABS_X, pos_x);
+	input_report_abs(hdaps_idev_raw, ABS_Y, pos_y);
+	input_sync(hdaps_idev_raw);
+	mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate);
 }
 
 
@@ -346,65 +532,41 @@ out:
 static ssize_t hdaps_position_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
-	int ret, x, y;
-
-	ret = hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y);
-	if (ret)
-		return ret;
-
-	return sprintf(buf, "(%d,%d)\n", x, y);
-}
-
-static ssize_t hdaps_variance_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	int ret, x, y;
-
-	ret = hdaps_read_pair(HDAPS_PORT_XVAR, HDAPS_PORT_YVAR, &x, &y);
+	int ret = hdaps_update();
 	if (ret)
 		return ret;
-
-	return sprintf(buf, "(%d,%d)\n", x, y);
+	return sprintf(buf, "(%d,%d)\n", pos_x, pos_y);
 }
 
 static ssize_t hdaps_temp1_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	u8 temp;
-	int ret;
-
-	ret = hdaps_readb_one(HDAPS_PORT_TEMP1, &temp);
-	if (ret < 0)
-		return ret;
-
-	return sprintf(buf, "%u\n", temp);
-}
-
-static ssize_t hdaps_temp2_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	u8 temp;
-	int ret;
-
-	ret = hdaps_readb_one(HDAPS_PORT_TEMP2, &temp);
-	if (ret < 0)
+	int ret = hdaps_update();
+	if (ret)
 		return ret;
-
-	return sprintf(buf, "%u\n", temp);
+	return sprintf(buf, "%d\n", temperature);
 }
 
 static ssize_t hdaps_keyboard_activity_show(struct device *dev,
 					    struct device_attribute *attr,
 					    char *buf)
 {
-	return sprintf(buf, "%u\n", KEYBD_ISSET(km_activity));
+	int ret = hdaps_update();
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n",
+	   get_jiffies_64() < last_keyboard_jiffies + KMACT_REMEMBER_PERIOD);
 }
 
 static ssize_t hdaps_mouse_activity_show(struct device *dev,
 					 struct device_attribute *attr,
 					 char *buf)
 {
-	return sprintf(buf, "%u\n", MOUSE_ISSET(km_activity));
+	int ret = hdaps_update();
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n",
+	   get_jiffies_64() < last_mouse_jiffies + KMACT_REMEMBER_PERIOD);
 }
 
 static ssize_t hdaps_calibrate_show(struct device *dev,
@@ -417,10 +579,7 @@ static ssize_t hdaps_calibrate_store(str
 				     struct device_attribute *attr,
 				     const char *buf, size_t count)
 {
-	mutex_lock(&hdaps_mtx);
 	hdaps_calibrate();
-	mutex_unlock(&hdaps_mtx);
-
 	return count;
 }
 
@@ -436,7 +595,8 @@ static ssize_t hdaps_invert_store(struct
 {
 	int invert;
 
-	if (sscanf(buf, "%d", &invert) != 1 || (invert != 1 && invert != 0))
+	if (sscanf(buf, "%d", &invert) != 1 ||
+	    invert < 0 || invert > HDAPS_ORIENT_MAX)
 		return -EINVAL;
 
 	hdaps_invert = invert;
@@ -445,24 +605,152 @@ static ssize_t hdaps_invert_store(struct
 	return count;
 }
 
+static ssize_t hdaps_sampling_rate_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", sampling_rate);
+}
+
+static ssize_t hdaps_sampling_rate_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int rate, ret;
+	if (sscanf(buf, "%d", &rate) != 1 || rate > HZ || rate <= 0) {
+		printk(KERN_WARNING
+		       "must have 0<input_sampling_rate<=HZ=%d\n", HZ);
+		return -EINVAL;
+	}
+	ret = hdaps_set_ec_config(rate*oversampling_ratio,
+				  running_avg_filter_order);
+	if (ret)
+		return ret;
+	sampling_rate = rate;
+	return count;
+}
+
+static ssize_t hdaps_oversampling_ratio_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int ec_rate, order;
+	int ret = hdaps_get_ec_config(&ec_rate, &order);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n", ec_rate / sampling_rate);
+}
+
+static ssize_t hdaps_oversampling_ratio_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int ratio, ret;
+	if (sscanf(buf, "%d", &ratio) != 1 || ratio < 1)
+		return -EINVAL;
+	ret = hdaps_set_ec_config(sampling_rate*ratio,
+				  running_avg_filter_order);
+	if (ret)
+		return ret;
+	oversampling_ratio = ratio;
+	return count;
+}
+
+static ssize_t hdaps_running_avg_filter_order_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int rate, order;
+	int ret = hdaps_get_ec_config(&rate, &order);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%u\n", order);
+}
+
+static ssize_t hdaps_running_avg_filter_order_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	int order, ret;
+	if (sscanf(buf, "%d", &order) != 1)
+		return -EINVAL;
+	ret = hdaps_set_ec_config(sampling_rate*oversampling_ratio, order);
+	if (ret)
+		return ret;
+	running_avg_filter_order = order;
+	return count;
+}
+
+static ssize_t hdaps_fake_data_mode_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	int on, ret;
+	if (sscanf(buf, "%d", &on) != 1 || on < 0 || on > 1)
+		return -EINVAL;
+	ret = hdaps_set_fake_data_mode(on);
+	if (ret)
+		return ret;
+	fake_data_mode = on;
+	return count;
+}
+
+static ssize_t hdaps_fake_data_mode_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", fake_data_mode);
+}
+
+static int hdaps_mousedev_open(struct input_dev *dev)
+{
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	mutex_lock(&hdaps_users_mtx);
+	if (hdaps_users++ == 0) /* first input user */
+		mod_timer(&hdaps_timer, jiffies + HZ/sampling_rate);
+	mutex_unlock(&hdaps_users_mtx);
+	return 0;
+}
+
+static void hdaps_mousedev_close(struct input_dev *dev)
+{
+	mutex_lock(&hdaps_users_mtx);
+	if (--hdaps_users == 0) /* no input users left */
+		del_timer_sync(&hdaps_timer);
+	mutex_unlock(&hdaps_users_mtx);
+
+	module_put(THIS_MODULE);
+}
+
 static DEVICE_ATTR(position, 0444, hdaps_position_show, NULL);
-static DEVICE_ATTR(variance, 0444, hdaps_variance_show, NULL);
 static DEVICE_ATTR(temp1, 0444, hdaps_temp1_show, NULL);
-static DEVICE_ATTR(temp2, 0444, hdaps_temp2_show, NULL);
-static DEVICE_ATTR(keyboard_activity, 0444, hdaps_keyboard_activity_show, NULL);
+  /* "temp1" instead of "temperature" is hwmon convention */
+static DEVICE_ATTR(keyboard_activity, 0444,
+		   hdaps_keyboard_activity_show, NULL);
 static DEVICE_ATTR(mouse_activity, 0444, hdaps_mouse_activity_show, NULL);
-static DEVICE_ATTR(calibrate, 0644, hdaps_calibrate_show,hdaps_calibrate_store);
+static DEVICE_ATTR(calibrate, 0644,
+		   hdaps_calibrate_show, hdaps_calibrate_store);
 static DEVICE_ATTR(invert, 0644, hdaps_invert_show, hdaps_invert_store);
+static DEVICE_ATTR(sampling_rate, 0644,
+		   hdaps_sampling_rate_show, hdaps_sampling_rate_store);
+static DEVICE_ATTR(oversampling_ratio, 0644,
+		   hdaps_oversampling_ratio_show,
+		   hdaps_oversampling_ratio_store);
+static DEVICE_ATTR(running_avg_filter_order, 0644,
+		   hdaps_running_avg_filter_order_show,
+		   hdaps_running_avg_filter_order_store);
+static DEVICE_ATTR(fake_data_mode, 0644,
+		   hdaps_fake_data_mode_show, hdaps_fake_data_mode_store);
 
 static struct attribute *hdaps_attributes[] = {
 	&dev_attr_position.attr,
-	&dev_attr_variance.attr,
 	&dev_attr_temp1.attr,
-	&dev_attr_temp2.attr,
 	&dev_attr_keyboard_activity.attr,
 	&dev_attr_mouse_activity.attr,
 	&dev_attr_calibrate.attr,
 	&dev_attr_invert.attr,
+	&dev_attr_sampling_rate.attr,
+	&dev_attr_oversampling_ratio.attr,
+	&dev_attr_running_avg_filter_order.attr,
+	&dev_attr_fake_data_mode.attr,
 	NULL,
 };
 
@@ -473,88 +761,63 @@ static struct attribute_group hdaps_attr
 
 /* Module stuff */
 
-/* hdaps_dmi_match - found a match.  return one, short-circuiting the hunt. */
-static int __init hdaps_dmi_match(const struct dmi_system_id *id)
-{
-	printk(KERN_INFO "hdaps: %s detected.\n", id->ident);
-	return 1;
-}
-
 /* hdaps_dmi_match_invert - found an inverted match. */
 static int __init hdaps_dmi_match_invert(const struct dmi_system_id *id)
 {
-	hdaps_invert = 1;
-	printk(KERN_INFO "hdaps: inverting axis readings.\n");
-	return hdaps_dmi_match(id);
-}
-
-#define HDAPS_DMI_MATCH_NORMAL(vendor, model) {		\
-	.ident = vendor " " model,			\
-	.callback = hdaps_dmi_match,			\
-	.matches = {					\
-		DMI_MATCH(DMI_BOARD_VENDOR, vendor),	\
-		DMI_MATCH(DMI_PRODUCT_VERSION, model)	\
-	}						\
+	int orient = (int)(unsigned long)id->driver_data; /* ptr-sized cast */
+	hdaps_invert = orient;
+	printk(KERN_INFO "hdaps: %s detected, setting orientation %d\n",
+	       id->ident, orient);
+	return 1; /* stop enumeration */
 }
 
-#define HDAPS_DMI_MATCH_INVERT(vendor, model) {		\
+#define HDAPS_DMI_MATCH_INVERT(vendor, model, orient) { \
 	.ident = vendor " " model,			\
 	.callback = hdaps_dmi_match_invert,		\
+	.driver_data = (void *)(orient),		\
 	.matches = {					\
 		DMI_MATCH(DMI_BOARD_VENDOR, vendor),	\
 		DMI_MATCH(DMI_PRODUCT_VERSION, model)	\
 	}						\
 }
 
-/* Note that HDAPS_DMI_MATCH_NORMAL("ThinkPad T42") would match
-   "ThinkPad T42p", so the order of the entries matters.
-   If your ThinkPad is not recognized, please update to latest
-   BIOS. This is especially the case for some R52 ThinkPads. */
-static struct dmi_system_id __initdata hdaps_whitelist[] = {
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R50p"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R50"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R51"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad R52"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61i"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61"),
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T41p"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T41"),
-	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T42p"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T42"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad T43"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T60"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61p"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad X40"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad X41"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61s"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61"),
-	HDAPS_DMI_MATCH_NORMAL("IBM", "ThinkPad Z60m"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61m"),
-	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad Z61p"),
+/* List of models with abnormal axis configuration.
+   DMI matching is by substring, so the "ThinkPad X60" entry would also
+   match "ThinkPad X60s", and enumeration stops after first match,
+   so the order of the entries matters. */
+static struct dmi_system_id __initdata hdaps_whitelist[] = {
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R50p", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad R60", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T41p", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad T42p", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X40", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("IBM", "ThinkPad X41", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R60", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad R61", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T60", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad T61", HDAPS_ORIENT_INVERT_XY),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60 Tablet", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60s", HDAPS_ORIENT_INVERT_Y),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X60", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X),
+	HDAPS_DMI_MATCH_INVERT("LENOVO", "ThinkPad X61", HDAPS_ORIENT_SWAP | HDAPS_ORIENT_INVERT_X),
+	{ .ident = NULL }
+};
 
 static int __init hdaps_init(void)
 {
-	struct input_dev *idev;
 	int ret;
 
-	if (!dmi_check_system(hdaps_whitelist)) {
-		printk(KERN_WARNING "hdaps: supported laptop not found!\n");
-		ret = -ENODEV;
-		goto out;
-	}
-
-	if (!request_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS, "hdaps")) {
-		ret = -ENXIO;
-		goto out;
-	}
-
+	/* Determine axis orientation */
+	if (hdaps_invert == HDAPS_ORIENT_UNDEFINED) /* set by module param? */
+		if (dmi_check_system(hdaps_whitelist) < 1) /* in whitelist? */
+			hdaps_invert = 0; /* default */
+
+	/* Init timer before platform_driver_register, in case of suspend */
+	init_timer(&hdaps_timer);
+	hdaps_timer.function = hdaps_mousedev_poll;
 	ret = platform_driver_register(&hdaps_driver);
 	if (ret)
-		goto out_region;
+		goto out;
 
 	pdev = platform_device_register_simple("hdaps", -1, NULL, 0);
 	if (IS_ERR(pdev)) {
@@ -566,47 +829,79 @@ static int __init hdaps_init(void)
 	if (ret)
 		goto out_device;
 
-	hdaps_idev = input_allocate_polled_device();
+	hdaps_idev = input_allocate_device();
 	if (!hdaps_idev) {
 		ret = -ENOMEM;
 		goto out_group;
 	}
 
-	hdaps_idev->poll = hdaps_mousedev_poll;
-	hdaps_idev->poll_interval = HDAPS_POLL_INTERVAL;
+	hdaps_idev_raw = input_allocate_device();
+	if (!hdaps_idev_raw) {
+		ret = -ENOMEM;
+		goto out_idev_first;
+	}
 
-	/* initial calibrate for the input device */
-	hdaps_calibrate();
+	/* calibration for the input device (deferred to avoid delay) */
+	needs_calibration = 1;
 
-	/* initialize the input class */
-	idev = hdaps_idev->input;
-	idev->name = "hdaps";
-	idev->phys = "isa1600/input0";
-	idev->id.bustype = BUS_ISA;
-	idev->dev.parent = &pdev->dev;
-	idev->evbit[0] = BIT_MASK(EV_ABS);
-	input_set_abs_params(idev, ABS_X,
+	/* initialize the joystick-like fuzzed input device */
+	hdaps_idev->name = "ThinkPad HDAPS joystick emulation";
+	hdaps_idev->phys = "hdaps/input0";
+	hdaps_idev->id.bustype = BUS_HOST;
+	hdaps_idev->id.vendor  = HDAPS_INPUT_VENDOR;
+	hdaps_idev->id.product = HDAPS_INPUT_PRODUCT;
+	hdaps_idev->id.version = HDAPS_INPUT_JS_VERSION;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+	hdaps_idev->cdev.dev = &pdev->dev;
+#endif
+	hdaps_idev->evbit[0] = BIT(EV_ABS);
+	hdaps_idev->open = hdaps_mousedev_open;
+	hdaps_idev->close = hdaps_mousedev_close;
+	input_set_abs_params(hdaps_idev, ABS_X,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
-	input_set_abs_params(idev, ABS_Y,
+	input_set_abs_params(hdaps_idev, ABS_Y,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
 
-	ret = input_register_polled_device(hdaps_idev);
+	ret = input_register_device(hdaps_idev);
 	if (ret)
 		goto out_idev;
 
+	/* initialize the raw data input device */
+	hdaps_idev_raw->name = "ThinkPad HDAPS accelerometer data";
+	hdaps_idev_raw->phys = "hdaps/input1";
+	hdaps_idev_raw->id.bustype = BUS_HOST;
+	hdaps_idev_raw->id.vendor  = HDAPS_INPUT_VENDOR;
+	hdaps_idev_raw->id.product = HDAPS_INPUT_PRODUCT;
+	hdaps_idev_raw->id.version = HDAPS_INPUT_RAW_VERSION;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+	hdaps_idev_raw->cdev.dev = &pdev->dev;
+#endif
+	hdaps_idev_raw->evbit[0] = BIT(EV_ABS);
+	hdaps_idev_raw->open = hdaps_mousedev_open;
+	hdaps_idev_raw->close = hdaps_mousedev_close;
+	input_set_abs_params(hdaps_idev_raw, ABS_X, -32768, 32767, 0, 0);
+	input_set_abs_params(hdaps_idev_raw, ABS_Y, -32768, 32767, 0, 0);
+
+	ret = input_register_device(hdaps_idev_raw);
+	if (ret)
+		goto out_idev_reg_first;
+
 	printk(KERN_INFO "hdaps: driver successfully loaded.\n");
 	return 0;
 
+out_idev_reg_first:
+	input_unregister_device(hdaps_idev);
 out_idev:
-	input_free_polled_device(hdaps_idev);
+	input_free_device(hdaps_idev_raw);
+out_idev_first:
+	input_free_device(hdaps_idev);
 out_group:
 	sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
 out_device:
 	platform_device_unregister(pdev);
 out_driver:
 	platform_driver_unregister(&hdaps_driver);
-out_region:
-	release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS);
+	hdaps_device_shutdown();
 out:
 	printk(KERN_WARNING "hdaps: driver init failed (ret=%d)!\n", ret);
 	return ret;
@@ -614,12 +909,12 @@ out:
 
 static void __exit hdaps_exit(void)
 {
-	input_unregister_polled_device(hdaps_idev);
-	input_free_polled_device(hdaps_idev);
+	input_unregister_device(hdaps_idev_raw);
+	input_unregister_device(hdaps_idev);
+	hdaps_device_shutdown(); /* ignore errors, effect is negligible */
 	sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group);
 	platform_device_unregister(pdev);
 	platform_driver_unregister(&hdaps_driver);
-	release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS);
 
 	printk(KERN_INFO "hdaps: driver unloaded.\n");
 }
@@ -627,8 +922,8 @@ static void __exit hdaps_exit(void)
 module_init(hdaps_init);
 module_exit(hdaps_exit);
 
-module_param_named(invert, hdaps_invert, bool, 0);
-MODULE_PARM_DESC(invert, "invert data along each axis");
+module_param_named(invert, hdaps_invert, uint, 0);
+MODULE_PARM_DESC(invert, "axis orientation code");
 
 MODULE_AUTHOR("Robert Love");
 MODULE_DESCRIPTION("IBM Hard Drive Active Protection System (HDAPS) driver");
diff -Npur linux-2.6-block/drivers/macintosh/via-pmu.c linux-2.6-block-custom/drivers/macintosh/via-pmu.c
--- linux-2.6-block/drivers/macintosh/via-pmu.c	2008-09-27 16:12:15.962939563 +0900
+++ linux-2.6-block-custom/drivers/macintosh/via-pmu.c	2008-09-26 19:45:20.362785844 +0900
@@ -40,7 +40,6 @@
 #include <linux/interrupt.h>
 #include <linux/device.h>
 #include <linux/sysdev.h>
-#include <linux/freezer.h>
 #include <linux/syscalls.h>
 #include <linux/suspend.h>
 #include <linux/cpu.h>
diff -Npur linux-2.6-block/drivers/misc/Kconfig linux-2.6-block-custom/drivers/misc/Kconfig
--- linux-2.6-block/drivers/misc/Kconfig	2008-09-27 16:12:17.550896902 +0900
+++ linux-2.6-block-custom/drivers/misc/Kconfig	2008-09-26 19:45:38.663768510 +0900
@@ -475,4 +475,24 @@ config SGI_GRU_DEBUG
 	This option enables addition debugging code for the SGI GRU driver. If
 	you are unsure, say N.
 
+config THINKPAD_EC
+	tristate
+	depends on X86
+	  ---help---
+	  This is a low-level driver for accessing the ThinkPad H8S embedded
+	  controller over the LPC bus (not to be confused with the ACPI Embedded
+	  Controller interface).
+
+config TP_SMAPI
+	tristate "ThinkPad SMAPI Support"
+	depends on X86
+	select THINKPAD_EC
+	default n
+	help
+	  This adds SMAPI support on Lenovo/IBM ThinkPads, for features such
+	  as battery charging control. For more information about this driver
+	  see <http://www.thinkwiki.org/wiki/tp_smapi>.
+
+	  If you have a Lenovo/IBM ThinkPad laptop, say Y or M here.
+
 endif # MISC_DEVICES
diff -Npur linux-2.6-block/drivers/misc/Makefile linux-2.6-block-custom/drivers/misc/Makefile
--- linux-2.6-block/drivers/misc/Makefile	2008-09-27 16:12:17.550896902 +0900
+++ linux-2.6-block-custom/drivers/misc/Makefile	2008-09-26 19:45:38.663768510 +0900
@@ -30,3 +30,5 @@ obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
 obj-$(CONFIG_SGI_XP)		+= sgi-xp/
 obj-$(CONFIG_SGI_GRU)		+= sgi-gru/
 obj-$(CONFIG_HP_ILO)		+= hpilo.o
+obj-$(CONFIG_THINKPAD_EC)       += thinkpad_ec.o
+obj-$(CONFIG_TP_SMAPI)          += tp_smapi.o
diff -Npur linux-2.6-block/drivers/misc/thinkpad_ec.c linux-2.6-block-custom/drivers/misc/thinkpad_ec.c
--- linux-2.6-block/drivers/misc/thinkpad_ec.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/drivers/misc/thinkpad_ec.c	2008-09-26 20:27:54.819791063 +0900
@@ -0,0 +1,489 @@
+/*
+ *  thinkpad_ec.c - ThinkPad embedded controller LPC3 functions
+ *
+ *  The embedded controller on ThinkPad laptops has a non-standard interface,
+ *  where LPC channel 3 of the H8S EC chip is hooked up to IO ports
+ *  0x1600-0x161F and implements (a special case of) the H8S LPC protocol.
+ *  The EC LPC interface provides various system management services (currently
+ *  known: battery information and accelerometer readouts). This driver
+ *  provides access and mutual exclusion for the EC interface.
+ *
+ *  The LPC protocol and terminology is documented here:
+ *  "H8S/2104B Group Hardware Manual",
+ *  http://documentation.renesas.com/eng/products/mpumcu/rej09b0300_2140bhm.pdf
+ *
+ *  Copyright (C) 2006-2007 Shem Multinymous <multinymous@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/thinkpad_ec.h>
+#include <linux/jiffies.h>
+#include <linux/semaphore.h>
+#include <asm/io.h>
+
+#define TP_VERSION "0.37"
+
+MODULE_AUTHOR("Shem Multinymous");
+MODULE_DESCRIPTION("ThinkPad embedded controller hardware access");
+MODULE_VERSION(TP_VERSION);
+MODULE_LICENSE("GPL");
+
+/* IO ports used by embedded controller LPC channel 3: */
+#define TPC_BASE_PORT 0x1600
+#define TPC_NUM_PORTS 0x20
+#define TPC_STR3_PORT 0x1604  /* Reads H8S EC register STR3 */
+#define TPC_TWR0_PORT  0x1610 /* Mapped to H8S EC register TWR0MW/SW  */
+#define TPC_TWR15_PORT 0x161F /* Mapped to H8S EC register TWR15. */
+  /* (and port TPC_TWR0_PORT+i is mapped to H8S reg TWRi for 0<i<16) */
+
+/* H8S STR3 status flags (see "H8S/2104B Group Hardware Manual" p.549) */
+#define H8S_STR3_IBF3B 0x80  /* Bidi. Data Register Input Buffer Full */
+#define H8S_STR3_OBF3B 0x40  /* Bidi. Data Register Output Buffer Full */
+#define H8S_STR3_MWMF  0x20  /* Master Write Mode Flag */
+#define H8S_STR3_SWMF  0x10  /* Slave Write Mode Flag */
+#define H8S_STR3_MASK  0xF0  /* All bits we care about in STR3 */
+
+/* Timeouts and retries */
+#define TPC_READ_RETRIES     150
+#define TPC_READ_NDELAY      500
+#define TPC_REQUEST_RETRIES 1000
+#define TPC_REQUEST_NDELAY    10
+#define TPC_PREFETCH_TIMEOUT   (HZ/10)  /* invalidate prefetch after 0.1sec */
+
+/* A few macros for printk()ing: */
+#define MSG_FMT(fmt, args...) \
+  "thinkpad_ec: %s: " fmt "\n", __func__, ## args
+#define REQ_FMT(msg, code) \
+  MSG_FMT("%s: (0x%02x:0x%02x)->0x%02x", \
+	  msg, args->val[0x0], args->val[0xF], code)
+
+/* State of request prefetching: */
+static u8 prefetch_arg0, prefetch_argF;           /* Args of last prefetch */
+static u64 prefetch_jiffies;                      /* time of prefetch, or: */
+#define TPC_PREFETCH_NONE   INITIAL_JIFFIES       /*   No prefetch */
+#define TPC_PREFETCH_JUNK   (INITIAL_JIFFIES+1)   /*   Ignore prefetch */
+
+/* Locking: */
+
+static DECLARE_MUTEX(thinkpad_ec_mutex);
+
+/**
+ * thinkpad_ec_lock - get lock on the ThinkPad EC
+ *
+ * Get exclusive lock for accessing the ThinkPad embedded controller LPC3
+ * interface. Returns 0 iff lock acquired, else down_interruptible()'s error.
+ */
+int thinkpad_ec_lock(void)
+{
+	int ret;
+	ret = down_interruptible(&thinkpad_ec_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_lock);
+
+/**
+ * thinkpad_ec_try_lock - try getting lock on the ThinkPad EC
+ *
+ * Try getting an exclusive lock for accessing the ThinkPad embedded
+ * controller LPC3. Returns immediately if lock is not available; neither
+ * blocks nor sleeps. Returns 0 iff lock acquired.
+ */
+int thinkpad_ec_try_lock(void)
+{
+	return down_trylock(&thinkpad_ec_mutex);
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_try_lock);
+
+/**
+ * thinkpad_ec_unlock - release lock on ThinkPad EC
+ *
+ * Release a previously acquired exclusive lock on the ThinkPad embedded
+ * controller LPC3 interface.
+ */
+void thinkpad_ec_unlock(void)
+{
+	up(&thinkpad_ec_mutex);
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_unlock);
+
+/**
+ * thinkpad_ec_request_row - tell embedded controller to prepare a row
+ * @args Input register arguments
+ *
+ * Requests a data row by writing to H8S LPC registers TWR0 through TWR15 (or
+ * a subset thereof) following the protocol prescribed by the "H8S/2104B Group
+ * Hardware Manual". Does sanity checks via status register STR3.
+ */
+static int thinkpad_ec_request_row(const struct thinkpad_ec_row *args)
+{
+	u8 str3;
+	int i;
+
+	/* EC protocol requires write to TWR0 (function code): */
+	if (!(args->mask & 0x0001)) {
+		printk(KERN_ERR MSG_FMT("bad args->mask=0x%02x", args->mask));
+		return -EINVAL;
+	}
+
+	/* Check initial STR3 status: */
+	str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	if (str3 & H8S_STR3_OBF3B) { /* data already pending */
+		inb(TPC_TWR15_PORT); /* marks end of previous transaction */
+		if (prefetch_jiffies == TPC_PREFETCH_NONE)
+			printk(KERN_WARNING REQ_FMT(
+			       "EC has result from unrequested transaction",
+			       str3));
+		return -EBUSY; /* EC will be ready in a few usecs */
+	} else if (str3 == H8S_STR3_SWMF) { /* busy with previous request */
+		if (prefetch_jiffies == TPC_PREFETCH_NONE)
+			printk(KERN_WARNING REQ_FMT(
+			       "EC is busy with unrequested transaction",
+			       str3));
+		return -EBUSY; /* data will be pending in a few usecs */
+	} else if (str3 != 0x00) { /* unexpected status? */
+		printk(KERN_WARNING REQ_FMT("unexpected initial STR3", str3));
+		return -EIO;
+	}
+
+	/* Send TWR0MW: */
+	outb(args->val[0], TPC_TWR0_PORT);
+	str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	if (str3 != H8S_STR3_MWMF) { /* not accepted? */
+		printk(KERN_WARNING REQ_FMT("arg0 rejected", str3));
+		return -EIO;
+	}
+
+	/* Send TWR1 through TWR14: */
+	for (i = 1; i < TP_CONTROLLER_ROW_LEN-1; i++)
+		if ((args->mask>>i)&1)
+			outb(args->val[i], TPC_TWR0_PORT+i);
+
+	/* Send TWR15 (default to 0x01). This marks end of command. */
+	outb((args->mask & 0x8000) ? args->val[0xF] : 0x01, TPC_TWR15_PORT);
+
+	/* Wait until EC starts writing its reply (~60ns on average).
+	 * Releasing locks before this happens may cause an EC hang
+	 * due to firmware bug!
+	 */
+	for (i = 0; i < TPC_REQUEST_RETRIES; i++) {
+		str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+		if (str3 & H8S_STR3_SWMF) /* EC started replying */
+			return 0;
+		else if (!(str3 & ~(H8S_STR3_IBF3B|H8S_STR3_MWMF)))
+			/* Normal progress (the EC hasn't seen the request
+			 * yet, or is processing it). Wait it out. */
+			ndelay(TPC_REQUEST_NDELAY);
+		else { /* weird EC status */
+			printk(KERN_WARNING
+			       REQ_FMT("bad end STR3", str3));
+			return -EIO;
+		}
+	}
+	printk(KERN_WARNING REQ_FMT("EC is mysteriously silent", str3));
+	return -EIO;
+}
+
+/**
+ * thinkpad_ec_read_data - read pre-requested row-data from EC
+ * @args Input register arguments of pre-requested rows
+ * @data Output register values
+ *
+ * Reads current row data from the controller, assuming it's already
+ * requested. Follows the H8S spec for register access and status checks.
+ */
+static int thinkpad_ec_read_data(const struct thinkpad_ec_row *args,
+				 struct thinkpad_ec_row *data)
+{
+	int i;
+	u8 str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	/* Once we make a request, STR3 assumes the sequence of values listed
+	 * in the following 'if' as it reads the request and writes its data.
+	 * It takes about a few dozen nanosecs total, with very high variance.
+	 */
+	if (str3 == (H8S_STR3_IBF3B|H8S_STR3_MWMF) ||
+	    str3 == 0x00 ||  /* the 0x00 is indistinguishable from idle EC! */
+	    str3 == H8S_STR3_SWMF)
+		return -EBUSY; /* not ready yet */
+	/* Finally, the EC signals output buffer full: */
+	if (str3 != (H8S_STR3_OBF3B|H8S_STR3_SWMF)) {
+		printk(KERN_WARNING
+		       REQ_FMT("bad initial STR3", str3));
+		return -EIO;
+	}
+
+	/* Read first byte (signals start of read transactions): */
+	data->val[0] = inb(TPC_TWR0_PORT);
+	/* Optionally read 14 more bytes: */
+	for (i = 1; i < TP_CONTROLLER_ROW_LEN-1; i++)
+		if ((data->mask >> i)&1)
+			data->val[i] = inb(TPC_TWR0_PORT+i);
+	/* Read last byte from 0x161F (signals end of read transaction): */
+	data->val[0xF] = inb(TPC_TWR15_PORT);
+
+	/* Readout still pending? */
+	str3 = inb(TPC_STR3_PORT) & H8S_STR3_MASK;
+	if (str3 & H8S_STR3_OBF3B)
+		printk(KERN_WARNING
+		       REQ_FMT("OBF3B=1 after read", str3));
+	/* If port 0x161F returns 0x80 too often, the EC may lock up. Warn: */
+	if (data->val[0xF] == 0x80)
+		printk(KERN_WARNING
+		       REQ_FMT("0x161F reports error", data->val[0xF]));
+	return 0;
+}
+
+/**
+ * thinkpad_ec_is_row_fetched - is the given row currently prefetched?
+ *
+ * To keep things simple we compare only the first and last args;
+ * this suffices for all known cases.
+ */
+static int thinkpad_ec_is_row_fetched(const struct thinkpad_ec_row *args)
+{
+	return (prefetch_jiffies != TPC_PREFETCH_NONE) &&
+	       (prefetch_jiffies != TPC_PREFETCH_JUNK) &&
+	       (prefetch_arg0 == args->val[0]) &&
+	       (prefetch_argF == args->val[0xF]) &&
+	       (get_jiffies_64() < prefetch_jiffies + TPC_PREFETCH_TIMEOUT);
+}
+
+/**
+ * thinkpad_ec_read_row - request and read data from ThinkPad EC
+ * @args Input register arguments
+ * @data Output register values
+ *
+ * Read a data row from the ThinkPad embedded controller LPC3 interface.
+ * Does fetching and retrying if needed. The row is specified by an
+ * array of 16 bytes, some of which may be undefined (but the first is
+ * mandatory). These bytes are given in @args->val[], where @args->val[i] is
+ * used iff ((@args->mask>>i)&1). The resulting row data is stored in
+ * @data->val[], but is only guaranteed to be valid for indices corresponding
+ * to set bits in @data->mask. That is, if (@data->mask&(1<<i))==0 then
+ * @data->val[i] is undefined.
+ *
+ * Returns -EBUSY on transient error and -EIO on abnormal condition.
+ * Caller must hold controller lock.
+ */
+int thinkpad_ec_read_row(const struct thinkpad_ec_row *args,
+			 struct thinkpad_ec_row *data)
+{
+	int retries, ret;
+
+	if (thinkpad_ec_is_row_fetched(args))
+		goto read_row; /* already requested */
+
+	/* Request the row */
+	for (retries = 0; retries < TPC_READ_RETRIES; ++retries) {
+		ret = thinkpad_ec_request_row(args);
+		if (!ret)
+			goto read_row;
+		if (ret != -EBUSY)
+			break;
+		ndelay(TPC_READ_NDELAY);
+	}
+	printk(KERN_ERR REQ_FMT("failed requesting row", ret));
+	goto out;
+
+read_row:
+	/* Read the row's data */
+	for (retries = 0; retries < TPC_READ_RETRIES; ++retries) {
+		ret = thinkpad_ec_read_data(args, data);
+		if (!ret)
+			goto out;
+		if (ret != -EBUSY)
+			break;
+		ndelay(TPC_READ_NDELAY);
+	}
+
+	printk(KERN_ERR REQ_FMT("failed waiting for data", ret));
+
+out:
+	prefetch_jiffies = TPC_PREFETCH_JUNK;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_read_row);
+
+/**
+ * thinkpad_ec_try_read_row - try reading prefetched data from ThinkPad EC
+ * @args Input register arguments
+ * @data Output register values
+ *
+ * Try reading a data row from the ThinkPad embedded controller LPC3
+ * interface, if this row was recently prefetched using
+ * thinkpad_ec_prefetch_row(). Does not fetch, retry or block.
+ * The parameters have the same meaning as in thinkpad_ec_read_row().
+ *
+ * Returns -EBUSY if data not ready and -ENODATA if row not prefetched.
+ * Caller must hold controller lock.
+ */
+int thinkpad_ec_try_read_row(const struct thinkpad_ec_row *args,
+			     struct thinkpad_ec_row *data)
+{
+	int ret;
+	if (!thinkpad_ec_is_row_fetched(args)) {
+		ret = -ENODATA;
+	} else {
+		ret = thinkpad_ec_read_data(args, data);
+		if (!ret)
+			prefetch_jiffies = TPC_PREFETCH_NONE; /* eaten up */
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_try_read_row);
+
+/**
+ * thinkpad_ec_prefetch_row - prefetch data from ThinkPad EC
+ * @args Input register arguments
+ *
+ * Prefetch a data row from the ThinkPad embedded controller LPC3
+ * interface. A subsequent call to thinkpad_ec_read_row() with the
+ * same arguments will be faster, and a subsequent call to
+ * thinkpad_ec_try_read_row() stands a good chance of succeeding if
+ * done neither too soon nor too late. See
+ * thinkpad_ec_read_row() for the meaning of @args.
+ *
+ * Returns -EBUSY on transient error and -EIO on abnormal condition.
+ * Caller must hold controller lock.
+ */
+int thinkpad_ec_prefetch_row(const struct thinkpad_ec_row *args)
+{
+	int ret;
+	ret = thinkpad_ec_request_row(args);
+	if (ret) {
+		prefetch_jiffies = TPC_PREFETCH_JUNK;
+	} else {
+		prefetch_jiffies = get_jiffies_64();
+		prefetch_arg0 = args->val[0x0];
+		prefetch_argF = args->val[0xF];
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_prefetch_row);
+
+/**
+ * thinkpad_ec_invalidate - invalidate prefetched ThinkPad EC data
+ *
+ * Invalidate the data prefetched via thinkpad_ec_prefetch_row() from the
+ * ThinkPad embedded controller LPC3 interface.
+ * Must be called before unlocking by any code that accesses the controller
+ * ports directly.
+ */
+void thinkpad_ec_invalidate(void)
+{
+	prefetch_jiffies = TPC_PREFETCH_JUNK;
+}
+EXPORT_SYMBOL_GPL(thinkpad_ec_invalidate);
+
+
+/*** Checking for EC hardware ***/
+
+/**
+ * thinkpad_ec_test - verify the EC is present and follows protocol
+ *
+ * Ensure the EC LPC3 channel really works on this machine by making
+ * an EC request and seeing if the EC follows the documented H8S protocol.
+ * The requested row just reads battery status, so it should be harmless to
+ * access it (on a correct EC).
+ * This test writes to IO ports, so execute only after checking DMI.
+ */
+static int __init thinkpad_ec_test(void)
+{
+	int ret;
+	const struct thinkpad_ec_row args = /* battery 0 basic status */
+	  { .mask = 0x8001, .val = {0x01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x00} };
+	struct thinkpad_ec_row data = { .mask = 0x0000 };
+	ret = thinkpad_ec_lock();
+	if (ret)
+		return ret;
+	ret = thinkpad_ec_read_row(&args, &data);
+	thinkpad_ec_unlock();
+	return ret;
+}
+
+/* Search all DMI device names of a given type for a substring */
+static int __init dmi_find_substring(int type, const char *substr)
+{
+	const struct dmi_device *dev = NULL;
+	while ((dev = dmi_find_device(type, NULL, dev))) {
+		if (strstr(dev->name, substr))
+			return 1;
+	}
+	return 0;
+}
+
+#define TP_DMI_MATCH(vendor,model)	{		\
+	.ident = vendor " " model,			\
+	.matches = {					\
+		DMI_MATCH(DMI_BOARD_VENDOR, vendor),	\
+		DMI_MATCH(DMI_PRODUCT_VERSION, model)	\
+	}						\
+}
+
+/* Check DMI for existence of ThinkPad embedded controller */
+static int __init check_dmi_for_ec(void)
+{
+	/* A few old models that have a good EC but don't report it in DMI */
+	struct dmi_system_id tp_whitelist[] = {
+		TP_DMI_MATCH("IBM", "ThinkPad A30"),
+		TP_DMI_MATCH("IBM", "ThinkPad T23"),
+		TP_DMI_MATCH("IBM", "ThinkPad X24"),
+		{ .ident = NULL }
+	};
+	return dmi_find_substring(DMI_DEV_TYPE_OEM_STRING,
+				  "IBM ThinkPad Embedded Controller") ||
+	       dmi_check_system(tp_whitelist);
+}
+
+/*** Init and cleanup ***/
+
+static int __init thinkpad_ec_init(void)
+{
+	if (!check_dmi_for_ec()) {
+		printk(KERN_WARNING
+		       "thinkpad_ec: no ThinkPad embedded controller!\n");
+		return -ENODEV;
+	}
+
+	if (!request_region(TPC_BASE_PORT, TPC_NUM_PORTS,
+			    "thinkpad_ec")) {
+		printk(KERN_ERR "thinkpad_ec: cannot claim io ports %#x-%#x\n",
+		       TPC_BASE_PORT,
+		       TPC_BASE_PORT + TPC_NUM_PORTS - 1);
+		return -ENXIO;
+	}
+	prefetch_jiffies = TPC_PREFETCH_JUNK;
+	if (thinkpad_ec_test()) {
+		printk(KERN_ERR "thinkpad_ec: initial ec test failed\n");
+		release_region(TPC_BASE_PORT, TPC_NUM_PORTS);
+		return -ENXIO;
+	}
+	printk(KERN_INFO "thinkpad_ec: thinkpad_ec " TP_VERSION " loaded.\n");
+	return 0;
+}
+
+static void __exit thinkpad_ec_exit(void)
+{
+	release_region(TPC_BASE_PORT, TPC_NUM_PORTS);
+	printk(KERN_INFO "thinkpad_ec: unloaded.\n");
+}
+
+module_init(thinkpad_ec_init);
+module_exit(thinkpad_ec_exit);
diff -Npur linux-2.6-block/drivers/misc/tp_smapi.c linux-2.6-block-custom/drivers/misc/tp_smapi.c
--- linux-2.6-block/drivers/misc/tp_smapi.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/drivers/misc/tp_smapi.c	2008-09-26 19:45:38.663768510 +0900
@@ -0,0 +1,1477 @@
+/*
+ *  tp_smapi.c - ThinkPad SMAPI support
+ *
+ *  This driver exposes some features of the System Management Application
+ *  Program Interface (SMAPI) BIOS found on ThinkPad laptops. It works on
+ *  models in which the SMAPI BIOS runs in SMM and is invoked by writing
+ *  to the APM control port 0xB2. Older models use a different interface;
+ *  for those, try the out-of-tree "thinkpad" module from "tpctl".
+ *  It also exposes battery status information, obtained from the ThinkPad
+ *  embedded controller (via the thinkpad_ec module).
+ *
+ *  Many of the battery status values obtained from the EC simply mirror
+ *  values provided by the battery's Smart Battery System (SBS) interface, so
+ *  their meaning is defined by the Smart Battery Data Specification.
+ *  References to this SBS spec are given in the code where relevant.
+ *
+ *  Copyright (C) 2006 Shem Multinymous <multinymous@gmail.com>.
+ *  SMAPI access code based on the mwave driver by Mike Sullivan.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/mc146818rtc.h>	/* CMOS defines */
+#include <linux/delay.h>
+#include <linux/version.h>
+#include <linux/thinkpad_ec.h>
+#include <linux/platform_device.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#define TP_VERSION "0.37"
+#define TP_DESC "ThinkPad SMAPI Support"
+#define TP_DIR "smapi"
+
+MODULE_AUTHOR("Shem Multinymous");
+MODULE_DESCRIPTION(TP_DESC);
+MODULE_VERSION(TP_VERSION);
+MODULE_LICENSE("GPL");
+
+static struct platform_device *pdev;	/* device that TPRINTK() logs against */
+
+static int tp_debug;			/* nonzero enables DPRINTK() output */
+module_param_named(debug, tp_debug, int, 0600);
+MODULE_PARM_DESC(debug, "Debug level (0=off, 1=on)");
+
+/* Logging: TPRINTK adds __func__ and a newline; DPRINTK only if tp_debug. */
+#define TPRINTK(level, fmt, args...) \
+  dev_printk(level, &(pdev->dev), "%s: " fmt "\n", __func__, ## args)
+#define DPRINTK(fmt, args...) \
+  do { if (tp_debug) TPRINTK(KERN_DEBUG, fmt, ## args); } while (0)
+
+/*********************************************************************
+ * SMAPI interface
+ */
+
+/* SMAPI function codes: the inEBX argument (register BX) of an SMM call. */
+#define SMAPI_GET_INHIBIT_CHARGE                0x2114
+#define SMAPI_SET_INHIBIT_CHARGE                0x2115
+#define SMAPI_GET_THRESH_START                  0x2116
+#define SMAPI_SET_THRESH_START                  0x2117
+#define SMAPI_GET_FORCE_DISCHARGE               0x2118
+#define SMAPI_SET_FORCE_DISCHARGE               0x2119
+#define SMAPI_GET_THRESH_STOP                   0x211a
+#define SMAPI_SET_THRESH_STOP                   0x211b
+
+/* SMAPI rc -> {msg, errno}; see ThinkPad 770 Technical Reference Manual p.83
+ http://www-307.ibm.com/pc/support/site.wss/document.do?lndocid=PFAN-3TUQQD */
+#define SMAPI_RETCODE_EOF 0xff
+static struct { u8 rc; char *msg; int ret; } smapi_retcode[] =
+{
+	{0x00, "OK", 0},
+	{0x53, "SMAPI fuction is not available", -ENXIO},
+	{0x81, "Invalid parameter", -EINVAL},
+	{0x86, "Function is not supported by SMAPI BIOS", -EOPNOTSUPP},
+	{0x90, "System error", -EIO},
+	{0x91, "System is invalid", -EIO},
+	{0x92, "System is busy", -EBUSY}, /* -EBUSY makes smapi_request retry */
+	{0xa0, "Device error (disk read error)", -EIO},
+	{0xa1, "Device is busy", -EBUSY},
+	{0xa2, "Device is not attached", -ENXIO},
+	{0xa3, "Device is disbled", -EIO},
+	{0xa4, "Request parameter is out of range", -EINVAL},
+	{0xa5, "Request parameter is not accepted", -EINVAL},
+	{0xa6, "Transient error", -EBUSY}, /* ? */
+	{SMAPI_RETCODE_EOF, "Unknown error code", -EIO} /* lookup sentinel */
+};
+
+
+#define SMAPI_MAX_RETRIES 10       /* attempts made by one smapi_request() */
+#define SMAPI_PORT2 0x4F           /* fixed port, meaning unclear */
+static unsigned short smapi_port;  /* APM control port, normally 0xB2 */
+
+static DECLARE_MUTEX(smapi_mutex); /* taken by the threshold store handlers */
+
+/**
+ * find_smapi_port - read SMAPI port from NVRAM
+ * Returns the 16-bit port read from CMOS 0x7E/0x7F, or -ENXIO on failure.
+ */
+static int __init find_smapi_port(void)
+{
+	unsigned long irqflags;
+	u16 signature;
+	unsigned short io_port;
+
+	/* CMOS 0x7C/0x7D hold a 16-bit ID (low byte first): must be 0x5349 */
+	spin_lock_irqsave(&rtc_lock, irqflags);
+	signature = CMOS_READ(0x7C);
+	signature |= (CMOS_READ(0x7D) << 8);
+	spin_unlock_irqrestore(&rtc_lock, irqflags);
+	if (signature != 0x5349) {
+		printk(KERN_ERR "SMAPI not supported (ID=0x%x)\n", signature);
+		return -ENXIO;
+	}
+	spin_lock_irqsave(&rtc_lock, irqflags);
+	io_port = CMOS_READ(0x7E);
+	io_port |= (CMOS_READ(0x7F) << 8);
+	spin_unlock_irqrestore(&rtc_lock, irqflags);
+	if (io_port)
+		return io_port;
+	printk(KERN_ERR "unable to read SMAPI port number\n");
+	return -ENXIO;
+}
+
+/**
+ * smapi_request - make a SMAPI call
+ * @inEBX, @inECX, @inEDI, @inESI: input registers
+ * @outEBX, @outECX, @outEDX, @outEDI, @outESI: output registers
+ * @msg: if non-NULL, receives the message text for the result code
+ * Invokes the SMAPI SMBIOS with the given input and output args, making up
+ * to SMAPI_MAX_RETRIES attempts for as long as the result is -EBUSY.
+ * All outputs are optional (can be %NULL).
+ * Returns 0 when successful, and a negative errno constant (see
+ * smapi_retcode above, or from thinkpad_ec_lock()) upon failure.
+ */
+static int smapi_request(u32 inEBX, u32 inECX,
+			 u32 inEDI, u32 inESI,
+			 u32 *outEBX, u32 *outECX, u32 *outEDX,
+			 u32 *outEDI, u32 *outESI, const char **msg)
+{
+	int ret = 0;
+	int i;
+	int retries;
+	u8 rc;
+	/* Must use local vars for output regs, due to reg pressure. */
+	u32 tmpEAX, tmpEBX, tmpECX, tmpEDX, tmpEDI, tmpESI;
+
+	for (retries = 0; retries < SMAPI_MAX_RETRIES; ++retries) {
+		DPRINTK("req_in: BX=%x CX=%x DI=%x SI=%x",
+			inEBX, inECX, inEDI, inESI);
+
+		/* SMAPI's SMBIOS call and thinkpad_ec end up using
+		 * different interfaces to the same chip, so play it safe. */
+		ret = thinkpad_ec_lock();
+		if (ret)
+			return ret;
+
+		__asm__ __volatile__(
+			"movl  $0x00005380,%%eax\n\t"
+			"movl  %6,%%ebx\n\t"
+			"movl  %7,%%ecx\n\t"
+			"movl  %8,%%edi\n\t"
+			"movl  %9,%%esi\n\t"
+			"xorl  %%edx,%%edx\n\t"
+			"movw  %10,%%dx\n\t"
+			"out   %%al,%%dx\n\t"  /* trigger SMI to SMBIOS */
+			"out   %%al,$0x4F\n\t" /* same value as SMAPI_PORT2 */
+			"movl  %%eax,%0\n\t"
+			"movl  %%ebx,%1\n\t"
+			"movl  %%ecx,%2\n\t"
+			"movl  %%edx,%3\n\t"
+			"movl  %%edi,%4\n\t"
+			"movl  %%esi,%5\n\t"
+			:"=m"(tmpEAX),
+			 "=m"(tmpEBX),
+			 "=m"(tmpECX),
+			 "=m"(tmpEDX),
+			 "=m"(tmpEDI),
+			 "=m"(tmpESI)
+			:"m"(inEBX), "m"(inECX), "m"(inEDI), "m"(inESI),
+			 "m"((u16)smapi_port)
+			:"%eax", "%ebx", "%ecx", "%edx", "%edi",
+			 "%esi");
+
+		thinkpad_ec_invalidate();
+		thinkpad_ec_unlock();
+
+		/* Don't let the next SMAPI access happen too quickly, it may
+		 * cause problems. (Assumes smapi_mutex is held; TODO confirm) */
+		msleep(50);
+
+		if (outEBX) *outEBX = tmpEBX;
+		if (outECX) *outECX = tmpECX;
+		if (outEDX) *outEDX = tmpEDX;
+		if (outESI) *outESI = tmpESI;
+		if (outEDI) *outEDI = tmpEDI;
+
+		/* Look up error code (AH, i.e. bits 8-15 of the returned EAX) */
+		rc = (tmpEAX>>8)&0xFF;
+		for (i = 0; smapi_retcode[i].rc != SMAPI_RETCODE_EOF &&
+			    smapi_retcode[i].rc != rc; ++i) {}
+		ret = smapi_retcode[i].ret;
+		if (msg)
+			*msg = smapi_retcode[i].msg;
+
+		DPRINTK("req_out: AX=%x BX=%x CX=%x DX=%x DI=%x SI=%x r=%d",
+			 tmpEAX, tmpEBX, tmpECX, tmpEDX, tmpEDI, tmpESI, ret);
+		if (ret)
+			TPRINTK(KERN_NOTICE, "SMAPI error: %s (func=%x)",
+				smapi_retcode[i].msg, inEBX);
+
+		if (ret != -EBUSY)
+			return ret;
+	}
+	return ret;
+}
+
+/* Convenience wrapper: smapi_request() with every output register dropped */
+static int smapi_write(u32 inEBX, u32 inECX,
+		       u32 inEDI, u32 inESI, const char **msg)
+{
+	return smapi_request(inEBX, inECX, inEDI, inESI,
+			     NULL, NULL, NULL, NULL, NULL, msg);
+}
+
+
+/*********************************************************************
+ * Specific SMAPI services
+ * All of these functions return 0 upon success, and a negative errno
+ * constant (see smapi_retcode) on failure.
+ */
+
+enum thresh_type {
+	THRESH_STOP  = 0, /* the code assumes this is 0 for brevity */
+	THRESH_START
+};
+#define THRESH_NAME(which) (((which) == THRESH_START) ? "start" : "stop")
+
+/**
+ * __get_real_thresh - read battery charge start/stop threshold from SMAPI
+ * @bat:    battery number (0 or 1)
+ * @which:  THRESH_START or THRESH_STOP
+ * @thresh: if non-NULL, receives the raw value (1..99, 0=default) from SMAPI
+ * @outEDI: some additional state that needs to be preserved, meaning unknown
+ * @outESI: some additional state that needs to be preserved, meaning unknown
+ */
+static int __get_real_thresh(int bat, enum thresh_type which, int *thresh,
+			     u32 *outEDI, u32 *outESI)
+{
+	u32 ebx = (which == THRESH_START) ? SMAPI_GET_THRESH_START
+					  : SMAPI_GET_THRESH_STOP;
+	u32 ecx = (bat+1)<<8;	/* battery selector in bits 8-15 (CH) */
+	const char *msg;
+	int ret = smapi_request(ebx, ecx, 0, 0, NULL,
+				&ecx, NULL, outEDI, outESI, &msg);
+	if (ret) {
+		TPRINTK(KERN_NOTICE, "cannot get %s_thresh of bat=%d: %s",
+			THRESH_NAME(which), bat, msg);
+		return ret;
+	}
+	if (!(ecx&0x00000100)) {	/* bit 8 clear: reply is rejected */
+		TPRINTK(KERN_NOTICE, "cannot get %s_thresh of bat=%d: ecx=0x%x",
+			THRESH_NAME(which), bat, ecx);
+		return -EIO;
+	}
+	if (thresh)
+		*thresh = ecx&0xFF;
+	return 0;
+}
+
+/**
+ * get_real_thresh - read battery charge start/stop threshold from SMAPI
+ * @bat:    battery number (0 or 1)
+ * @which:  THRESH_START or THRESH_STOP
+ * @thresh: receives the raw SMAPI value (1..99, 0=default); may be NULL
+ */
+static int get_real_thresh(int bat, enum thresh_type which, int *thresh)
+{
+	return __get_real_thresh(bat, which, thresh, NULL, NULL);
+}
+
+/**
+ * set_real_thresh - write battery start/stop charge threshold to SMAPI
+ * @bat:    battery number (0 or 1)
+ * @which:  THRESH_START or THRESH_STOP
+ * @thresh: 1..99, 0=default (passed as-is to SMAPI)
+ */
+static int set_real_thresh(int bat, enum thresh_type which, int thresh)
+{
+	const char *errtxt;
+	u32 saved_edi, saved_esi, func, arg;
+	int err;
+
+	/* read first: the returned EDI/ESI are handed back on the write */
+	err = __get_real_thresh(bat, which, NULL, &saved_edi, &saved_esi);
+	if (err)
+		return err;
+	func = (which == THRESH_START) ? SMAPI_SET_THRESH_START
+				       : SMAPI_SET_THRESH_STOP;
+	arg = ((bat+1)<<8) + thresh;
+	err = smapi_write(func, arg, saved_edi, saved_esi, &errtxt);
+	if (!err) {
+		TPRINTK(KERN_INFO, "set %s to %d for bat=%d",
+			THRESH_NAME(which), thresh, bat);
+		return 0;
+	}
+	TPRINTK(KERN_NOTICE, "set %s to %d for bat=%d failed: %s",
+		THRESH_NAME(which), thresh, bat, errtxt);
+	return err;
+}
+
+/**
+ * __get_inhibit_charge_minutes - get inhibit charge period from SMAPI
+ * @bat:     battery number (0 or 1)
+ * @minutes: if non-NULL, receives the period (1..65535 minutes, 0=disabled)
+ * @outECX:  if non-NULL, receives raw ECX (state to preserve, meaning unknown)
+ * Note that @minutes is the originally set value, it does not count down.
+ */
+static int __get_inhibit_charge_minutes(int bat, int *minutes, u32 *outECX)
+{
+	const char *errtxt;
+	u32 period;
+	u32 state = (bat+1)<<8;
+	int err = smapi_request(SMAPI_GET_INHIBIT_CHARGE, state, 0, 0,
+				NULL, &state, NULL, NULL, &period, &errtxt);
+	if (err) {
+		TPRINTK(KERN_NOTICE, "failed for bat=%d: %s", bat, errtxt);
+		return err;
+	}
+	if ((state & 0x0100) == 0) {
+		TPRINTK(KERN_NOTICE, "bad ecx=0x%x for bat=%d", state, bat);
+		return -EIO;
+	}
+	if (outECX)
+		*outECX = state;
+	if (minutes)	/* bit 0 of ECX flags whether the period is active */
+		*minutes = (state & 0x0001) ? period : 0;
+	return 0;
+}
+
+/**
+ * get_inhibit_charge_minutes - get inhibit charge period from SMAPI
+ * @bat:     battery number (0 or 1)
+ * @minutes: receives the period (1..65535 minutes, 0=disabled); may be NULL
+ * Note that @minutes is the originally set value, it does not count down.
+ */
+static int get_inhibit_charge_minutes(int bat, int *minutes)
+{
+	return __get_inhibit_charge_minutes(bat, minutes, NULL);
+}
+
+/**
+ * set_inhibit_charge_minutes - write inhibit charge period to SMAPI
+ * @bat:     battery number (0 or 1)
+ * @minutes: period in minutes (0=disabled; values above 65535 are clamped)
+ */
+static int set_inhibit_charge_minutes(int bat, int minutes)
+{
+	u32 ecx;
+	const char *msg;
+	int ret;
+
+	/* read first: ECX bits 1-7 are carried over into the write */
+	ret = __get_inhibit_charge_minutes(bat, NULL, &ecx);
+	if (ret)
+		return ret;
+
+	ecx = ((bat+1)<<8) | (ecx&0x00FE) | (minutes > 0 ? 0x0001 : 0x0000);
+	if (minutes > 0xFFFF)
+		minutes = 0xFFFF;
+	ret = smapi_write(SMAPI_SET_INHIBIT_CHARGE, ecx, 0, minutes, &msg);
+	if (ret)
+		TPRINTK(KERN_NOTICE,
+			"set to %d failed for bat=%d: %s", minutes, bat, msg);
+	else	/* no "\n" here: TPRINTK() already appends one */
+		TPRINTK(KERN_INFO, "set to %d for bat=%d", minutes, bat);
+	return ret;
+}
+
+
+/**
+ * get_force_discharge - get status of forced discharging from SMAPI
+ * @bat:     battery number (0 or 1)
+ * @enabled: receives 1 if forced discharge is enabled, 0 if not
+ */
+static int get_force_discharge(int bat, int *enabled)
+{
+	const char *errtxt;
+	u32 state = (bat+1)<<8;
+	int err = smapi_request(SMAPI_GET_FORCE_DISCHARGE, state, 0, 0,
+				NULL, &state, NULL, NULL, NULL, &errtxt);
+	if (err) {
+		TPRINTK(KERN_NOTICE, "failed for bat=%d: %s", bat, errtxt);
+		return err;
+	}
+	*enabled = ((state & 0x00000101) == 0x00000001); /* bit8=0, bit0=1 */
+	return 0;
+}
+
+/**
+ * set_force_discharge - write status of forced discharging to SMAPI
+ * @bat:     battery number (0 or 1)
+ * @enabled: nonzero to enable forced discharge, 0 to disable it
+ */
+static int set_force_discharge(int bat, int enabled)
+{
+	const char *errtxt;
+	u32 state = (bat+1)<<8;
+	int err = smapi_request(SMAPI_GET_FORCE_DISCHARGE, state, 0, 0,
+				NULL, &state, NULL, NULL, NULL, &errtxt);
+	if (err) {
+		TPRINTK(KERN_NOTICE, "get failed for bat=%d: %s", bat, errtxt);
+		return err;
+	}
+	if (state & 0x00000100) {
+		TPRINTK(KERN_NOTICE, "cannot force discharge bat=%d", bat);
+		return -EIO;
+	}
+	/* keep the low byte except bits 0 and 2; bit 0 is the new setting */
+	state = ((bat+1)<<8) | (state & 0x000000FA) | (enabled ? 1 : 0);
+	err = smapi_write(SMAPI_SET_FORCE_DISCHARGE, state, 0, 0, &errtxt);
+	if (!err)
+		TPRINTK(KERN_INFO, "set to %d for bat=%d", enabled, bat);
+	else
+		TPRINTK(KERN_NOTICE, "set to %d failed for bat=%d: %s",
+			enabled, bat, errtxt);
+	return err;
+}
+
+
+/*********************************************************************
+ * Wrappers to threshold-related SMAPI functions, which handle default
+ * thresholds and related quirks.
+ */
+
+/* Bounds, defaults and minimum separation for battery charging thresholds: */
+#define MIN_THRESH_DELTA      4  /* Min delta between start and stop thresh */
+#define MIN_THRESH_START      2
+#define MAX_THRESH_START      (100-MIN_THRESH_DELTA)
+#define MIN_THRESH_STOP       (MIN_THRESH_START + MIN_THRESH_DELTA)
+#define MAX_THRESH_STOP       100
+#define DEFAULT_THRESH_START  MAX_THRESH_START
+#define DEFAULT_THRESH_STOP   MAX_THRESH_STOP
+
+/* The GUI of IBM's Battery Maximizer seems to show a start threshold that
+ * is 1 more than the value we set/get via SMAPI. Since the threshold is
+ * maintained across reboot, this can be confusing. So we kludge our
+ * interface for interoperability: */
+#define BATMAX_FIX   1  /* added on read, subtracted on write (start only) */
+
+/* Get charge start/stop threshold (1..100). A raw value of 0 is replaced by
+ * the default for that threshold; any other start threshold is reported
+ * with BATMAX_FIX added. */
+static int get_thresh(int bat, enum thresh_type which, int *thresh)
+{
+	int err = get_real_thresh(bat, which, thresh);
+	if (err)
+		return err;
+	if (which == THRESH_START)
+		*thresh = *thresh ? *thresh + BATMAX_FIX : DEFAULT_THRESH_START;
+	else if (*thresh == 0)
+		*thresh = DEFAULT_THRESH_STOP;
+	return 0;
+}
+
+
+/* Set charge start/stop threshold (1..100): BATMAX_FIX is subtracted from a
+ * start threshold, and a stop threshold of 100 is written as 0 (default). */
+static int set_thresh(int bat, enum thresh_type which, int thresh)
+{
+	if (which == THRESH_START)
+		thresh -= BATMAX_FIX;
+	else if (which == THRESH_STOP && thresh == DEFAULT_THRESH_STOP)
+		thresh = 0; /* 100 is out of range, but default means 100 */
+	return set_real_thresh(bat, which, thresh);
+}
+
+/*********************************************************************
+ * ThinkPad embedded controller readout and basic functions
+ */
+
+/**
+ * read_tp_ec_row - read data row from the ThinkPad embedded controller
+ * @arg0: EC command code
+ * @bat: battery number, 0 or 1
+ * @j: the byte value to be used for "junk" (unused) input/outputs
+ * @dataval: result vector (TP_CONTROLLER_ROW_LEN bytes, copied after the read)
+ */
+static int read_tp_ec_row(u8 arg0, int bat, u8 j, u8 *dataval)
+{
+	/* request: command in the first byte, battery in the last, junk between */
+	const struct thinkpad_ec_row request = { .mask = 0xFFFF,
+		.val = {arg0, j,j,j,j,j,j,j,j,j,j,j,j,j,j, (u8)bat} };
+	struct thinkpad_ec_row reply = { .mask = 0xFFFF };
+	int err = thinkpad_ec_lock();
+
+	if (err)
+		return err;
+	err = thinkpad_ec_read_row(&request, &reply);
+	thinkpad_ec_unlock();
+	memcpy(dataval, &reply.val, TP_CONTROLLER_ROW_LEN);
+	return err;
+}
+
+/**
+ * power_device_present - check for presence of battery or AC power
+ * @bat: 0 for battery 0, 1 for battery 1, otherwise AC power
+ * Returns 1 if present, 0 if not present, negative if error.
+ */
+static int power_device_present(int bat)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	u8 mask = 0x80;			/* AC power */
+	int err = read_tp_ec_row(1, bat, 0, row);
+	if (err)
+		return err;
+	/* byte 0 of EC row 1 carries one presence bit per power source */
+	if (bat == 0)
+		mask = 0x40;		/* battery 0 */
+	else if (bat == 1)
+		mask = 0x20;		/* battery 1 */
+	return (row[0] & mask) ? 1 : 0;
+}
+
+/**
+ * bat_has_status - check if battery can report detailed status
+ * @bat: 0 for battery 0, 1 for battery 1
+ * Returns 1 if yes, 0 if no, negative if error.
+ */
+static int bat_has_status(int bat)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	const u8 present_bit = bat ? 0x20 : 0x40;
+	int err = read_tp_ec_row(1, bat, 0, row);
+
+	if (err)
+		return err;
+	/* Status is available only if the battery's presence bit (byte 0)
+	 * and at least one of the 0x60 bits in byte 1 are set. */
+	return (row[0] & present_bit) && (row[1] & 0x60);
+}
+
+/**
+ * get_tp_ec_bat_16 - read a 16-bit value from EC battery status data
+ * @arg0: first argument to EC
+ * @offset: offset in row returned from EC
+ * @bat: battery (0 or 1)
+ * @val: the 16-bit value obtained
+ * Returns nonzero on error.
+ */
+static int get_tp_ec_bat_16(u8 arg0, int offset, int bat, u16 *val)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int err;
+	/* any answer other than 1 (an error included) means: no status */
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	err = read_tp_ec_row(arg0, bat, 0, row);
+	if (!err)
+		*val = *(u16 *)(row+offset);
+	return err;
+}
+
+/*********************************************************************
+ * sysfs attributes for batteries -
+ * definitions and helper functions
+ */
+
+/* A custom device attribute struct which holds a battery number */
+struct bat_device_attribute {
+	struct device_attribute dev_attr;	/* embedded; see attr_get_bat() */
+	int bat;				/* battery number */
+};
+
+/**
+ * attr_get_bat - battery number of the bat_device_attribute embedding @attr
+ */
+static int attr_get_bat(struct device_attribute *attr)
+{
+	return container_of(attr, struct bat_device_attribute, dev_attr)->bat;
+}
+
+/**
+ * show_tp_ec_bat_u16 - show an unsigned 16-bit battery attribute
+ * @arg0: specified 1st argument of EC raw to read
+ * @offset: byte offset in EC raw data
+ * @mul: correction factor to multiply by
+ * @na_msg: string to output if value is not available (0xFFFF), or %NULL
+ * @attr: battery attribute
+ * @buf: output buffer
+ * The 16-bit value is read from the EC, treated as unsigned,
+ * transformed as x->mul*x, and printed to the buffer.
+ * If the value is 0xFFFF and na_msg!=%NULL, na_msg is printed instead.
+ * Returns the printed length, or a negative errno if the read fails.
+ */
+static ssize_t show_tp_ec_bat_u16(u8 arg0, int offset, int mul,
+			      const char *na_msg,
+			      struct device_attribute *attr, char *buf)
+{
+	u16 raw;
+	int err = get_tp_ec_bat_16(arg0, offset, attr_get_bat(attr), &raw);
+	if (err)
+		return err;
+	if (raw != 0xFFFF || !na_msg)
+		return sprintf(buf, "%u\n", mul*(unsigned int)raw);
+	return sprintf(buf, "%s\n", na_msg);
+}
+
+/**
+ * show_tp_ec_bat_s16 - show a signed 16-bit battery attribute
+ * @arg0: specified 1st argument of EC raw to read
+ * @offset: byte offset in EC raw data
+ * @mul: correction factor to multiply by
+ * @add: correction term to add after multiplication
+ * @attr: battery attribute
+ * @buf: output buffer
+ * Prints mul*x+add, where x is the EC's 16-bit value taken as signed.
+ */
+static ssize_t show_tp_ec_bat_s16(u8 arg0, int offset, int mul, int add,
+			      struct device_attribute *attr, char *buf)
+{
+	u16 raw;
+	int bat = attr_get_bat(attr);
+	int err = get_tp_ec_bat_16(arg0, offset, bat, &raw);
+	if (err)
+		return err;
+	return sprintf(buf, "%d\n", mul*(s16)raw+add);
+}
+
+/**
+ * show_tp_ec_bat_str - show a string from EC battery status data
+ * @arg0: specified 1st argument of EC raw to read
+ * @offset: byte offset in EC raw data
+ * @maxlen: maximum string length
+ * @attr: battery attribute
+ * @buf: output buffer
+ */
+static ssize_t show_tp_ec_bat_str(u8 arg0, int offset, int maxlen,
+			      struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int bat = attr_get_bat(attr);
+	int err;
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	err = read_tp_ec_row(arg0, bat, 0, row);
+	if (err)
+		return err;
+	/* copy at most maxlen bytes, terminate, then append the newline */
+	strncpy(buf, (char *)row+offset, maxlen);
+	buf[maxlen] = '\0';
+	return strlen(strcat(buf, "\n"));
+}
+
+/**
+ * show_tp_ec_bat_power - show a power readout from EC battery status data
+ * @arg0: specified 1st argument of EC raw to read
+ * @offV: byte offset of voltage in EC raw data
+ * @offI: byte offset of current in EC raw data
+ * @attr: battery attribute
+ * @buf: output buffer
+ * Computes the power as current*voltage from the two given readout offsets.
+ */
+static ssize_t show_tp_ec_bat_power(u8 arg0, int offV, int offI,
+				struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int milliamp, millivolt, ret;
+	int bat = attr_get_bat(attr);
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	ret = read_tp_ec_row(arg0, bat, 0, row); /* row selected by @arg0 */
+	if (ret)
+		return ret;
+	millivolt = *(u16 *)(row+offV);	/* read as unsigned */
+	milliamp = *(s16 *)(row+offI);	/* read as signed */
+	return sprintf(buf, "%d\n", milliamp*millivolt/1000); /* units: mW */
+}
+
+/**
+ * show_tp_ec_bat_date - decode and show a date from EC battery status data
+ * @arg0: specified 1st argument of EC raw to read
+ * @offset: byte offset in EC raw data
+ * @attr: battery attribute
+ * @buf: output buffer
+ * Prints the date as YYYY-MM-DD.
+ */
+static ssize_t show_tp_ec_bat_date(u8 arg0, int offset,
+			       struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	u16 packed;
+	int err;
+	int bat = attr_get_bat(attr);
+
+	if (bat_has_status(bat) != 1)
+		return -ENXIO;
+	err = read_tp_ec_row(arg0, bat, 0, row);
+	if (err)
+		return err;
+
+	/* Bit-packed: packed = day | (month<<5) | ((year-1980)<<9) */
+	packed = *(u16 *)(row+offset);
+	return sprintf(buf, "%04d-%02d-%02d\n",
+		       (packed >> 9) + 1980,	/* year */
+		       (packed >> 5) & 0xF,	/* month */
+		       packed & 0x1F);		/* day */
+}
+
+
+/*********************************************************************
+ * sysfs attribute I/O for batteries -
+ * the actual attribute show/store functions
+ */
+
+/* sysfs show: start-charge threshold of this battery, in percent */
+static ssize_t show_battery_start_charge_thresh(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int percent;
+	int err = get_thresh(attr_get_bat(attr), THRESH_START, &percent);
+	if (err)
+		return err;
+	return sprintf(buf, "%d\n", percent);
+}
+
+/* sysfs show: stop-charge threshold of this battery, in percent */
+static ssize_t show_battery_stop_charge_thresh(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int percent;
+	int err = get_thresh(attr_get_bat(attr), THRESH_STOP, &percent);
+	if (err)
+		return err;
+	return sprintf(buf, "%d\n", percent);
+}
+
+/**
+ * store_battery_start_charge_thresh - store battery_start_charge_thresh attr
+ * Since this is a kernel<->user interface, we ensure a valid state for
+ * the hardware: the requested threshold is clamped to the valid range and,
+ * if necessary, the other threshold is moved to stay MIN_THRESH_DELTA away.
+ * Returns @count on success, -EINVAL for bad input, else a negative errno.
+ */
+static ssize_t store_battery_start_charge_thresh(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int thresh, other_thresh, ret;
+	int bat = attr_get_bat(attr);
+
+	if (sscanf(buf, "%d", &thresh) != 1 || thresh < 1 || thresh > 100)
+		return -EINVAL;
+
+	if (thresh < MIN_THRESH_START) /* clamp up to MIN_THRESH_START */
+		thresh = MIN_THRESH_START;
+	if (thresh > MAX_THRESH_START) /* clamp down to MAX_THRESH_START */
+		thresh = MAX_THRESH_START;
+
+	down(&smapi_mutex);
+	ret = get_thresh(bat, THRESH_STOP, &other_thresh);
+	if (ret != -EOPNOTSUPP) {
+		if (ret) /* other threshold is set? */
+			goto out;
+		ret = get_real_thresh(bat, THRESH_START, NULL);
+		if (ret) /* this threshold is set? */
+			goto out;
+		if (other_thresh < thresh+MIN_THRESH_DELTA) {
+			/* move other thresh to keep it above this one */
+			ret = set_thresh(bat, THRESH_STOP,
+					 thresh+MIN_THRESH_DELTA);
+			if (ret)
+				goto out;
+		}
+	}
+	ret = set_thresh(bat, THRESH_START, thresh);
+out:
+	up(&smapi_mutex);
+	/* report failures to the writer instead of always claiming success */
+	return ret ? ret : (ssize_t)count;
+}
+
+/**
+ * store_battery_stop_charge_thresh - store battery_stop_charge_thresh attr
+ * Since this is a kernel<->user interface, we ensure a valid state for
+ * the hardware: the requested threshold is clamped to the valid range and,
+ * if necessary, the other threshold is moved to stay MIN_THRESH_DELTA away.
+ * Returns @count on success, -EINVAL for bad input, else a negative errno.
+ */
+static ssize_t store_battery_stop_charge_thresh(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int thresh, other_thresh, ret;
+	int bat = attr_get_bat(attr);
+
+	if (sscanf(buf, "%d", &thresh) != 1 || thresh < 1 || thresh > 100)
+		return -EINVAL;
+
+	if (thresh < MIN_THRESH_STOP) /* clamp up to MIN_THRESH_STOP */
+		thresh = MIN_THRESH_STOP;
+
+	down(&smapi_mutex);
+	ret = get_thresh(bat, THRESH_START, &other_thresh);
+	if (ret != -EOPNOTSUPP) { /* other threshold exists? */
+		if (ret)
+			goto out;
+		/* this threshold exists? */
+		ret = get_real_thresh(bat, THRESH_STOP, NULL);
+		if (ret)
+			goto out;
+		if (other_thresh >= thresh-MIN_THRESH_DELTA) {
+			 /* move other thresh to be below this one */
+			ret = set_thresh(bat, THRESH_START,
+					 thresh-MIN_THRESH_DELTA);
+			if (ret)
+				goto out;
+		}
+	}
+	ret = set_thresh(bat, THRESH_STOP, thresh);
+out:
+	up(&smapi_mutex);
+	return ret ? ret : (ssize_t)count; /* don't mask SMAPI failures */
+}
+
+/* sysfs show: configured inhibit-charge period of this battery, in minutes */
+static ssize_t show_battery_inhibit_charge_minutes(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int period;
+	int err = get_inhibit_charge_minutes(attr_get_bat(attr), &period);
+	if (err)
+		return err;
+	return sprintf(buf, "%d\n", period);
+}
+
+/* sysfs store: parse a non-negative minute count and hand it to SMAPI */
+static ssize_t store_battery_inhibit_charge_minutes(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	int minutes;
+	int err;
+	if (sscanf(buf, "%d", &minutes) != 1 || minutes < 0) {
+		TPRINTK(KERN_ERR, "inhibit_charge_minutes: "
+			      "must be a non-negative integer");
+		return -EINVAL;
+	}
+	err = set_inhibit_charge_minutes(attr_get_bat(attr), minutes);
+	if (err)
+		return err;
+	return count;
+}
+
+/* sysfs show: 1 if forced discharge is enabled, 0 if not (type: boolean) */
+static ssize_t show_battery_force_discharge(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	int on;
+	int err = get_force_discharge(attr_get_bat(attr), &on);
+	if (err)
+		return err;
+	return sprintf(buf, "%d\n", on);
+}
+
+/* sysfs store: accept an integer 0 or 1 and pass it to set_force_discharge() */
+static ssize_t store_battery_force_discharge(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	int enabled, err;
+
+	if (sscanf(buf, "%d", &enabled) != 1)
+		return -EINVAL;
+	if (enabled != 0 && enabled != 1)
+		return -EINVAL;
+	err = set_force_discharge(attr_get_bat(attr), enabled);
+	return err ? err : (ssize_t)count;
+}
+
+/* sysfs show: 1 if the battery is present, 0 if not (type: boolean) */
+static ssize_t show_battery_installed(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int present = power_device_present(attr_get_bat(attr));
+	if (present < 0)
+		return present;	/* negative errno from the EC read */
+	return sprintf(buf, "%d\n", present);
+}
+
+/* sysfs show: "none", "idle", "discharging", "charging" or "unknown (0x..)" */
+static ssize_t show_battery_state(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	u8 row[TP_CONTROLLER_ROW_LEN];
+	int bat = attr_get_bat(attr);
+	int err;
+
+	if (bat_has_status(bat) != 1)
+		return sprintf(buf, "none\n");
+	err = read_tp_ec_row(1, bat, 0, row);
+	if (err)
+		return err;
+	switch (row[1] & 0xf0) {	/* state is the high nibble of byte 1 */
+	case 0xc0: return sprintf(buf, "%s\n", "idle");
+	case 0xd0: return sprintf(buf, "%s\n", "discharging");
+	case 0xe0: return sprintf(buf, "%s\n", "charging");
+	}
+	return sprintf(buf, "unknown (0x%x)\n", row[1]);
+}
+
+static ssize_t show_battery_manufacturer(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string (EC row 4, byte 2 on). SBS v1.1 p34: ManufacturerName() */
+	return show_tp_ec_bat_str(4, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf);
+}
+
+static ssize_t show_battery_model(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string (EC row 5, byte 2 on). SBS spec v1.1 p34: DeviceName() */
+	return show_tp_ec_bat_str(5, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf);
+}
+
+static ssize_t show_battery_barcoding(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string (EC row 7, byte 2 on) */
+	return show_tp_ec_bat_str(7, 2, TP_CONTROLLER_ROW_LEN-2, attr, buf);
+}
+
+static ssize_t show_battery_chemistry(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: string, max 5 chars. SBS spec v1.1 p34-35: DeviceChemistry() */
+	return show_tp_ec_bat_str(6, 2, 5, attr, buf);
+}
+
+static ssize_t show_battery_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV (EC row 1, offset 6). SBS spec v1.1 p24: Voltage() */
+	return show_tp_ec_bat_u16(1, 6, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_design_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV (EC row 3, offset 4). SBS spec v1.1 p32: DesignVoltage() */
+	return show_tp_ec_bat_u16(3, 4, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_charging_max_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV (row 9, offset 8). SBS v1.1 p37,39: ChargingVoltage() */
+	return show_tp_ec_bat_u16(9, 8, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group0_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV (EC row 0xA, offset 12, printed unscaled) */
+	return show_tp_ec_bat_u16(0xA, 12, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group1_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV (EC row 0xA, offset 10, printed unscaled) */
+	return show_tp_ec_bat_u16(0xA, 10, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group2_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV (EC row 0xA, offset 8, printed unscaled) */
+	return show_tp_ec_bat_u16(0xA, 8, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_group3_voltage(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mV (EC row 0xA, offset 6, printed unscaled) */
+	return show_tp_ec_bat_u16(0xA, 6, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_current_now(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mA, signed (row 1, offset 8). SBS spec v1.1 p24: Current() */
+	return show_tp_ec_bat_s16(1, 8, 1, 0, attr, buf);
+}
+
+static ssize_t show_battery_current_avg(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mA, signed (row 1, off 10). SBS v1.1 p24: AverageCurrent() */
+	return show_tp_ec_bat_s16(1, 10, 1, 0, attr, buf);
+}
+
+static ssize_t show_battery_charging_max_current(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mA (row 9, offset 6). SBS v1.1 p36,38: ChargingCurrent() */
+	return show_tp_ec_bat_s16(9, 6, 1, 0, attr, buf);
+}
+
+static ssize_t show_battery_power_now(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mW (offsets 6 and 8). SBS spec v1.1: Voltage()*Current() */
+	return show_tp_ec_bat_power(1, 6, 8, attr, buf);
+}
+
+static ssize_t show_battery_power_avg(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mW. SBS spec v1.1: Voltage()*AverageCurrent() */
+	return show_tp_ec_bat_power(1, 6, 10, attr, buf);
+}
+
+static ssize_t show_battery_remaining_percent(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: percent. SBS spec v1.1 p25: RelativeStateOfCharge() */
+	return show_tp_ec_bat_u16(1, 12, 1, NULL, attr, buf);
+}
+
+static ssize_t show_battery_remaining_charging_time(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: minutes. SBS spec v1.1 p27: AverageTimeToFull() */
+	return show_tp_ec_bat_u16(2, 8, 1, "not_charging", attr, buf);
+}
+
+static ssize_t show_battery_remaining_running_time(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: minutes. SBS spec v1.1 p27: RunTimeToEmpty() */
+	return show_tp_ec_bat_u16(2, 6, 1, "not_discharging", attr, buf);
+}
+
+static ssize_t show_battery_remaining_running_time_now(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: minutes. SBS spec v1.1 p27: RunTimeToEmpty() */
+	return show_tp_ec_bat_u16(2, 4, 1, "not_discharging", attr, buf);
+}
+
+static ssize_t show_battery_remaining_capacity(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mWh. SBS spec v1.1 p26. */
+	return show_tp_ec_bat_u16(1, 14, 10, "", attr, buf);
+}
+
+static ssize_t show_battery_last_full_capacity(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mWh. SBS spec v1.1 p26: FullChargeCapacity() */
+	return show_tp_ec_bat_u16(2, 2, 10, "", attr, buf);
+}
+
+static ssize_t show_battery_design_capacity(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: mWh. SBS spec v1.1 p32: DesignCapacity() */
+	return show_tp_ec_bat_u16(3, 2, 10, "", attr, buf);
+}
+
+static ssize_t show_battery_cycle_count(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: ordinal. SBS spec v1.1 p32: CycleCount() */
+	return show_tp_ec_bat_u16(2, 12, 1, "", attr, buf); 
+}
+
+static ssize_t show_battery_temperature(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* units: millicelsius. SBS spec v1.1: Temperature()*10 */
+	return show_tp_ec_bat_s16(1, 4, 100, -273100, attr, buf);
+}
+
+static ssize_t show_battery_serial(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: int. SBS spec v1.1 p34: SerialNumber() */
+	return show_tp_ec_bat_u16(3, 10, 1, "", attr, buf);
+}
+
+static ssize_t show_battery_manufacture_date(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: YYYY-MM-DD. SBS spec v1.1 p34: ManufactureDate() */
+	return show_tp_ec_bat_date(3, 8, attr, buf);
+}
+
+static ssize_t show_battery_first_use_date(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	/* type: YYYY-MM-DD */
+	return show_tp_ec_bat_date(8, 2, attr, buf);
+}
+
+/**
+ * show_battery_dump - show the battery's dump attribute
+ * The dump attribute gives a hex dump of all EC readouts related to a
+ * battery, one EC row per output line. Some of the enumerated values don't
+ * really exist (i.e., the EC function just leaves them untouched); we use
+ * a kludge to detect these and print them as "--".
+ */
+#define MIN_DUMP_ARG0 0x00
+#define MAX_DUMP_ARG0 0x0a /* 0x0b is useful too but hangs old EC firmware */
+static ssize_t show_battery_dump(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int i;
+	char *p = buf;
+	int bat = attr_get_bat(attr);
+	u8 arg0; /* first argument to EC */
+	u8 rowa[TP_CONTROLLER_ROW_LEN],
+	   rowb[TP_CONTROLLER_ROW_LEN];
+	const u8 junka = 0xAA,
+		 junkb = 0x55; /* junk values for testing changes */
+	int ret;
+
+	for (arg0 = MIN_DUMP_ARG0; arg0 <= MAX_DUMP_ARG0; ++arg0) {
+		if ((p-buf) > PAGE_SIZE-TP_CONTROLLER_ROW_LEN*5)
+			return -ENOMEM; /* don't overflow sysfs buf */
+		/* Read the row twice with different junk values,
+		 * to detect unused output bytes which are left unchanged: */
+		ret = read_tp_ec_row(arg0, bat, junka, rowa);
+		if (ret)
+			return ret;
+		ret = read_tp_ec_row(arg0, bat, junkb, rowb);
+		if (ret)
+			return ret;
+		for (i = 0; i < TP_CONTROLLER_ROW_LEN; i++) {
+			if (rowa[i] == junka && rowb[i] == junkb)
+				p += sprintf(p, "-- "); /* unused by EC */
+			else
+				p += sprintf(p, "%02x ", rowa[i]);
+		}
+		p += sprintf(p, "\n");
+	}
+	return p-buf;
+}
+
+
+/*********************************************************************
+ * sysfs attribute I/O, other than batteries
+ */
+
+static ssize_t show_ac_connected(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int ret = power_device_present(0xFF);
+	if (ret < 0)
+		return ret;
+	return sprintf(buf, "%d\n", ret);  /* type: boolean */
+}
+
+/*********************************************************************
+ * The "smapi_request" sysfs attribute executes a raw SMAPI call.
+ * You write to make a request and read to get the result. The state
+ * is saved globally rather than per fd (sysfs limitation), so
+ * simultaneous requests may get each other's results! So this is for
+ * development and debugging only.
+ */
+#define MAX_SMAPI_ATTR_ANSWER_LEN   128
+static char smapi_attr_answer[MAX_SMAPI_ATTR_ANSWER_LEN] = "";
+
+static ssize_t show_smapi_request(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	int ret = snprintf(buf, PAGE_SIZE, "%s", smapi_attr_answer);
+	smapi_attr_answer[0] = '\0';
+	return ret;
+}
+
+static ssize_t store_smapi_request(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	unsigned int inEBX, inECX, inEDI, inESI;
+	u32 outEBX, outECX, outEDX, outEDI, outESI;
+	const char *msg;
+	int ret;
+	if (sscanf(buf, "%x %x %x %x", &inEBX, &inECX, &inEDI, &inESI) != 4) {
+		smapi_attr_answer[0] = '\0';
+		return -EINVAL;
+	}
+	ret = smapi_request(
+		   inEBX, inECX, inEDI, inESI,
+		   &outEBX, &outECX, &outEDX, &outEDI, &outESI, &msg);
+	snprintf(smapi_attr_answer, MAX_SMAPI_ATTR_ANSWER_LEN,
+		 "%x %x %x %x %x %d '%s'\n",
+		 (unsigned int)outEBX, (unsigned int)outECX,
+		 (unsigned int)outEDX, (unsigned int)outEDI,
+		 (unsigned int)outESI, ret, msg);
+	if (ret)
+		return ret;
+	else
+		return count;
+}
+
+/*********************************************************************
+ * Power management: the embedded controller forgets the battery
+ * thresholds when the system is suspended to disk and unplugged from
+ * AC and battery, so we restore it upon resume.
+ */
+
+static int saved_threshs[4] = {-1, -1, -1, -1};  /* -1 = don't know */
+
+static int tp_suspend(struct platform_device *dev, pm_message_t state)
+{
+	if (get_real_thresh(0, THRESH_STOP , &saved_threshs[0]))
+		saved_threshs[0] = -1;
+	if (get_real_thresh(0, THRESH_START, &saved_threshs[1]))
+		saved_threshs[1] = -1;
+	if (get_real_thresh(1, THRESH_STOP , &saved_threshs[2]))
+		saved_threshs[2] = -1;
+	if (get_real_thresh(1, THRESH_START, &saved_threshs[3]))
+		saved_threshs[3] = -1;
+	DPRINTK("suspend saved: %d %d %d %d", saved_threshs[0],
+		saved_threshs[1], saved_threshs[2], saved_threshs[3]);
+	return 0;
+}
+
+static int tp_resume(struct platform_device *dev)
+{
+	DPRINTK("resume restoring: %d %d %d %d", saved_threshs[0],
+		saved_threshs[1], saved_threshs[2], saved_threshs[3]);
+	if (saved_threshs[0] >= 0)
+		set_real_thresh(0, THRESH_STOP , saved_threshs[0]);
+	if (saved_threshs[1] >= 0)
+		set_real_thresh(0, THRESH_START, saved_threshs[1]);
+	if (saved_threshs[2] >= 0)
+		set_real_thresh(1, THRESH_STOP , saved_threshs[2]);
+	if (saved_threshs[3] >= 0)
+		set_real_thresh(1, THRESH_START, saved_threshs[3]);
+	return 0;
+}
+
+
+/*********************************************************************
+ * Driver model
+ */
+
+static struct platform_driver tp_driver = {
+	.suspend = tp_suspend,
+	.resume = tp_resume,
+	.driver = {
+		.name = "smapi",
+		.owner = THIS_MODULE
+	},
+};
+
+
+/*********************************************************************
+ * Sysfs device model
+ */
+
+/* Attributes in /sys/devices/platform/smapi/ */
+
+static DEVICE_ATTR(ac_connected, 0444, show_ac_connected, NULL);
+static DEVICE_ATTR(smapi_request, 0600, show_smapi_request,
+					store_smapi_request);
+
+static struct attribute *tp_root_attributes[] = {
+	&dev_attr_ac_connected.attr,
+	&dev_attr_smapi_request.attr,
+	NULL
+};
+static struct attribute_group tp_root_attribute_group = {
+	.attrs = tp_root_attributes
+};
+
+/* Attributes under /sys/devices/platform/smapi/BAT{0,1}/ :
+ * Every attribute needs to be defined (i.e., statically allocated) for
+ * each battery, and then referenced in the attribute list of each battery.
+ * We use preprocessor voodoo to avoid duplicating the list of attributes 4
+ * times. The preprocessor output is just normal sysfs attributes code.
+ */
+
+/**
+ * FOREACH_BAT_ATTR - invoke the given macros on all our battery attributes
+ * @_BAT:     battery number (0 or 1)
+ * @_ATTR_RW: macro to invoke for each read/write attribute
+ * @_ATTR_R:  macro to invoke for each read-only  attribute
+ */
+#define FOREACH_BAT_ATTR(_BAT, _ATTR_RW, _ATTR_R) \
+	_ATTR_RW(_BAT, start_charge_thresh) \
+	_ATTR_RW(_BAT, stop_charge_thresh) \
+	_ATTR_RW(_BAT, inhibit_charge_minutes) \
+	_ATTR_RW(_BAT, force_discharge) \
+	_ATTR_R(_BAT, installed) \
+	_ATTR_R(_BAT, state) \
+	_ATTR_R(_BAT, manufacturer) \
+	_ATTR_R(_BAT, model) \
+	_ATTR_R(_BAT, barcoding) \
+	_ATTR_R(_BAT, chemistry) \
+	_ATTR_R(_BAT, voltage) \
+	_ATTR_R(_BAT, group0_voltage) \
+	_ATTR_R(_BAT, group1_voltage) \
+	_ATTR_R(_BAT, group2_voltage) \
+	_ATTR_R(_BAT, group3_voltage) \
+	_ATTR_R(_BAT, current_now) \
+	_ATTR_R(_BAT, current_avg) \
+	_ATTR_R(_BAT, charging_max_current) \
+	_ATTR_R(_BAT, power_now) \
+	_ATTR_R(_BAT, power_avg) \
+	_ATTR_R(_BAT, remaining_percent) \
+	_ATTR_R(_BAT, remaining_charging_time) \
+	_ATTR_R(_BAT, remaining_running_time) \
+	_ATTR_R(_BAT, remaining_running_time_now) \
+	_ATTR_R(_BAT, remaining_capacity) \
+	_ATTR_R(_BAT, last_full_capacity) \
+	_ATTR_R(_BAT, design_voltage) \
+	_ATTR_R(_BAT, charging_max_voltage) \
+	_ATTR_R(_BAT, design_capacity) \
+	_ATTR_R(_BAT, cycle_count) \
+	_ATTR_R(_BAT, temperature) \
+	_ATTR_R(_BAT, serial) \
+	_ATTR_R(_BAT, manufacture_date) \
+	_ATTR_R(_BAT, first_use_date) \
+	_ATTR_R(_BAT, dump)
+
+/* Define several macros we will feed into FOREACH_BAT_ATTR: */
+
+#define DEFINE_BAT_ATTR_RW(_BAT,_NAME) \
+	static struct bat_device_attribute dev_attr_##_NAME##_##_BAT = {  \
+		.dev_attr = __ATTR(_NAME, 0644, show_battery_##_NAME,   \
+						store_battery_##_NAME), \
+		.bat = _BAT \
+	};
+
+#define DEFINE_BAT_ATTR_R(_BAT,_NAME) /* read-only: mode 0444, no store */ \
+	static struct bat_device_attribute dev_attr_##_NAME##_##_BAT = {    \
+		.dev_attr = __ATTR(_NAME, 0444, show_battery_##_NAME, NULL), \
+		.bat = _BAT \
+	};
+
+#define REF_BAT_ATTR(_BAT,_NAME) \
+	&dev_attr_##_NAME##_##_BAT.dev_attr.attr,
+
+/* This provides all attributes for one battery: */
+
+#define PROVIDE_BAT_ATTRS(_BAT) \
+	FOREACH_BAT_ATTR(_BAT, DEFINE_BAT_ATTR_RW, DEFINE_BAT_ATTR_R) \
+	static struct attribute *tp_bat##_BAT##_attributes[] = { \
+		FOREACH_BAT_ATTR(_BAT, REF_BAT_ATTR, REF_BAT_ATTR) \
+		NULL \
+	}; \
+	static struct attribute_group tp_bat##_BAT##_attribute_group = { \
+		.name  = "BAT" #_BAT, \
+		.attrs = tp_bat##_BAT##_attributes \
+	};
+
+/* Finally generate the attributes: */
+
+PROVIDE_BAT_ATTRS(0)
+PROVIDE_BAT_ATTRS(1)
+
+/* List of attribute groups */
+
+static struct attribute_group *attr_groups[] = {
+	&tp_root_attribute_group,
+	&tp_bat0_attribute_group,
+	&tp_bat1_attribute_group,
+	NULL
+};
+
+
+/*********************************************************************
+ * Init and cleanup
+ */
+
+static struct attribute_group **next_attr_group; /* next to register */
+
+static int __init tp_init(void)
+{
+	int ret;
+	printk(KERN_INFO "tp_smapi " TP_VERSION " loading...\n");
+
+	ret = find_smapi_port();
+	if (ret < 0)
+		goto err;
+	else
+		smapi_port = ret;
+
+	if (!request_region(smapi_port, 1, "smapi")) {
+		printk(KERN_ERR "tp_smapi cannot claim port 0x%x\n",
+		       smapi_port);
+		ret = -ENXIO;
+		goto err;
+	}
+
+	if (!request_region(SMAPI_PORT2, 1, "smapi")) {
+		printk(KERN_ERR "tp_smapi cannot claim port 0x%x\n",
+		       SMAPI_PORT2);
+		ret = -ENXIO;
+		goto err_port1;
+	}
+
+	ret = platform_driver_register(&tp_driver);
+	if (ret)
+		goto err_port2;
+
+	pdev = platform_device_alloc("smapi", -1);
+	if (!pdev) {
+		ret = -ENOMEM;
+		goto err_driver;
+	}
+
+	ret = platform_device_add(pdev);
+	if (ret)
+		goto err_device_free; /* never added: just drop our ref */
+
+	for (next_attr_group = attr_groups; *next_attr_group;
+	     ++next_attr_group) {
+		ret = sysfs_create_group(&pdev->dev.kobj, *next_attr_group);
+		if (ret)
+			goto err_attr;
+	}
+
+	printk(KERN_INFO "tp_smapi successfully loaded (smapi_port=0x%x).\n",
+	       smapi_port);
+	return 0;
+
+err_attr:
+	while (--next_attr_group >= attr_groups)
+		sysfs_remove_group(&pdev->dev.kobj, *next_attr_group);
+	platform_device_del(pdev); /* not _unregister(): the put is below */
+err_device_free:
+	platform_device_put(pdev);
+err_driver:
+	platform_driver_unregister(&tp_driver);
+err_port2:
+	release_region(SMAPI_PORT2, 1);
+err_port1:
+	release_region(smapi_port, 1);
+err:
+	printk(KERN_ERR "tp_smapi init failed (ret=%d)!\n", ret);
+	return ret;
+}
+
+static void __exit tp_exit(void)
+{
+	while (next_attr_group && --next_attr_group >= attr_groups)
+		sysfs_remove_group(&pdev->dev.kobj, *next_attr_group);
+	platform_device_unregister(pdev);
+	platform_driver_unregister(&tp_driver);
+	release_region(SMAPI_PORT2, 1);
+	if (smapi_port)
+		release_region(smapi_port, 1);
+
+	printk(KERN_INFO "tp_smapi unloaded.\n");
+}
+
+module_init(tp_init);
+module_exit(tp_exit);
diff -Npur linux-2.6-block/fs/Kconfig linux-2.6-block-custom/fs/Kconfig
--- linux-2.6-block/fs/Kconfig	2008-09-27 16:12:24.354945950 +0900
+++ linux-2.6-block-custom/fs/Kconfig	2008-09-26 20:30:06.842765910 +0900
@@ -1348,6 +1348,56 @@ config CRAMFS
 
 	  If unsure, say N.
 
+config SQUASHFS
+	tristate "SquashFS 3.4 - Squashed file system support"
+	select ZLIB_INFLATE
+	help
+	  Saying Y here includes support for SquashFS 3.4 (a Compressed
+	  Read-Only File System).  Squashfs is a highly compressed read-only
+	  filesystem for Linux.  It uses zlib compression to compress
+	  files, inodes and directories.  Inodes in the system are very small
+	  and all blocks are packed to minimise data overhead. Block sizes
+	  greater than 4K are supported up to a maximum of 1 Mbytes (default
+	  block size 128K).  SquashFS 3.4 supports 64 bit filesystems and files
+	  (larger than 4GB), full uid/gid information, hard links and timestamps.
+
+	  Squashfs is intended for general read-only filesystem use, for
+	  archival use (i.e. in cases where a .tar.gz file may be used), and in
+	  embedded systems where low overhead is needed.  Further information
+	  and filesystem tools are available from http://squashfs.sourceforge.net.
+
+	  If you want to compile this as a module ( = code which can be
+	  inserted in and removed from the running kernel whenever you want),
+	  say M here and read <file:Documentation/modules.txt>.  The module
+	  will be called squashfs.  Note that the root file system (the one
+	  containing the directory /) cannot be compiled as a module.
+
+	  If unsure, say N.
+
+config SQUASHFS_EMBEDDED
+
+	bool "Additional option for memory-constrained systems" 
+	depends on SQUASHFS
+	default n
+	help
+	  Saying Y here allows you to specify cache size.
+
+	  If unsure, say N.
+
+config SQUASHFS_FRAGMENT_CACHE_SIZE
+	int "Number of fragments cached" if SQUASHFS_EMBEDDED
+	depends on SQUASHFS
+	default "3"
+	help
+	  By default SquashFS caches the last 3 fragments read from
+	  the filesystem.  Increasing this amount may mean SquashFS
+	  has to re-read fragments less often from disk, at the expense
+	  of extra system memory.  Decreasing this amount will mean
+	  SquashFS uses less memory at the expense of extra reads from disk.
+
+	  Note there must be at least one cached fragment.  Anything
+	  much more than three will probably not make much difference.
+
 config VXFS_FS
 	tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
 	depends on BLOCK
diff -Npur linux-2.6-block/fs/Makefile linux-2.6-block-custom/fs/Makefile
--- linux-2.6-block/fs/Makefile	2008-09-27 16:12:24.354945950 +0900
+++ linux-2.6-block-custom/fs/Makefile	2008-09-26 20:30:06.842765910 +0900
@@ -74,6 +74,7 @@ obj-$(CONFIG_JBD)		+= jbd/
 obj-$(CONFIG_JBD2)		+= jbd2/
 obj-$(CONFIG_EXT2_FS)		+= ext2/
 obj-$(CONFIG_CRAMFS)		+= cramfs/
+obj-$(CONFIG_SQUASHFS)		+= squashfs/
 obj-y				+= ramfs/
 obj-$(CONFIG_HUGETLBFS)		+= hugetlbfs/
 obj-$(CONFIG_CODA_FS)		+= coda/
diff -Npur linux-2.6-block/fs/buffer.c linux-2.6-block-custom/fs/buffer.c
--- linux-2.6-block/fs/buffer.c	2008-09-27 16:12:24.446942279 +0900
+++ linux-2.6-block-custom/fs/buffer.c	2008-09-26 19:45:20.366786422 +0900
@@ -247,6 +247,91 @@ void thaw_bdev(struct block_device *bdev
 }
 EXPORT_SYMBOL(thaw_bdev);
 
+#if 0
+#define FS_PRINTK(fmt, args...) printk(fmt, ## args)
+#else
+#define FS_PRINTK(fmt, args...)
+#endif
+
+/* #define DEBUG_FS_FREEZING */
+
+/**
+ * freeze_filesystems - lock the filesystems selected by @which (a mask of
+ * FS_FREEZER_FUSE / FS_FREEZER_NORMAL) and force them into a consistent state
+ */
+void freeze_filesystems(int which)
+{
+	struct super_block *sb;
+
+	lockdep_off();
+
+	/*
+	 * Freeze in reverse order so filesystems dependent upon others are
+	 * frozen in the right order (eg. loopback on ext3).
+	 */
+	list_for_each_entry_reverse(sb, &super_blocks, s_list) {
+		FS_PRINTK(KERN_INFO "Considering %s.%s: (root %p, bdev %x)",
+			sb->s_type->name ? sb->s_type->name : "?",
+			sb->s_subtype ? sb->s_subtype : "", sb->s_root,
+			sb->s_bdev ? sb->s_bdev->bd_dev : 0);
+
+		if ((sb->s_type->fs_flags & FS_IS_FUSE) &&
+		    sb->s_frozen == SB_UNFROZEN &&
+		    (which & FS_FREEZER_FUSE)) {
+			sb->s_frozen = SB_FREEZE_TRANS; /* flag only, no freeze_bdev() */
+			sb->s_flags |= MS_FROZEN;
+			printk(KERN_INFO "Fuse filesystem done.\n");
+			continue;
+		}
+
+		if (!sb->s_root || !sb->s_bdev ||
+		    (sb->s_frozen == SB_FREEZE_TRANS) ||
+		    (sb->s_flags & MS_RDONLY) ||
+		    (sb->s_flags & MS_FROZEN) ||
+		    !(which & FS_FREEZER_NORMAL)) {
+			FS_PRINTK(KERN_INFO "Nope.\n");
+			continue;
+		}
+
+		FS_PRINTK(KERN_INFO "Freezing %x... ", sb->s_bdev->bd_dev);
+		freeze_bdev(sb->s_bdev);
+		sb->s_flags |= MS_FROZEN; /* lets thaw_filesystems() find it */
+		FS_PRINTK(KERN_INFO "Done.\n");
+	}
+
+	lockdep_on();
+}
+
+/**
+ * thaw_filesystems - undo freeze_filesystems() for the classes in @which
+ */
+void thaw_filesystems(int which)
+{
+	struct super_block *sb;
+
+	lockdep_off();
+
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (!(sb->s_flags & MS_FROZEN))
+			continue;
+
+		if (sb->s_type->fs_flags & FS_IS_FUSE) {
+			if (!(which & FS_FREEZER_FUSE))
+				continue;
+			sb->s_frozen = SB_UNFROZEN;
+			/* wake anyone sleeping in vfs_check_frozen() on this sb */
+			wake_up(&sb->s_wait_unfrozen);
+		} else {
+			if (!(which & FS_FREEZER_NORMAL))
+				continue;
+			thaw_bdev(sb->s_bdev, sb);
+		}
+		sb->s_flags &= ~MS_FROZEN;
+	}
+
+	lockdep_on();
+}
+
 /*
  * Various filesystems appear to want __find_get_block to be non-blocking.
  * But it's the page lock which protects the buffers.  To get around this,
diff -Npur linux-2.6-block/fs/drop_caches.c linux-2.6-block-custom/fs/drop_caches.c
--- linux-2.6-block/fs/drop_caches.c	2008-09-27 16:12:24.638924221 +0900
+++ linux-2.6-block-custom/fs/drop_caches.c	2008-09-26 19:45:20.366786422 +0900
@@ -33,7 +33,7 @@ static void drop_pagecache_sb(struct sup
 	iput(toput_inode);
 }
 
-static void drop_pagecache(void)
+void drop_pagecache(void)
 {
 	struct super_block *sb;
 
diff -Npur linux-2.6-block/fs/fuse/control.c linux-2.6-block-custom/fs/fuse/control.c
--- linux-2.6-block/fs/fuse/control.c	2008-09-27 16:12:24.790896399 +0900
+++ linux-2.6-block-custom/fs/fuse/control.c	2008-09-26 19:45:20.366786422 +0900
@@ -207,6 +207,7 @@ static void fuse_ctl_kill_sb(struct supe
 static struct file_system_type fuse_ctl_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fusectl",
+	.fs_flags	= FS_IS_FUSE,
 	.get_sb		= fuse_ctl_get_sb,
 	.kill_sb	= fuse_ctl_kill_sb,
 };
diff -Npur linux-2.6-block/fs/fuse/dev.c linux-2.6-block-custom/fs/fuse/dev.c
--- linux-2.6-block/fs/fuse/dev.c	2008-09-27 16:12:24.790896399 +0900
+++ linux-2.6-block-custom/fs/fuse/dev.c	2008-09-26 19:45:20.370775197 +0900
@@ -7,6 +7,7 @@
 */
 
 #include "fuse_i.h"
+#include "fuse.h"
 
 #include <linux/init.h>
 #include <linux/module.h>
@@ -16,6 +17,7 @@
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/slab.h>
+#include <linux/freezer.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 
@@ -743,6 +745,8 @@ static ssize_t fuse_dev_read(struct kioc
 	if (!fc)
 		return -EPERM;
 
+	FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_dev_read");
+
  restart:
 	spin_lock(&fc->lock);
 	err = -EAGAIN;
@@ -869,6 +873,9 @@ static ssize_t fuse_dev_write(struct kio
 	if (!fc)
 		return -EPERM;
 
+	FUSE_MIGHT_FREEZE(iocb->ki_filp->f_mapping->host->i_sb,
+			"fuse_dev_write");
+
 	fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
 	if (nbytes < sizeof(struct fuse_out_header))
 		return -EINVAL;
diff -Npur linux-2.6-block/fs/fuse/dir.c linux-2.6-block-custom/fs/fuse/dir.c
--- linux-2.6-block/fs/fuse/dir.c	2008-09-27 16:12:24.794897047 +0900
+++ linux-2.6-block-custom/fs/fuse/dir.c	2008-09-26 19:45:20.370775197 +0900
@@ -7,12 +7,14 @@
 */
 
 #include "fuse_i.h"
+#include "fuse.h"
 
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/namei.h>
+#include <linux/freezer.h>
 
 #if BITS_PER_LONG >= 64
 static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
@@ -174,6 +176,9 @@ static int fuse_dentry_revalidate(struct
 			return 0;
 
 		fc = get_fuse_conn(inode);
+
+		FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_dentry_revalidate");
+
 		req = fuse_get_req(fc);
 		if (IS_ERR(req))
 			return 0;
@@ -273,6 +278,8 @@ int fuse_lookup_name(struct super_block 
 	if (IS_ERR(req))
 		goto out;
 
+	FUSE_MIGHT_FREEZE(sb, "fuse_lookup");
+
 	forget_req = fuse_get_req(fc);
 	err = PTR_ERR(forget_req);
 	if (IS_ERR(forget_req)) {
@@ -402,6 +409,8 @@ static int fuse_create_open(struct inode
 	if (IS_ERR(forget_req))
 		return PTR_ERR(forget_req);
 
+	FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_create_open");
+
 	req = fuse_get_req(fc);
 	err = PTR_ERR(req);
 	if (IS_ERR(req))
@@ -488,6 +497,8 @@ static int create_new_entry(struct fuse_
 	int err;
 	struct fuse_req *forget_req;
 
+	FUSE_MIGHT_FREEZE(dir->i_sb, "create_new_entry");
+
 	forget_req = fuse_get_req(fc);
 	if (IS_ERR(forget_req)) {
 		fuse_put_request(fc, req);
@@ -585,7 +596,11 @@ static int fuse_mkdir(struct inode *dir,
 {
 	struct fuse_mkdir_in inarg;
 	struct fuse_conn *fc = get_fuse_conn(dir);
-	struct fuse_req *req = fuse_get_req(fc);
+	struct fuse_req *req;
+
+	FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_mkdir");
+
+	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -605,7 +620,11 @@ static int fuse_symlink(struct inode *di
 {
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	unsigned len = strlen(link) + 1;
-	struct fuse_req *req = fuse_get_req(fc);
+	struct fuse_req *req;
+
+	FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_symlink");
+
+	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -622,7 +641,11 @@ static int fuse_unlink(struct inode *dir
 {
 	int err;
 	struct fuse_conn *fc = get_fuse_conn(dir);
-	struct fuse_req *req = fuse_get_req(fc);
+	struct fuse_req *req;
+
+	FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_unlink");
+
+	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -653,7 +676,11 @@ static int fuse_rmdir(struct inode *dir,
 {
 	int err;
 	struct fuse_conn *fc = get_fuse_conn(dir);
-	struct fuse_req *req = fuse_get_req(fc);
+	struct fuse_req *req;
+
+	FUSE_MIGHT_FREEZE(dir->i_sb, "fuse_rmdir");
+
+	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
diff -Npur linux-2.6-block/fs/fuse/file.c linux-2.6-block-custom/fs/fuse/file.c
--- linux-2.6-block/fs/fuse/file.c	2008-09-27 16:12:24.794897047 +0900
+++ linux-2.6-block-custom/fs/fuse/file.c	2008-09-26 19:45:20.370775197 +0900
@@ -7,11 +7,13 @@
 */
 
 #include "fuse_i.h"
+#include "fuse.h"
 
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/freezer.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
@@ -23,6 +25,8 @@ static int fuse_send_open(struct inode *
 	struct fuse_req *req;
 	int err;
 
+	FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_send_open");
+
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -674,6 +678,8 @@ static int fuse_buffered_write(struct fi
 	if (is_bad_inode(inode))
 		return -EIO;
 
+	FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_commit_write");
+
 	/*
 	 * Make sure writepages on the same page are not mixed up with
 	 * plain writes.
@@ -962,6 +968,8 @@ static ssize_t fuse_direct_io(struct fil
 	if (is_bad_inode(inode))
 		return -EIO;
 
+	FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_direct_io");
+
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -1315,6 +1323,8 @@ static int fuse_getlk(struct file *file,
 	struct fuse_lk_out outarg;
 	int err;
 
+	FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_getlk");
+
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -1350,6 +1360,8 @@ static int fuse_setlk(struct file *file,
 	if (fl->fl_flags & FL_CLOSE)
 		return 0;
 
+	FUSE_MIGHT_FREEZE(file->f_mapping->host->i_sb, "fuse_setlk");
+
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -1416,6 +1428,8 @@ static sector_t fuse_bmap(struct address
 	if (!inode->i_sb->s_bdev || fc->no_bmap)
 		return 0;
 
+	FUSE_MIGHT_FREEZE(inode->i_sb, "fuse_bmap");
+
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
 		return 0;
diff -Npur linux-2.6-block/fs/fuse/fuse.h linux-2.6-block-custom/fs/fuse/fuse.h
--- linux-2.6-block/fs/fuse/fuse.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/fs/fuse/fuse.h	2008-09-26 20:12:21.763780942 +0900
@@ -0,0 +1,13 @@
+#define FUSE_MIGHT_FREEZE(superblock, desc) /* desc: string literal */ \
+do { \
+	int printed = 0; \
+	while ((superblock)->s_frozen != SB_UNFROZEN) { \
+		if (!printed) { \
+			printk(KERN_INFO "%d frozen in " desc ".\n", \
+						current->pid); \
+			printed = 1; \
+		} \
+		try_to_freeze(); \
+		yield(); \
+	} \
+} while (0)
diff -Npur linux-2.6-block/fs/fuse/inode.c linux-2.6-block-custom/fs/fuse/inode.c
--- linux-2.6-block/fs/fuse/inode.c	2008-09-27 16:12:24.798947213 +0900
+++ linux-2.6-block-custom/fs/fuse/inode.c	2008-09-26 19:45:20.374785553 +0900
@@ -914,7 +914,7 @@ static int fuse_get_sb(struct file_syste
 static struct file_system_type fuse_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuse",
-	.fs_flags	= FS_HAS_SUBTYPE,
+	.fs_flags	= FS_HAS_SUBTYPE | FS_IS_FUSE,
 	.get_sb		= fuse_get_sb,
 	.kill_sb	= kill_anon_super,
 };
@@ -933,7 +933,7 @@ static struct file_system_type fuseblk_f
 	.name		= "fuseblk",
 	.get_sb		= fuse_get_sb_blk,
 	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
+	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_IS_FUSE,
 };
 
 static inline int register_fuseblk(void)
diff -Npur linux-2.6-block/fs/ioctl.c linux-2.6-block-custom/fs/ioctl.c
--- linux-2.6-block/fs/ioctl.c	2008-09-27 16:12:24.910925901 +0900
+++ linux-2.6-block-custom/fs/ioctl.c	2008-09-26 19:45:20.374785553 +0900
@@ -211,3 +211,4 @@ asmlinkage long sys_ioctl(unsigned int f
  out:
 	return error;
 }
+EXPORT_SYMBOL(sys_ioctl);
diff -Npur linux-2.6-block/fs/namei.c linux-2.6-block-custom/fs/namei.c
--- linux-2.6-block/fs/namei.c	2008-09-27 16:12:25.078943414 +0900
+++ linux-2.6-block-custom/fs/namei.c	2008-09-26 19:45:20.374785553 +0900
@@ -2223,6 +2223,8 @@ int vfs_unlink(struct inode *dir, struct
 	if (!dir->i_op || !dir->i_op->unlink)
 		return -EPERM;
 
+	vfs_check_frozen(dir->i_sb, SB_FREEZE_WRITE);
+
 	DQUOT_INIT(dir);
 
 	mutex_lock(&dentry->d_inode->i_mutex);
diff -Npur linux-2.6-block/fs/squashfs/Makefile linux-2.6-block-custom/fs/squashfs/Makefile
--- linux-2.6-block/fs/squashfs/Makefile	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/fs/squashfs/Makefile	2008-09-26 20:30:06.846771936 +0900
@@ -0,0 +1,7 @@
+#
+# Makefile for the linux squashfs routines.
+#
+
+obj-$(CONFIG_SQUASHFS) += squashfs.o
+squashfs-y += inode.o
+squashfs-y += squashfs2_0.o
diff -Npur linux-2.6-block/fs/squashfs/inode.c linux-2.6-block-custom/fs/squashfs/inode.c
--- linux-2.6-block/fs/squashfs/inode.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/fs/squashfs/inode.c	2008-09-26 20:34:21.450760363 +0900
@@ -0,0 +1,2157 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * inode.c
+ */
+
+#include <linux/squashfs_fs.h>
+#include <linux/module.h>
+#include <linux/zlib.h>
+#include <linux/fs.h>
+#include <linux/squashfs_fs_sb.h>
+#include <linux/squashfs_fs_i.h>
+#include <linux/buffer_head.h>
+#include <linux/vfs.h>
+#include <linux/vmalloc.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/exportfs.h>
+
+#include "squashfs.h"
+
+static struct dentry *squashfs_fh_to_dentry(struct super_block *s,
+		struct fid *fid, int fh_len, int fh_type);
+static struct dentry *squashfs_fh_to_parent(struct super_block *s,
+		struct fid *fid, int fh_len, int fh_type);
+static struct dentry *squashfs_get_parent(struct dentry *child);
+static int squashfs_read_inode(struct inode *i, squashfs_inode_t inode);
+static int squashfs_statfs(struct dentry *, struct kstatfs *);
+static int squashfs_symlink_readpage(struct file *file, struct page *page);
+static long long read_blocklist(struct inode *inode, int index,
+				int readahead_blks, char *block_list,
+				unsigned short **block_p, unsigned int *bsize);
+static int squashfs_readpage(struct file *file, struct page *page);
+static int squashfs_readdir(struct file *, void *, filldir_t);
+static struct dentry *squashfs_lookup(struct inode *, struct dentry *,
+				struct nameidata *);
+static int squashfs_remount(struct super_block *s, int *flags, char *data);
+static void squashfs_put_super(struct super_block *);
+static int squashfs_get_sb(struct file_system_type *,int, const char *, void *,
+				struct vfsmount *);
+static struct inode *squashfs_alloc_inode(struct super_block *sb);
+static void squashfs_destroy_inode(struct inode *inode);
+static int init_inodecache(void);
+static void destroy_inodecache(void);
+
+static struct file_system_type squashfs_fs_type = {	/* "squashfs" filesystem type; superblocks come from squashfs_get_sb() */
+	.owner = THIS_MODULE,
+	.name = "squashfs",
+	.get_sb = squashfs_get_sb,
+	.kill_sb = kill_block_super,
+	.fs_flags = FS_REQUIRES_DEV
+};
+
+static const unsigned char squashfs_filetype_table[] = {	/* DT_* codes; NOTE(review): presumably indexed by on-disk inode type - confirm at the use site */
+	DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK
+};
+
+static struct super_operations squashfs_super_ops = {	/* superblock callbacks implemented in this file */
+	.alloc_inode = squashfs_alloc_inode,
+	.destroy_inode = squashfs_destroy_inode,
+	.statfs = squashfs_statfs,
+	.put_super = squashfs_put_super,
+	.remount_fs = squashfs_remount
+};
+
+static struct export_operations squashfs_export_ops = {	/* file-handle <-> dentry conversion callbacks */
+	.fh_to_dentry = squashfs_fh_to_dentry,
+	.fh_to_parent = squashfs_fh_to_parent,
+	.get_parent = squashfs_get_parent
+};
+
+SQSH_EXTERN const struct address_space_operations squashfs_symlink_aops = {	/* installed on symlink inodes by squashfs_read_inode() */
+	.readpage = squashfs_symlink_readpage
+};
+
+SQSH_EXTERN const struct address_space_operations squashfs_aops = {	/* installed on regular-file inodes by squashfs_read_inode() */
+	.readpage = squashfs_readpage
+};
+
+static const struct file_operations squashfs_dir_ops = {	/* installed as i_fop on directory inodes */
+	.read = generic_read_dir,
+	.readdir = squashfs_readdir
+};
+
+SQSH_EXTERN struct inode_operations squashfs_dir_inode_ops = {	/* installed as i_op on directory inodes */
+	.lookup = squashfs_lookup
+};
+
+/* Fetch the 2-byte length word at byte *offset of device block *cur_index into *c_byte; returns the buffer to keep reading from, or NULL on error. */
+static struct buffer_head *get_block_length(struct super_block *s,
+				int *cur_index, int *offset, int *c_byte)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	unsigned short temp;	/* assembled one byte at a time below */
+	struct buffer_head *bh;
+
+	if (!(bh = sb_bread(s, *cur_index)))
+		goto out;
+
+	if (msblk->devblksize - *offset == 1) {	/* length word straddles two device blocks */
+		if (msblk->swap)
+			((unsigned char *) &temp)[1] = *((unsigned char *)
+				(bh->b_data + *offset));
+		else
+			((unsigned char *) &temp)[0] = *((unsigned char *)
+				(bh->b_data + *offset));
+		brelse(bh);
+		if (!(bh = sb_bread(s, ++(*cur_index))))	/* second byte lives in the next block */
+			goto out;
+		if (msblk->swap)
+			((unsigned char *) &temp)[0] = *((unsigned char *)
+				bh->b_data); 
+		else
+			((unsigned char *) &temp)[1] = *((unsigned char *)
+				bh->b_data); 
+		*c_byte = temp;
+		*offset = 1;	/* one byte of the new block has been consumed */
+	} else {
+		if (msblk->swap) {	/* the two bytes are stored into temp in exchanged positions */
+			((unsigned char *) &temp)[1] = *((unsigned char *)
+				(bh->b_data + *offset));
+			((unsigned char *) &temp)[0] = *((unsigned char *)
+				(bh->b_data + *offset + 1)); 
+		} else {
+			((unsigned char *) &temp)[0] = *((unsigned char *)
+				(bh->b_data + *offset));
+			((unsigned char *) &temp)[1] = *((unsigned char *)
+				(bh->b_data + *offset + 1)); 
+		}
+		*c_byte = temp;
+		*offset += 2;
+	}
+
+	if (SQUASHFS_CHECK_DATA(msblk->sblk.flags)) {	/* a marker byte follows the length word */
+		if (*offset == msblk->devblksize) {	/* marker is the first byte of the next block */
+			brelse(bh);
+			if (!(bh = sb_bread(s, ++(*cur_index))))
+				goto out;
+			*offset = 0;
+		}
+		if (*((unsigned char *) (bh->b_data + *offset)) !=
+						SQUASHFS_MARKER_BYTE) {
+			ERROR("Metadata block marker corrupt @ %x\n",
+						*cur_index);
+			brelse(bh);
+			goto out;
+		}
+		(*offset)++;	/* step over the marker */
+	}
+	return bh;	/* the caller now holds the reference and must release it */
+
+out:
+	return NULL;
+}
+
+/* Read the block starting at byte @index into @buffer (capacity @srclength), inflating it when compressed; returns the byte count produced, or 0 on failure. */
+SQSH_EXTERN unsigned int squashfs_read_data(struct super_block *s, char *buffer,
+			long long index, unsigned int length,
+			long long *next_index, int srclength)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	struct buffer_head **bh;
+	unsigned int offset = index & ((1 << msblk->devblksize_log2) - 1);	/* byte offset inside the first device block */
+	unsigned int cur_index = index >> msblk->devblksize_log2;	/* device block number */
+	int bytes, avail_bytes, b = 0, k = 0;	/* b: buffers acquired, k: buffers already consumed and released */
+	unsigned int compressed;
+	unsigned int c_byte = length;
+
+	bh = kmalloc(((sblk->block_size >> msblk->devblksize_log2) + 1) *
+								sizeof(struct buffer_head *), GFP_KERNEL);
+	if (bh == NULL)
+		goto read_failure;
+
+	if (c_byte) {	/* caller supplied the encoded length word (@length != 0) */
+		bytes = -offset;	/* start negative so the skipped head of the first block is not counted */
+		compressed = SQUASHFS_COMPRESSED_BLOCK(c_byte);
+		c_byte = SQUASHFS_COMPRESSED_SIZE_BLOCK(c_byte);
+
+		TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", index,
+					compressed ? "" : "un", (unsigned int) c_byte, srclength);
+
+		if (c_byte > srclength || index < 0 || (index + c_byte) > sblk->bytes_used)
+			goto read_failure;
+
+		for (b = 0; bytes < (int) c_byte; b++, cur_index++) {
+			bh[b] = sb_getblk(s, cur_index);
+			if (bh[b] == NULL)
+				goto block_release;
+			bytes += msblk->devblksize;
+		}
+		ll_rw_block(READ, b, bh);
+	} else {	/* @length == 0: the length word is read from disk first */
+		if (index < 0 || (index + 2) > sblk->bytes_used)
+			goto read_failure;
+
+		bh[0] = get_block_length(s, &cur_index, &offset, &c_byte);
+		if (bh[0] == NULL)
+			goto read_failure;
+		b = 1;
+
+		bytes = msblk->devblksize - offset;
+		compressed = SQUASHFS_COMPRESSED(c_byte);
+		c_byte = SQUASHFS_COMPRESSED_SIZE(c_byte);
+
+		TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed
+					? "" : "un", (unsigned int) c_byte);
+
+		if (c_byte > srclength || (index + c_byte) > sblk->bytes_used)
+			goto block_release;
+
+		for (; bytes < c_byte; b++) {
+			bh[b] = sb_getblk(s, ++cur_index);
+			if (bh[b] == NULL)
+				goto block_release;
+			bytes += msblk->devblksize;
+		}
+		ll_rw_block(READ, b - 1, bh + 1);	/* bh[0] was already read by get_block_length() */
+	}
+
+	if (compressed) {
+		int zlib_err = 0;
+
+		/*
+		 * Inflate the block; the shared z_stream in msblk is protected by read_data_mutex.
+		 */
+
+		mutex_lock(&msblk->read_data_mutex);
+
+		msblk->stream.next_out = buffer;
+		msblk->stream.avail_out = srclength;
+
+		for (bytes = 0; k < b; k++) {
+			avail_bytes = min(c_byte - bytes, msblk->devblksize - offset);
+
+			wait_on_buffer(bh[k]);
+			if (!buffer_uptodate(bh[k]))
+				goto release_mutex;
+
+			msblk->stream.next_in = bh[k]->b_data + offset;
+			msblk->stream.avail_in = avail_bytes;
+
+			if (k == 0) {
+				zlib_err = zlib_inflateInit(&msblk->stream);
+				if (zlib_err != Z_OK) {
+					ERROR("zlib_inflateInit returned unexpected result 0x%x,"
+						" srclength %d\n", zlib_err, srclength);
+					goto release_mutex;
+				}
+
+				if (avail_bytes == 0) {	/* first buffer holds no payload bytes */
+					offset = 0;
+					brelse(bh[k]);
+					continue;
+				}
+			}
+
+			zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH);
+			if (zlib_err != Z_OK && zlib_err != Z_STREAM_END) {
+				ERROR("zlib_inflate returned unexpected result 0x%x,"
+					" srclength %d, avail_in %d, avail_out %d\n", zlib_err,
+					srclength, msblk->stream.avail_in, msblk->stream.avail_out);
+				goto release_mutex;
+			}
+
+			bytes += avail_bytes;
+			offset = 0;	/* every later buffer is consumed from its start */
+			brelse(bh[k]);
+		}
+
+		if (zlib_err != Z_STREAM_END)	/* input exhausted before the stream ended */
+			goto release_mutex;
+
+		zlib_err = zlib_inflateEnd(&msblk->stream);
+		if (zlib_err != Z_OK) {
+			ERROR("zlib_inflateEnd returned unexpected result 0x%x,"
+				" srclength %d\n", zlib_err, srclength);
+			goto release_mutex;
+		}
+		bytes = msblk->stream.total_out;
+		mutex_unlock(&msblk->read_data_mutex);
+	} else {
+		int i;
+
+		for(i = 0; i < b; i++) {	/* wait for every buffer before copying anything */
+			wait_on_buffer(bh[i]);
+			if (!buffer_uptodate(bh[i]))
+				goto block_release;
+		}
+
+		for (bytes = 0; k < b; k++) {
+			avail_bytes = min(c_byte - bytes, msblk->devblksize - offset);
+
+			memcpy(buffer + bytes, bh[k]->b_data + offset, avail_bytes);
+			bytes += avail_bytes;
+			offset = 0;
+			brelse(bh[k]);
+		}
+	}
+
+	if (next_index)	/* when a length word was read from disk, also skip it (2 bytes, 3 with the check-data marker) */
+		*next_index = index + c_byte + (length ? 0 :
+				(SQUASHFS_CHECK_DATA(msblk->sblk.flags) ? 3 : 2));
+
+	kfree(bh);
+	return bytes;
+
+release_mutex:
+	mutex_unlock(&msblk->read_data_mutex);
+
+block_release:
+	for (; k < b; k++)	/* buffers below k have already been released */
+		brelse(bh[k]);
+
+read_failure:
+	ERROR("sb_bread failed reading block 0x%x\n", cur_index);	/* also printed for allocation and bounds-check failures */
+	kfree(bh);
+	return 0;
+}
+
+/* Return the locked cache entry holding @block, reading it into a free entry first if needed; read failures are reported through entry->error. */
+static struct squashfs_cache_entry *squashfs_cache_get(struct super_block *s,
+	struct squashfs_cache *cache, long long block, int length)
+{
+	int i, n;
+	struct squashfs_cache_entry *entry;
+
+	spin_lock(&cache->lock);
+
+	while (1) {
+		for (i = 0; i < cache->entries && cache->entry[i].block != block; i++);
+
+		if (i == cache->entries) {	/* miss: the block is not cached */
+			if (cache->unused_blks == 0) {	/* every entry is locked: sleep until one is put back */
+				cache->waiting ++;
+				spin_unlock(&cache->lock);
+				wait_event(cache->wait_queue, cache->unused_blks);
+				spin_lock(&cache->lock);
+				cache->waiting --;
+				continue;	/* rescan, the block may have been cached meanwhile */
+			}
+
+			i = cache->next_blk;	/* round-robin search for an unlocked entry */
+			for (n = 0; n < cache->entries; n++) {
+				if (cache->entry[i].locked == 0)
+					break;
+				i = (i + 1) % cache->entries;
+			}
+
+			cache->next_blk = (i + 1) % cache->entries;
+			entry = &cache->entry[i];
+
+			cache->unused_blks --;
+			entry->block = block;
+			entry->locked = 1;
+			entry->pending = 1;	/* other lookups of this block wait until the read completes */
+			entry->waiting = 0;
+			entry->error = 0;
+			spin_unlock(&cache->lock);
+
+			entry->length = squashfs_read_data(s, entry->data,
+				block, length, &entry->next_index, cache->block_size);
+
+			spin_lock(&cache->lock);
+
+			if (entry->length == 0)	/* squashfs_read_data() returns 0 on failure */
+				entry->error = 1;
+
+			entry->pending = 0;
+			spin_unlock(&cache->lock);
+			if (entry->waiting)
+				wake_up_all(&entry->wait_queue);
+			goto out;
+		}
+
+		entry = &cache->entry[i];	/* hit: take one more reference */
+		if (entry->locked == 0)
+			cache->unused_blks --;
+		entry->locked++;
+
+		if (entry->pending) {	/* another task is still filling this entry */
+			entry->waiting ++;
+			spin_unlock(&cache->lock);
+			wait_event(entry->wait_queue, !entry->pending);
+			goto out;
+		}
+
+		spin_unlock(&cache->lock);
+		goto out;
+	}
+
+out:
+	TRACE("Got %s %d, start block %lld, locked %d, error %d\n", cache->name,
+		i, entry->block, entry->locked, entry->error);	/* name feeds %s, index feeds %d */
+	if (entry->error)
+		ERROR("Unable to read %s cache entry [%llx]\n", cache->name, block);
+	return entry;
+}
+
+/* Drop one reference on @entry; when the last one goes the entry counts as unused again and a task waiting for a free entry is woken. */
+static void squashfs_cache_put(struct squashfs_cache *cache,
+				struct squashfs_cache_entry *entry)
+{
+	spin_lock(&cache->lock);
+	entry->locked --;
+	if (entry->locked == 0) {
+		cache->unused_blks ++;
+		spin_unlock(&cache->lock);
+		if (cache->waiting)	/* read after the lock is dropped */
+			wake_up(&cache->wait_queue);
+	} else
+		spin_unlock(&cache->lock);
+}
+
+/* Free every entry buffer of @cache, then the cache itself; a NULL @cache is ignored. */
+static void squashfs_cache_delete(struct squashfs_cache *cache)
+{
+	int idx;
+
+	if (!cache)
+		return;
+
+	for (idx = 0; idx < cache->entries; idx++) {
+		void *buf = cache->entry[idx].data;
+
+		if (buf && cache->use_vmalloc)	/* release with the allocator that was used */
+			vfree(buf);
+		else if (buf)
+			kfree(buf);
+	}
+	kfree(cache);
+}
+
+/* Allocate a cache named @name with @entries buffers of @block_size bytes each (vmalloc'ed when @use_vmalloc); returns NULL on failure. */
+static struct squashfs_cache *squashfs_cache_init(char *name, int entries,
+	int block_size, int use_vmalloc)
+{
+	struct squashfs_cache *cache;
+	int n;
+
+	cache = kzalloc(sizeof(struct squashfs_cache) +
+			entries * sizeof(struct squashfs_cache_entry), GFP_KERNEL);
+	if (!cache) {
+		ERROR("Failed to allocate %s cache\n", name);
+		return NULL;
+	}
+
+	cache->name = name;
+	cache->entries = entries;
+	cache->unused_blks = entries;	/* nothing is locked yet */
+	cache->block_size = block_size;
+	cache->use_vmalloc = use_vmalloc;
+	cache->next_blk = 0;
+	cache->waiting = 0;
+	spin_lock_init(&cache->lock);
+	init_waitqueue_head(&cache->wait_queue);
+
+	for (n = 0; n < entries; n++) {
+		struct squashfs_cache_entry *slot = &cache->entry[n];
+
+		init_waitqueue_head(&slot->wait_queue);
+		slot->block = SQUASHFS_INVALID_BLK;
+		slot->data = use_vmalloc ? vmalloc(block_size) :
+				kmalloc(block_size, GFP_KERNEL);
+		if (!slot->data) {
+			ERROR("Failed to allocate %s cache entry\n", name);
+			squashfs_cache_delete(cache);	/* frees the buffers allocated so far */
+			return NULL;
+		}
+	}
+
+	return cache;
+}
+
+/* Copy @length bytes of metadata starting at (@block, @offset) into @buffer (may be NULL to just skip); returns @length, or 0 on failure. */
+SQSH_EXTERN int squashfs_get_cached_block(struct super_block *s, void *buffer,
+				long long block, unsigned int offset,
+				int length, long long *next_block,
+				unsigned int *next_offset)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	int bytes, return_length = length;
+	struct squashfs_cache_entry *entry;
+
+	TRACE("Entered squashfs_get_cached_block [%llx:%x]\n", block, offset);
+
+	while (1) {
+		entry = squashfs_cache_get(s, msblk->block_cache, block, 0);
+		bytes = entry->length - offset;	/* bytes available in this block from @offset on */
+
+		if (entry->error || bytes < 1) {
+			return_length = 0;
+			goto finish;
+		} else if (bytes >= length) {	/* the request ends inside this block */
+			if (buffer)
+				memcpy(buffer, entry->data + offset, length);
+			if (entry->length - offset == length) {	/* block fully consumed: continue at the next one */
+				*next_block = entry->next_index;
+				*next_offset = 0;
+			} else {
+				*next_block = block;
+				*next_offset = offset + length;
+			}
+			goto finish;
+		} else {	/* take what is here, then carry on in the following block */
+			if (buffer) {
+				memcpy(buffer, entry->data + offset, bytes);
+				buffer = (char *) buffer + bytes;
+			}
+			block = entry->next_index;
+			squashfs_cache_put(msblk->block_cache, entry);
+			length -= bytes;
+			offset = 0;
+		}
+	}
+
+finish:
+	squashfs_cache_put(msblk->block_cache, entry);	/* *next_block and *next_offset are left untouched on failure */
+	return return_length;
+}
+
+/* Look up fragment number @fragment and store its start block and size through the out pointers; returns 1 on success, 0 on failure. */
+static int get_fragment_location(struct super_block *s, unsigned int fragment,
+				long long *fragment_start_block,
+				unsigned int *fragment_size)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	long long start_block;
+	unsigned int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
+	struct squashfs_fragment_entry fragment_entry;
+
+	/* @fragment is read from disk; the index table is not even allocated when there are no fragments */
+	if (fragment >= msblk->sblk.fragments)
+		return 0;
+	start_block = msblk->fragment_index[SQUASHFS_FRAGMENT_INDEX(fragment)];
+
+	if (msblk->swap) {
+		struct squashfs_fragment_entry sfragment_entry;
+
+		if (!squashfs_get_cached_block(s, &sfragment_entry, start_block, offset,
+					 sizeof(sfragment_entry), &start_block, &offset))
+			return 0;
+		SQUASHFS_SWAP_FRAGMENT_ENTRY(&fragment_entry, &sfragment_entry);
+	} else if (!squashfs_get_cached_block(s, &fragment_entry, start_block, offset,
+					 sizeof(fragment_entry), &start_block, &offset))
+		return 0;
+
+	*fragment_start_block = fragment_entry.start_block;
+	*fragment_size = fragment_entry.size;
+
+	return 1;
+}
+
+/* Give back a fragment cache entry obtained from get_cached_fragment(). */
+SQSH_EXTERN void release_cached_fragment(struct squashfs_sb_info *msblk,
+				struct squashfs_cache_entry *fragment)
+{
+	squashfs_cache_put(msblk->fragment_cache, fragment);
+}
+
+/* Fetch the fragment block at @start_block through the fragment cache; @length is handed to squashfs_read_data() as the encoded block length. */
+SQSH_EXTERN
+struct squashfs_cache_entry *get_cached_fragment(struct super_block *s,
+				long long start_block, int length)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+
+	return squashfs_cache_get(s, msblk->fragment_cache, start_block, length);	/* caller must check entry->error and release the entry */
+}
+
+/* Fill the fields of @i that are common to every inode type from base header @inodeb. */
+static void squashfs_new_inode(struct squashfs_sb_info *msblk, struct inode *i,
+				struct squashfs_base_inode_header *inodeb)
+{
+	i->i_ino = inodeb->inode_number;
+	i->i_mode = inodeb->mode;
+	i->i_size = 0;
+
+	/* the header's mtime is used for all three timestamps */
+	i->i_mtime.tv_sec = inodeb->mtime;
+	i->i_atime.tv_sec = inodeb->mtime;
+	i->i_ctime.tv_sec = inodeb->mtime;
+
+	i->i_uid = msblk->uid[inodeb->uid];
+	i->i_gid = (inodeb->guid == SQUASHFS_GUIDS) ? i->i_uid :
+					msblk->guid[inodeb->guid];
+}
+
+/* Translate inode number @ino into its on-disk inode reference via the lookup table; returns SQUASHFS_INVALID_BLK on failure. */
+static squashfs_inode_t squashfs_inode_lookup(struct super_block *s, int ino)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	long long start;
+	unsigned int offset;
+	squashfs_inode_t inode;
+
+	TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino);
+
+	if (ino < 1 || ino > msblk->sblk.inodes)	/* @ino may come from a file handle: keep it inside the table */
+		return SQUASHFS_INVALID_BLK;
+	start = msblk->inode_lookup_table[SQUASHFS_LOOKUP_BLOCK(ino - 1)];
+	offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino - 1);
+
+	if (msblk->swap) {
+		squashfs_inode_t sinode;
+
+		if (!squashfs_get_cached_block(s, &sinode, start, offset,
+					sizeof(sinode), &start, &offset))
+			return SQUASHFS_INVALID_BLK;
+		SQUASHFS_SWAP_INODE_T((&inode), &sinode);
+	} else if (!squashfs_get_cached_block(s, &inode, start, offset,
+					sizeof(inode), &start, &offset))
+		return SQUASHFS_INVALID_BLK;
+
+	TRACE("squashfs_inode_lookup, inode = 0x%llx\n", inode);
+	return inode;
+}
+
+/* Build an anonymous dentry for inode number @inode_number; returns ERR_PTR(-ENOENT) if the inode cannot be obtained, ERR_PTR(-ENOMEM) if no dentry can be allocated. */
+static struct dentry *squashfs_export_iget(struct super_block *s,
+	unsigned int inode_number)
+{
+	struct inode *i = NULL;
+	struct dentry *dentry = NULL;
+	squashfs_inode_t inode;
+
+	TRACE("Entered squashfs_export_iget\n");
+	inode = squashfs_inode_lookup(s, inode_number);
+	if (inode != SQUASHFS_INVALID_BLK)
+		i = squashfs_iget(s, inode, inode_number);	/* may be NULL: never hand that to d_alloc_anon() */
+	if (i != NULL && (dentry = d_alloc_anon(i)) == NULL)
+		iput(i);	/* d_alloc_anon() failed, so the inode reference is still ours to drop */
+	return dentry != NULL ? dentry : ERR_PTR(i ? -ENOMEM : -ENOENT);
+}
+
+static struct dentry *squashfs_fh_to_dentry(struct super_block *s,
+		struct fid *fid, int fh_len, int fh_type)
+{
+	/* only the two 32-bit inode/generation handle layouts are decoded */
+	if (fh_len >= 2 && (fh_type == FILEID_INO32_GEN ||
+				fh_type == FILEID_INO32_GEN_PARENT))
+		return squashfs_export_iget(s, fid->i32.ino);
+	return NULL;
+}
+
+/* Decode the parent directory from a file handle; NULL unless the handle carries a parent inode number. */
+static struct dentry *squashfs_fh_to_parent(struct super_block *s,
+		struct fid *fid, int fh_len, int fh_type)
+{
+	if (fh_len >= 4 && fh_type == FILEID_INO32_GEN_PARENT)
+		return squashfs_export_iget(s, fid->i32.parent_ino);
+
+	return NULL;
+}
+
+/* Return a dentry for the parent recorded in @child's squashfs inode info. */
+static struct dentry *squashfs_get_parent(struct dentry *child)
+{
+	struct inode *dir = child->d_inode;
+	struct super_block *sb = dir->i_sb;
+
+	TRACE("Entered squashfs_get_parent\n");
+	return squashfs_export_iget(sb, SQUASHFS_I(dir)->u.s2.parent_inode);
+}
+
+/* Get the in-core inode numbered @inode_number, filling it from on-disk reference @inode when it is newly created. */
+SQSH_EXTERN struct inode *squashfs_iget(struct super_block *s,
+				squashfs_inode_t inode, unsigned int inode_number)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct inode *i = iget_locked(s, inode_number);
+
+	TRACE("Entered squashfs_iget\n");
+
+	if (i == NULL || !(i->i_state & I_NEW))
+		return i;	/* no inode obtained, or it is not new and needs no read */
+
+	msblk->read_inode(i, inode);	/* version-specific reader chosen at mount time */
+	unlock_new_inode(i);
+	return i;
+}
+
+/* Read the on-disk inode referenced by @inode and fill in @i; returns 1 on success, 0 (with @i marked bad) on failure. */
+static int squashfs_read_inode(struct inode *i, squashfs_inode_t inode)
+{
+	struct super_block *s = i->i_sb;
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	long long block = SQUASHFS_INODE_BLK(inode) + sblk->inode_table_start;
+	unsigned int offset = SQUASHFS_INODE_OFFSET(inode);
+	long long next_block;
+	unsigned int next_offset;
+	union squashfs_inode_header id, sid;	/* id: native byte order, sid: raw copy used when swapping */
+	struct squashfs_base_inode_header *inodeb = &id.base, *sinodeb = &sid.base;
+
+	TRACE("Entered squashfs_read_inode\n");
+
+	/* read the common base header first; it carries the inode type */
+	if (msblk->swap) {
+		if (!squashfs_get_cached_block(s, sinodeb, block, offset,
+					sizeof(*sinodeb), &next_block, &next_offset))
+			goto failed_read;
+		SQUASHFS_SWAP_BASE_INODE_HEADER(inodeb, sinodeb, sizeof(*sinodeb));
+	} else
+		if (!squashfs_get_cached_block(s, inodeb, block, offset,
+					sizeof(*inodeb), &next_block, &next_offset))
+			goto failed_read;
+
+	squashfs_new_inode(msblk, i, inodeb);
+
+	/* each case re-reads the full type-specific header from the same (block, offset) */
+	switch(inodeb->inode_type) {
+		case SQUASHFS_FILE_TYPE: {
+			unsigned int frag_size = 0;	/* stays 0 when the file has no fragment */
+			long long frag_blk;
+			struct squashfs_reg_inode_header *inodep = &id.reg;
+			struct squashfs_reg_inode_header *sinodep = &sid.reg;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, sinodep, block, offset,
+						sizeof(*sinodep), &next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_REG_INODE_HEADER(inodep, sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, inodep, block, offset,
+						sizeof(*inodep), &next_block, &next_offset))
+					goto failed_read;
+
+			frag_blk = SQUASHFS_INVALID_BLK;
+
+			if (inodep->fragment != SQUASHFS_INVALID_FRAG)
+					if(!get_fragment_location(s, inodep->fragment, &frag_blk,
+												&frag_size))
+						goto failed_read;
+
+			i->i_nlink = 1;
+			i->i_size = inodep->file_size;
+			i->i_fop = &generic_ro_fops;
+			i->i_mode |= S_IFREG;
+			i->i_blocks = ((i->i_size - 1) >> 9) + 1;
+			SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk;
+			SQUASHFS_I(i)->u.s1.fragment_size = frag_size;
+			SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset;
+			SQUASHFS_I(i)->start_block = inodep->start_block;
+			SQUASHFS_I(i)->u.s1.block_list_start = next_block;
+			SQUASHFS_I(i)->offset = next_offset;
+			i->i_data.a_ops = &squashfs_aops;
+
+			TRACE("File inode %x:%x, start_block %llx, "
+					"block_list_start %llx, offset %x\n",
+					SQUASHFS_INODE_BLK(inode), offset,
+					inodep->start_block, next_block,
+					next_offset);
+			break;
+		}
+		case SQUASHFS_LREG_TYPE: {
+			unsigned int frag_size = 0;	/* stays 0 when the file has no fragment */
+			long long frag_blk;
+			struct squashfs_lreg_inode_header *inodep = &id.lreg;
+			struct squashfs_lreg_inode_header *sinodep = &sid.lreg;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, sinodep, block, offset,
+						sizeof(*sinodep), &next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_LREG_INODE_HEADER(inodep, sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, inodep, block, offset,
+						sizeof(*inodep), &next_block, &next_offset))
+					goto failed_read;
+
+			frag_blk = SQUASHFS_INVALID_BLK;
+
+			if (inodep->fragment != SQUASHFS_INVALID_FRAG)
+				if (!get_fragment_location(s, inodep->fragment, &frag_blk,
+												 &frag_size))
+					goto failed_read;
+
+			i->i_nlink = inodep->nlink;
+			i->i_size = inodep->file_size;
+			i->i_fop = &generic_ro_fops;
+			i->i_mode |= S_IFREG;
+			i->i_blocks = ((i->i_size - 1) >> 9) + 1;
+			SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk;
+			SQUASHFS_I(i)->u.s1.fragment_size = frag_size;
+			SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset;
+			SQUASHFS_I(i)->start_block = inodep->start_block;
+			SQUASHFS_I(i)->u.s1.block_list_start = next_block;
+			SQUASHFS_I(i)->offset = next_offset;
+			i->i_data.a_ops = &squashfs_aops;
+
+			TRACE("File inode %x:%x, start_block %llx, "
+					"block_list_start %llx, offset %x\n",
+					SQUASHFS_INODE_BLK(inode), offset,
+					inodep->start_block, next_block,
+					next_offset);
+			break;
+		}
+		case SQUASHFS_DIR_TYPE: {
+			struct squashfs_dir_inode_header *inodep = &id.dir;
+			struct squashfs_dir_inode_header *sinodep = &sid.dir;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, sinodep, block, offset,
+						sizeof(*sinodep), &next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_DIR_INODE_HEADER(inodep, sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, inodep, block, offset,
+						sizeof(*inodep), &next_block, &next_offset))
+					goto failed_read;
+
+			i->i_nlink = inodep->nlink;
+			i->i_size = inodep->file_size;
+			i->i_op = &squashfs_dir_inode_ops;
+			i->i_fop = &squashfs_dir_ops;
+			i->i_mode |= S_IFDIR;
+			SQUASHFS_I(i)->start_block = inodep->start_block;
+			SQUASHFS_I(i)->offset = inodep->offset;
+			SQUASHFS_I(i)->u.s2.directory_index_count = 0;
+			SQUASHFS_I(i)->u.s2.parent_inode = inodep->parent_inode;
+
+			TRACE("Directory inode %x:%x, start_block %x, offset "
+					"%x\n", SQUASHFS_INODE_BLK(inode),
+					offset, inodep->start_block,
+					inodep->offset);
+			break;
+		}
+		case SQUASHFS_LDIR_TYPE: {
+			struct squashfs_ldir_inode_header *inodep = &id.ldir;
+			struct squashfs_ldir_inode_header *sinodep = &sid.ldir;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, sinodep, block, offset,
+						sizeof(*sinodep), &next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_LDIR_INODE_HEADER(inodep, sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, inodep, block, offset,
+						sizeof(*inodep), &next_block, &next_offset))
+					goto failed_read;
+
+			i->i_nlink = inodep->nlink;
+			i->i_size = inodep->file_size;
+			i->i_op = &squashfs_dir_inode_ops;
+			i->i_fop = &squashfs_dir_ops;
+			i->i_mode |= S_IFDIR;
+			SQUASHFS_I(i)->start_block = inodep->start_block;
+			SQUASHFS_I(i)->offset = inodep->offset;
+			SQUASHFS_I(i)->u.s2.directory_index_start = next_block;
+			SQUASHFS_I(i)->u.s2.directory_index_offset = next_offset;
+			SQUASHFS_I(i)->u.s2.directory_index_count = inodep->i_count;
+			SQUASHFS_I(i)->u.s2.parent_inode = inodep->parent_inode;
+
+			TRACE("Long directory inode %x:%x, start_block %x, offset %x\n",
+					SQUASHFS_INODE_BLK(inode), offset,
+					inodep->start_block, inodep->offset);
+			break;
+		}
+		case SQUASHFS_SYMLINK_TYPE: {
+			struct squashfs_symlink_inode_header *inodep = &id.symlink;
+			struct squashfs_symlink_inode_header *sinodep = &sid.symlink;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, sinodep, block, offset,
+						sizeof(*sinodep), &next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_SYMLINK_INODE_HEADER(inodep, sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, inodep, block, offset,
+						sizeof(*inodep), &next_block, &next_offset))
+					goto failed_read;
+
+			i->i_nlink = inodep->nlink;
+			i->i_size = inodep->symlink_size;
+			i->i_op = &page_symlink_inode_operations;
+			i->i_data.a_ops = &squashfs_symlink_aops;
+			i->i_mode |= S_IFLNK;
+			SQUASHFS_I(i)->start_block = next_block;	/* position just after the header */
+			SQUASHFS_I(i)->offset = next_offset;
+
+			TRACE("Symbolic link inode %x:%x, start_block %llx, offset %x\n",
+					SQUASHFS_INODE_BLK(inode), offset,
+					next_block, next_offset);
+			break;
+		 }
+		 case SQUASHFS_BLKDEV_TYPE:
+		 case SQUASHFS_CHRDEV_TYPE: {
+			struct squashfs_dev_inode_header *inodep = &id.dev;
+			struct squashfs_dev_inode_header *sinodep = &sid.dev;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, sinodep, block, offset,
+						sizeof(*sinodep), &next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_DEV_INODE_HEADER(inodep, sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, inodep, block, offset,
+						sizeof(*inodep), &next_block, &next_offset))
+					goto failed_read;
+
+			i->i_nlink = inodep->nlink;
+			i->i_mode |= (inodeb->inode_type == SQUASHFS_CHRDEV_TYPE) ?
+					S_IFCHR : S_IFBLK;
+			init_special_inode(i, i->i_mode, old_decode_dev(inodep->rdev));
+
+			TRACE("Device inode %x:%x, rdev %x\n",
+					SQUASHFS_INODE_BLK(inode), offset, inodep->rdev);
+			break;
+		 }
+		 case SQUASHFS_FIFO_TYPE:
+		 case SQUASHFS_SOCKET_TYPE: {
+			struct squashfs_ipc_inode_header *inodep = &id.ipc;
+			struct squashfs_ipc_inode_header *sinodep = &sid.ipc;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, sinodep, block, offset,
+						sizeof(*sinodep), &next_block, &next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_IPC_INODE_HEADER(inodep, sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, inodep, block, offset,
+						sizeof(*inodep), &next_block, &next_offset))
+					goto failed_read;
+
+			i->i_nlink = inodep->nlink;
+			i->i_mode |= (inodeb->inode_type == SQUASHFS_FIFO_TYPE)
+							? S_IFIFO : S_IFSOCK;
+			init_special_inode(i, i->i_mode, 0);
+			break;
+		 }
+		 default:
+			ERROR("Unknown inode type %d in squashfs_iget!\n",
+					inodeb->inode_type);
+			goto failed_read1;
+	}
+
+	return 1;
+
+failed_read:
+	ERROR("Unable to read inode [%llx:%x]\n", block, offset);
+
+failed_read1:
+	make_bad_inode(i);
+	return 0;
+}
+
+/* Allocate msblk->inode_lookup_table and read it from sblk->lookup_table_start; returns 1 on success, 0 on failure. */
+static int read_inode_lookup_table(struct super_block *s)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(sblk->inodes);	/* table size in bytes */
+
+	TRACE("In read_inode_lookup_table, length %d\n", length);
+
+	/* Allocate inode lookup table */
+	msblk->inode_lookup_table = kmalloc(length, GFP_KERNEL);
+	if (msblk->inode_lookup_table == NULL) {
+		ERROR("Failed to allocate inode lookup table\n");
+		return 0;
+	}
+   
+	if (!squashfs_read_data(s, (char *) msblk->inode_lookup_table,
+			sblk->lookup_table_start, length |
+			SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length)) {	/* NOTE(review): the flag bit presumably marks the table as stored uncompressed - confirm in squashfs_fs.h */
+		ERROR("unable to read inode lookup table\n");
+		return 0;	/* the table stays allocated; it is not freed here */
+	}
+
+	if (msblk->swap) {
+		int i;
+		long long block;
+
+		for (i = 0; i < SQUASHFS_LOOKUP_BLOCKS(sblk->inodes); i++) {
+			/* convert each entry in place, going through a temporary */
+			SQUASHFS_SWAP_LOOKUP_BLOCKS((&block),
+						&msblk->inode_lookup_table[i], 1);
+			msblk->inode_lookup_table[i] = block;
+		}
+	}
+
+	return 1;
+}
+
+/* Allocate msblk->fragment_index and read it from sblk->fragment_table_start; returns 1 on success (including an empty table), 0 on failure. */
+static int read_fragment_index_table(struct super_block *s)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(sblk->fragments);	/* table size in bytes */
+
+	if(length == 0)	/* nothing to read; msblk->fragment_index is left unassigned here */
+		return 1;
+
+	/* Allocate fragment index table */
+	msblk->fragment_index = kmalloc(length, GFP_KERNEL);
+	if (msblk->fragment_index == NULL) {
+		ERROR("Failed to allocate fragment index table\n");
+		return 0;
+	}
+   
+	if (!squashfs_read_data(s, (char *) msblk->fragment_index,
+			sblk->fragment_table_start, length |
+			SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length)) {	/* NOTE(review): the flag bit presumably marks the table as stored uncompressed - confirm in squashfs_fs.h */
+		ERROR("unable to read fragment index table\n");
+		return 0;	/* the table stays allocated; it is not freed here */
+	}
+
+	if (msblk->swap) {
+		int i;
+		long long fragment;
+
+		for (i = 0; i < SQUASHFS_FRAGMENT_INDEXES(sblk->fragments); i++) {
+			/* convert each entry in place, going through a temporary */
+			SQUASHFS_SWAP_FRAGMENT_INDEXES((&fragment),
+						&msblk->fragment_index[i], 1);
+			msblk->fragment_index[i] = fragment;
+		}
+	}
+
+	return 1;
+}
+
+/* Install the default reader callbacks in @msblk and check the superblock version; returns 1 if the filesystem can be mounted, 0 otherwise. */
+static int supported_squashfs_filesystem(struct squashfs_sb_info *msblk, int silent)	/* NOTE(review): @silent is presumably consumed by the SERROR macro - confirm */
+{
+	struct squashfs_super_block *sblk = &msblk->sblk;
+
+	msblk->read_inode = squashfs_read_inode;
+	msblk->read_blocklist = read_blocklist;
+	msblk->read_fragment_index_table = read_fragment_index_table;
+
+	if (sblk->s_major == 1) {
+		if (!squashfs_1_0_supported(msblk)) {	/* NOTE(review): presumably swaps in 1.0 callbacks when compiled in - confirm */
+			SERROR("Major/Minor mismatch, Squashfs 1.0 filesystems "
+				"are unsupported\n");
+			SERROR("Please recompile with Squashfs 1.0 support enabled\n");
+			return 0;
+		}
+	} else if (sblk->s_major == 2) {
+		if (!squashfs_2_0_supported(msblk)) {	/* NOTE(review): presumably swaps in 2.0 callbacks when compiled in - confirm */
+			SERROR("Major/Minor mismatch, Squashfs 2.0 filesystems "
+				"are unsupported\n");
+			SERROR("Please recompile with Squashfs 2.0 support enabled\n");
+			return 0;
+		}
+	} else if(sblk->s_major != SQUASHFS_MAJOR || sblk->s_minor >
+			SQUASHFS_MINOR) {	/* any other major, or a newer minor than this driver knows */
+		SERROR("Major/Minor mismatch, trying to mount newer %d.%d "
+				"filesystem\n", sblk->s_major, sblk->s_minor);
+		SERROR("Please update your kernel\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+/* Mount-time setup: read and validate the on-disk superblock, allocate caches and tables, create the root dentry. Returns 0, -ENOMEM or -EINVAL. */
+static int squashfs_fill_super(struct super_block *s, void *data, int silent)
+{
+	struct squashfs_sb_info *msblk;
+	struct squashfs_super_block *sblk;
+	char b[BDEVNAME_SIZE];
+	struct inode *root;
+
+	TRACE("Entered squashfs_fill_superblock\n");
+
+	s->s_fs_info = kzalloc(sizeof(struct squashfs_sb_info), GFP_KERNEL);
+	if (s->s_fs_info == NULL) {
+		ERROR("Failed to allocate superblock\n");
+		goto failure;
+	}
+	msblk = s->s_fs_info;
+
+	msblk->stream.workspace = vmalloc(zlib_inflate_workspacesize());
+	if (msblk->stream.workspace == NULL) {
+		ERROR("Failed to allocate zlib workspace\n");
+		kfree(s->s_fs_info);	/* the "failure" exit frees nothing, so release the info block here */
+		s->s_fs_info = NULL;
+		goto failure;
+	}
+	sblk = &msblk->sblk;
+	msblk->devblksize = sb_min_blocksize(s, BLOCK_SIZE);
+	msblk->devblksize_log2 = ffz(~msblk->devblksize);
+
+	mutex_init(&msblk->read_data_mutex);
+	mutex_init(&msblk->read_page_mutex);
+	mutex_init(&msblk->meta_index_mutex);
+	/* sblk->bytes_used is checked in squashfs_read_data to ensure reads are not beyond filesystem end.
+	 * As we're using squashfs_read_data to read sblk here, first set sblk->bytes_used to a useful value */
+	sblk->bytes_used = sizeof(struct squashfs_super_block);
+	if (!squashfs_read_data(s, (char *) sblk, SQUASHFS_START,
+					sizeof(struct squashfs_super_block) |
+					SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, sizeof(struct squashfs_super_block))) {
+		SERROR("unable to read superblock\n");
+		goto failed_mount;
+	}
+
+	/* Check it is a SQUASHFS superblock */
+	if ((s->s_magic = sblk->s_magic) != SQUASHFS_MAGIC) {
+		if (sblk->s_magic == SQUASHFS_MAGIC_SWAP) {
+			struct squashfs_super_block ssblk;
+
+			WARNING("Mounting a different endian SQUASHFS filesystem on %s\n",
+				bdevname(s->s_bdev, b));
+
+			SQUASHFS_SWAP_SUPER_BLOCK(&ssblk, sblk);
+			memcpy(sblk, &ssblk, sizeof(struct squashfs_super_block));
+			msblk->swap = 1;
+		} else  {
+			SERROR("Can't find a SQUASHFS superblock on %s\n",
+							bdevname(s->s_bdev, b));
+			goto failed_mount;
+		}
+	}
+
+	/* Check the MAJOR & MINOR versions */
+	if(!supported_squashfs_filesystem(msblk, silent))
+		goto failed_mount;
+
+	/* Check the filesystem does not extend beyond the end of the block device */
+	if(sblk->bytes_used < 0 || sblk->bytes_used > i_size_read(s->s_bdev->bd_inode))
+		goto failed_mount;
+
+	/* Check the root inode for sanity */
+	if (SQUASHFS_INODE_OFFSET(sblk->root_inode) > SQUASHFS_METADATA_SIZE)
+		goto failed_mount;
+
+	TRACE("Found valid superblock on %s\n", bdevname(s->s_bdev, b));
+	TRACE("Inodes are %scompressed\n", SQUASHFS_UNCOMPRESSED_INODES(sblk->flags)
+					? "un" : "");
+	TRACE("Data is %scompressed\n", SQUASHFS_UNCOMPRESSED_DATA(sblk->flags)
+					? "un" : "");
+	TRACE("Check data is %spresent in the filesystem\n",
+					SQUASHFS_CHECK_DATA(sblk->flags) ?  "" : "not ");
+	TRACE("Filesystem size %lld bytes\n", sblk->bytes_used);
+	TRACE("Block size %d\n", sblk->block_size);
+	TRACE("Number of inodes %d\n", sblk->inodes);
+	if (sblk->s_major > 1)
+		TRACE("Number of fragments %d\n", sblk->fragments);
+	TRACE("Number of uids %d\n", sblk->no_uids);
+	TRACE("Number of gids %d\n", sblk->no_guids);
+	TRACE("sblk->inode_table_start %llx\n", sblk->inode_table_start);
+	TRACE("sblk->directory_table_start %llx\n", sblk->directory_table_start);
+	if (sblk->s_major > 1)
+		TRACE("sblk->fragment_table_start %llx\n", sblk->fragment_table_start);
+	TRACE("sblk->uid_start %llx\n", sblk->uid_start);
+
+	s->s_maxbytes = MAX_LFS_FILESIZE;
+	s->s_flags |= MS_RDONLY;
+	s->s_op = &squashfs_super_ops;
+
+	msblk->block_cache = squashfs_cache_init("metadata", SQUASHFS_CACHED_BLKS,
+		SQUASHFS_METADATA_SIZE, 0);
+	if (msblk->block_cache == NULL)
+		goto failed_mount;
+
+	/* Allocate read_page block */
+	msblk->read_page = vmalloc(sblk->block_size);
+	if (msblk->read_page == NULL) {
+		ERROR("Failed to allocate read_page block\n");
+		goto failed_mount;
+	}
+
+	/* Allocate uid and gid tables */
+	msblk->uid = kmalloc((sblk->no_uids + sblk->no_guids) *
+					sizeof(unsigned int), GFP_KERNEL);
+	if (msblk->uid == NULL) {
+		ERROR("Failed to allocate uid/gid table\n");
+		goto failed_mount;
+	}
+	msblk->guid = msblk->uid + sblk->no_uids;	/* gids are stored directly after the uids */
+	if (msblk->swap) {
+		unsigned int suid[sblk->no_uids + sblk->no_guids];	/* NOTE(review): on-stack VLA sized from on-disk counts - confirm their width bounds it */
+
+		if (!squashfs_read_data(s, (char *) &suid, sblk->uid_start,
+					((sblk->no_uids + sblk->no_guids) *
+					 sizeof(unsigned int)) |
+					SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, (sblk->no_uids + sblk->no_guids) * sizeof(unsigned int))) {
+			ERROR("unable to read uid/gid table\n");
+			goto failed_mount;
+		}
+
+		SQUASHFS_SWAP_DATA(msblk->uid, suid, (sblk->no_uids +
+			sblk->no_guids), (sizeof(unsigned int) * 8));
+	} else
+		if (!squashfs_read_data(s, (char *) msblk->uid, sblk->uid_start,
+					((sblk->no_uids + sblk->no_guids) *
+					 sizeof(unsigned int)) |
+					SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, (sblk->no_uids + sblk->no_guids) * sizeof(unsigned int))) {
+			ERROR("unable to read uid/gid table\n");
+			goto failed_mount;
+		}
+
+	if (sblk->s_major == 1 && squashfs_1_0_supported(msblk))
+		goto allocate_root;	/* the fragment and lookup tables below are skipped for 1.x images */
+
+	msblk->fragment_cache = squashfs_cache_init("fragment",
+		SQUASHFS_CACHED_FRAGMENTS, sblk->block_size, 1);
+	if (msblk->fragment_cache == NULL)
+		goto failed_mount;
+
+	/* Allocate and read fragment index table */
+	if (msblk->read_fragment_index_table(s) == 0)
+		goto failed_mount;
+
+	if(sblk->s_major < 3 || sblk->lookup_table_start == SQUASHFS_INVALID_BLK)
+		goto allocate_root;
+
+	/* Allocate and read inode lookup table */
+	if (read_inode_lookup_table(s) == 0)
+		goto failed_mount;
+
+	s->s_export_op = &squashfs_export_ops;
+
+allocate_root:
+	root = new_inode(s);
+	if (root == NULL)	/* previously passed to read_inode unchecked */
+		goto failed_mount;
+	if ((msblk->read_inode)(root, sblk->root_inode) == 0) {
+		iput(root);	/* drop the reference from new_inode(); it was leaked before */
+		goto failed_mount;
+	}
+	insert_inode_hash(root);
+
+	s->s_root = d_alloc_root(root);
+	if (s->s_root == NULL) {
+		ERROR("Root inode create failed\n");
+		iput(root);
+		goto failed_mount;
+	}
+
+	TRACE("Leaving squashfs_fill_super\n");
+	return 0;
+
+failed_mount:
+	kfree(msblk->inode_lookup_table);	/* msblk came from kzalloc, so anything not yet set up is still NULL here */
+	kfree(msblk->fragment_index);
+	squashfs_cache_delete(msblk->fragment_cache);
+	kfree(msblk->uid);
+	vfree(msblk->read_page);
+	squashfs_cache_delete(msblk->block_cache);
+	kfree(msblk->fragment_index_2);
+	vfree(msblk->stream.workspace);
+	kfree(s->s_fs_info);
+	s->s_fs_info = NULL;
+	return -EINVAL;
+
+failure:
+	return -ENOMEM;
+}
+
+/* statfs hook: report sizes taken from the in-memory superblock copy; always returns 0. */
+static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct squashfs_sb_info *info = dentry->d_sb->s_fs_info;
+	struct squashfs_super_block *super = &info->sblk;
+
+	TRACE("Entered squashfs_statfs\n");
+
+	buf->f_type = SQUASHFS_MAGIC;
+	buf->f_namelen = SQUASHFS_NAME_LEN;
+	buf->f_bsize = super->block_size;
+	buf->f_files = super->inodes;
+	/* bytes_used expressed in blocks, rounded up */
+	buf->f_blocks = 1 + ((super->bytes_used - 1) >> super->block_log);
+	/* no free blocks or inodes are ever reported */
+	buf->f_bfree = buf->f_bavail = buf->f_ffree = 0;
+	return 0;
+}
+
+/* readpage for symlinks: fill the page with its slice of the link target, zero-padding the rest; always returns 0 and marks the page up to date. */
+static int squashfs_symlink_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	int target = page->index << PAGE_CACHE_SHIFT;	/* byte offset of this page within the link text */
+	int skipped = 0, got, want;
+	long long blk = SQUASHFS_I(inode)->start_block;
+	int off = SQUASHFS_I(inode)->offset;
+	void *kaddr = kmap(page);
+
+	TRACE("Entered squashfs_symlink_readpage, page index %ld, start block "
+				"%llx, offset %x\n", page->index,
+				SQUASHFS_I(inode)->start_block,
+				SQUASHFS_I(inode)->offset);
+
+	/* advance the metadata cursor to the start of this page (NULL buffer - presumably skip-only, confirm) */
+	while (skipped < target) {
+		got = squashfs_get_cached_block(inode->i_sb, NULL, blk,
+				off, PAGE_CACHE_SIZE, &blk, &off);
+		if (got == 0) {
+			ERROR("Unable to read symbolic link [%llx:%x]\n", blk, off);
+			goto zero_fill;
+		}
+		skipped += got;
+	}
+	if (skipped != target) {
+		ERROR("(squashfs_symlink_readpage) length != index\n");
+		got = 0;
+		goto zero_fill;
+	}
+
+	want = min_t(int, i_size_read(inode) - skipped, PAGE_CACHE_SIZE);
+	got = squashfs_get_cached_block(inode->i_sb, kaddr, blk, off,
+		want, &blk, &off);
+	if (got == 0)
+		ERROR("Unable to read symbolic link [%llx:%x]\n", blk, off);
+
+zero_fill:
+	memset(kaddr + got, 0, PAGE_CACHE_SIZE - got);	/* zero whatever was not read */
+	kunmap(page);
+	flush_dcache_page(page);
+	SetPageUptodate(page);
+	unlock_page(page);
+	return 0;
+}
+
+/* Find an unlocked cached meta index of this inode starting within [offset, index], preferring the furthest one; lock and return it, or NULL. */
+static struct meta_index *locate_meta_index(struct inode *inode, int index, int offset)
+{
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+	struct meta_index *best = NULL, *cand;
+	int slot;
+
+	mutex_lock(&msblk->meta_index_mutex);
+
+	TRACE("locate_meta_index: index %d, offset %d\n", index, offset);
+
+	if (msblk->meta_index != NULL) {
+		for (slot = 0; slot < SQUASHFS_META_NUMBER; slot++) {
+			cand = &msblk->meta_index[slot];
+
+			/* skip slots that are locked or belong to another inode */
+			if (cand->locked != 0 || cand->inode_number != inode->i_ino)
+				continue;
+			/* skip slots that start outside the wanted window */
+			if (cand->offset < offset || cand->offset > index)
+				continue;
+
+			TRACE("locate_meta_index: entry %d, offset %d\n", slot,
+					cand->offset);
+			best = cand;
+			offset = cand->offset;	/* later candidates must start at least this far in */
+		}
+		if (best)
+			best->locked = 1;
+	}
+
+	mutex_unlock(&msblk->meta_index_mutex);
+	return best;
+}
+
+/* Claim an unlocked meta index slot (allocating the slot array on first use) and initialise it, locked, for this inode; NULL if none is available. */
+static struct meta_index *empty_meta_index(struct inode *inode, int offset, int skip)
+{
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+	struct meta_index *slot = NULL;
+	int n;
+
+	mutex_lock(&msblk->meta_index_mutex);
+
+	TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip);
+
+	if (msblk->meta_index == NULL) {
+		/* first use: allocate the slot array and mark every slot free */
+		msblk->meta_index = kmalloc(sizeof(struct meta_index) *
+					SQUASHFS_META_NUMBER, GFP_KERNEL);
+		if (msblk->meta_index == NULL) {
+			ERROR("Failed to allocate meta_index\n");
+			goto out;
+		}
+		for (n = SQUASHFS_META_NUMBER - 1; n >= 0; n--) {
+			msblk->meta_index[n].locked = 0;
+			msblk->meta_index[n].inode_number = 0;
+		}
+		msblk->next_meta_index = 0;
+	}
+
+	/* starting at next_meta_index, try each slot at most once until an unlocked one turns up */
+	n = SQUASHFS_META_NUMBER;
+	while (n != 0 && msblk->meta_index[msblk->next_meta_index].locked) {
+		msblk->next_meta_index = (msblk->next_meta_index + 1) %
+			SQUASHFS_META_NUMBER;
+		n--;
+	}
+	if (n == 0) {
+		TRACE("empty_meta_index: failed!\n");
+		goto out;
+	}
+
+	slot = &msblk->meta_index[msblk->next_meta_index];
+	TRACE("empty_meta_index: returned meta entry %d, %p\n",
+			msblk->next_meta_index, slot);
+	msblk->next_meta_index = (msblk->next_meta_index + 1) %
+			SQUASHFS_META_NUMBER;
+
+	slot->inode_number = inode->i_ino;
+	slot->offset = offset;
+	slot->skip = skip;
+	slot->entries = 0;
+	slot->locked = 1;
+out:
+	mutex_unlock(&msblk->meta_index_mutex);
+	return slot;
+}
+
+/* Unlock a meta index slot so locate_meta_index()/empty_meta_index() may hand it out again; inode is unused. */
+static void release_meta_index(struct inode *inode, struct meta_index *meta)
+{
+	meta->locked = 0;
+	smp_mb();	/* full barrier after clearing the flag; meta_index_mutex is not taken here */
+}
+
+
+static int read_block_index(struct super_block *s, int blocks, char *block_list,
+				long long *start_block, int *offset)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	unsigned int *block_listp;
+	int block = 0;
+	
+	if (msblk->swap) {
+		char sblock_list[blocks << 2];
+
+		if (!squashfs_get_cached_block(s, sblock_list, *start_block,
+				*offset, blocks << 2, start_block, offset)) {
+			ERROR("Fail reading block list [%llx:%x]\n", *start_block, *offset);
+			goto failure;
+		}
+		SQUASHFS_SWAP_INTS(((unsigned int *)block_list),
+				((unsigned int *)sblock_list), blocks);
+	} else {
+		if (!squashfs_get_cached_block(s, block_list, *start_block,
+				*offset, blocks << 2, start_block, offset)) {
+			ERROR("Fail reading block list [%llx:%x]\n", *start_block, *offset);
+			goto failure;
+		}
+	}
+
+	for (block_listp = (unsigned int *) block_list; blocks;
+				block_listp++, blocks --)
+		block += SQUASHFS_COMPRESSED_SIZE_BLOCK(*block_listp);
+
+	return block;
+
+failure:
+	return -1;
+}
+
+/* SIZE: byte size of the block_list scratch buffer, so at most SIZE >> 2 four-byte entries are read per call. */
+#define SIZE 256
+/* Meta-index skip factor for a file of `blocks` blocks: grows with the file size and is capped at 7. */
+static inline int calculate_skip(int blocks) {
+	int skip = 1 + (blocks - 1) / ((SQUASHFS_SLOTS * SQUASHFS_META_ENTRIES + 1) * SQUASHFS_META_INDEXES);
+	return skip > 7 ? 7 : skip;
+}
+
+/* Use (and extend) the cached meta indexes to get near data block `index`: fills in the block-list cursor and data position reached and returns the block number they correspond to, or -1 on error. */
+static int get_meta_index(struct inode *inode, int index,
+		long long *index_block, int *index_offset,
+		long long *data_block, char *block_list)
+{
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	int skip = calculate_skip(i_size_read(inode) >> sblk->block_log);	/* skip factor from the file length in blocks */
+	int offset = 0;	/* meta entries covered so far */
+	struct meta_index *meta;
+	struct meta_entry *meta_entry;
+	long long cur_index_block = SQUASHFS_I(inode)->u.s1.block_list_start;	/* cursor starts at the head of the block list */
+	int cur_offset = SQUASHFS_I(inode)->offset;
+	long long cur_data_block = SQUASHFS_I(inode)->start_block;
+	int i;
+ 
+	index /= SQUASHFS_META_INDEXES * skip;	/* from here on `index` counts meta entries, not blocks */
+
+	while (offset < index) {
+		meta = locate_meta_index(inode, index, offset + 1);	/* unlocked cached index starting in [offset + 1, index] */
+
+		if (meta == NULL) {
+			meta = empty_meta_index(inode, offset + 1, skip);	/* nothing cached: claim a fresh slot */
+			if (meta == NULL)
+				goto all_done;	/* no slot available: report the position reached so far */
+		} else {
+			if(meta->entries == 0)
+				goto failed;
+			/* XXX */
+			offset = index < meta->offset + meta->entries ? index :
+				meta->offset + meta->entries - 1;	/* jump to index, or to the last cached entry if index lies beyond it */
+			/* XXX */
+			meta_entry = &meta->meta_entry[offset - meta->offset];
+			cur_index_block = meta_entry->index_block + sblk->inode_table_start;	/* stored relative to inode_table_start */
+			cur_offset = meta_entry->offset;
+			cur_data_block = meta_entry->data_block;
+			TRACE("get_meta_index: offset %d, meta->offset %d, "
+				"meta->entries %d\n", offset, meta->offset, meta->entries);
+			TRACE("get_meta_index: index_block 0x%llx, offset 0x%x"
+				" data_block 0x%llx\n", cur_index_block,
+				cur_offset, cur_data_block);
+		}
+
+		for (i = meta->offset + meta->entries; i <= index &&
+				i < meta->offset + SQUASHFS_META_ENTRIES; i++) {	/* append entries up to index or until the slot is full */
+			int blocks = skip * SQUASHFS_META_INDEXES;	/* block-list entries spanned by one meta entry */
+
+			while (blocks) {
+				int block = blocks > (SIZE >> 2) ? (SIZE >> 2) : blocks;	/* read at most SIZE >> 2 entries per call */
+				int res = read_block_index(inode->i_sb, block, block_list,
+					&cur_index_block, &cur_offset);
+
+				if (res == -1)
+					goto failed;
+
+				cur_data_block += res;	/* res is the summed compressed size of the entries just read */
+				blocks -= block;
+			}
+
+			meta_entry = &meta->meta_entry[i - meta->offset];
+			meta_entry->index_block = cur_index_block - sblk->inode_table_start;
+			meta_entry->offset = cur_offset;
+			meta_entry->data_block = cur_data_block;
+			meta->entries ++;
+			offset ++;
+		}
+
+		TRACE("get_meta_index: meta->offset %d, meta->entries %d\n",
+				meta->offset, meta->entries);
+
+		release_meta_index(inode, meta);
+	}
+
+all_done:
+	*index_block = cur_index_block;
+	*index_offset = cur_offset;
+	*data_block = cur_data_block;
+
+	return offset * SQUASHFS_META_INDEXES * skip;	/* convert meta entries back into a block number */
+
+failed:
+	release_meta_index(inode, meta);	/* every goto failed happens with a locked slot held */
+	return -1;
+}
+
+/* Walk the inode's block list to data block `index`: returns that block's start position and stores its size word in *bsize; returns 0 on failure. */
+static long long read_blocklist(struct inode *inode, int index,
+				int readahead_blks, char *block_list,
+				unsigned short **block_p, unsigned int *bsize)
+{
+	/* readahead_blks and block_p are part of the signature but are not used by this implementation */
+	long long list_pos, data_pos;
+	int list_off, chunk, step;
+	int covered = get_meta_index(inode, index, &list_pos, &list_off, &data_pos,
+		block_list);
+
+	TRACE("read_blocklist: res %d, index %d, block_ptr 0x%llx, offset"
+		       " 0x%x, block 0x%llx\n", covered, index, list_pos, list_off, data_pos);
+
+	if (covered == -1)
+		return 0;
+
+	/*
+	 * The meta index positioned the cursor at block `covered`; step over
+	 * the remaining entries, at most SIZE >> 2 of them per read.
+	 */
+	for (index -= covered; index != 0; index -= chunk) {
+		chunk = index <= (SIZE >> 2) ? index : (SIZE >> 2);
+		step = read_block_index(inode->i_sb, chunk, block_list,
+			&list_pos, &list_off);
+		if (step == -1)
+			return 0;
+		data_pos += step;
+	}
+
+	/* one more entry: the size word of the wanted block itself */
+	if (read_block_index(inode->i_sb, 1, block_list, &list_pos, &list_off) == -1)
+		return 0;
+
+	*bsize = *((unsigned int *) block_list);
+
+	return data_pos;
+}
+
+/* readpage for regular files: obtain the data block (or tail fragment) containing the page and copy it into every page-cache page it covers; always returns 0, errors are flagged on the page. */
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	unsigned char *block_list = NULL;
+	long long block;
+	unsigned int bsize, i;
+	int bytes;
+	int index = page->index >> (sblk->block_log - PAGE_CACHE_SHIFT);	/* data block number containing this page */
+ 	void *pageaddr;
+	struct squashfs_cache_entry *fragment = NULL;
+	char *data_ptr = msblk->read_page;	/* source of the copy; redirected to the fragment data below */
+	
+	int mask = (1 << (sblk->block_log - PAGE_CACHE_SHIFT)) - 1;	/* pages per data block, minus one */
+	int start_index = page->index & ~mask;	/* first page index of this data block */
+	int end_index = start_index | mask;	/* last page index of this data block */
+	int file_end = i_size_read(inode) >> sblk->block_log;	/* number of whole blocks, i.e. the index of the tail block */
+	int sparse = 0;
+
+	TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+					page->index, SQUASHFS_I(inode)->start_block);
+
+	if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+					PAGE_CACHE_SHIFT))
+		goto out;	/* page lies wholly beyond EOF: just zero it */
+
+	if (SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK
+					|| index < file_end) {	/* ordinary data block: file has no fragment, or this is not the tail */
+		block_list = kmalloc(SIZE, GFP_KERNEL);
+		if (block_list == NULL) {
+			ERROR("Failed to allocate block_list\n");
+			goto error_out;
+		}
+
+		block = (msblk->read_blocklist)(inode, index, 1, block_list, NULL, &bsize);
+		if (block == 0)
+			goto error_out;
+
+		if (bsize == 0) { /* hole */
+			bytes = index == file_end ?
+				(i_size_read(inode) & (sblk->block_size - 1)) : sblk->block_size;
+			sparse = 1;
+		} else {
+			mutex_lock(&msblk->read_page_mutex);	/* held until the pages below have been filled from msblk->read_page */
+		
+			bytes = squashfs_read_data(inode->i_sb, msblk->read_page, block,
+				bsize, NULL, sblk->block_size);
+
+			if (bytes == 0) {
+				ERROR("Unable to read page, block %llx, size %x\n", block, bsize);
+				mutex_unlock(&msblk->read_page_mutex);
+				goto error_out;
+			}
+		}
+	} else {
+		fragment = get_cached_fragment(inode->i_sb,
+					SQUASHFS_I(inode)-> u.s1.fragment_start_block,
+					SQUASHFS_I(inode)->u.s1.fragment_size);
+
+		if (fragment->error) {
+			ERROR("Unable to read page, block %llx, size %x\n",
+					SQUASHFS_I(inode)->u.s1.fragment_start_block,
+					(int) SQUASHFS_I(inode)->u.s1.fragment_size);
+			release_cached_fragment(msblk, fragment);
+			goto error_out;
+		}
+		bytes = i_size_read(inode) & (sblk->block_size - 1);	/* tail length: file size modulo the block size */
+		data_ptr = fragment->data + SQUASHFS_I(inode)->u.s1.fragment_offset;
+	}
+
+	for (i = start_index; i <= end_index && bytes > 0; i++,
+						bytes -= PAGE_CACHE_SIZE, data_ptr += PAGE_CACHE_SIZE) {	/* one pass per page covered by the block */
+		struct page *push_page;
+		int avail = sparse ? 0 : min_t(unsigned int, bytes, PAGE_CACHE_SIZE);	/* holes copy nothing and are zero-filled */
+
+		TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
+
+		push_page = (i == page->index) ? page :
+			grab_cache_page_nowait(page->mapping, i);
+
+		if (!push_page)
+			continue;	/* neighbouring page not obtained: skip it */
+
+		if (PageUptodate(push_page))
+			goto skip_page;
+
+ 		pageaddr = kmap_atomic(push_page, KM_USER0);
+		memcpy(pageaddr, data_ptr, avail);
+		memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+		kunmap_atomic(pageaddr, KM_USER0);
+		flush_dcache_page(push_page);
+		SetPageUptodate(push_page);
+skip_page:
+		unlock_page(push_page);
+		if(i != page->index)
+			page_cache_release(push_page);	/* drop the reference on pages grabbed here; the caller's page is only unlocked */
+	}
+
+	if (SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK
+					|| index < file_end) {	/* same test as above: undo whichever path was taken */
+		if (!sparse)
+			mutex_unlock(&msblk->read_page_mutex);
+		kfree(block_list);
+	} else
+		release_cached_fragment(msblk, fragment);
+
+	return 0;
+
+error_out:
+	SetPageError(page);
+out:
+	pageaddr = kmap_atomic(page, KM_USER0);
+	memset(pageaddr, 0, PAGE_CACHE_SIZE);
+	kunmap_atomic(pageaddr, KM_USER0);
+	flush_dcache_page(page);
+	if (!PageError(page))
+		SetPageUptodate(page);
+	unlock_page(page);
+
+	kfree(block_list);	/* still NULL when this path is reached before the allocation */
+	return 0;
+}
+
+/* Use the directory index to skip towards f_pos: updates *next_block / *next_offset and returns the directory position they correspond to (3 when nothing is skipped). */
+static int get_dir_index_using_offset(struct super_block *s,
+				long long *next_block, unsigned int *next_offset,
+				long long index_start, unsigned int index_offset, int i_count,
+				long long f_pos)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	int i, length = 0;
+	struct squashfs_dir_index index;
+
+	TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %d\n",
+					i_count, (unsigned int) f_pos);
+
+	f_pos -= 3;	/* positions 0..2 belong to the "." and ".." entries emitted by readdir */
+	if (f_pos == 0)
+		goto finish;
+
+	/* A failed index read ends the walk at the last good entry instead of carrying on with an unread buffer */
+	for (i = 0; i < i_count; i++) {
+		if (msblk->swap) {
+			struct squashfs_dir_index sindex;
+			if (!squashfs_get_cached_block(s, &sindex, index_start, index_offset,
+					sizeof(sindex), &index_start, &index_offset))
+				break;
+			SQUASHFS_SWAP_DIR_INDEX(&index, &sindex);
+		} else if (!squashfs_get_cached_block(s, &index, index_start, index_offset,
+					sizeof(index), &index_start, &index_offset))
+			break;
+		if (index.index > f_pos)	/* this index entry is already past the target */
+			break;
+		if (!squashfs_get_cached_block(s, NULL, index_start, index_offset,
+					index.size + 1, &index_start, &index_offset))	/* step over the entry's name */
+			break;
+
+		length = index.index;
+		*next_block = index.start_block + sblk->directory_table_start;
+	}
+	*next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE;
+
+finish:
+	return length + 3;
+}
+
+/* Use the directory index to skip towards `name`: updates *next_block / *next_offset and returns the position reached; if the scratch buffer cannot be allocated nothing is skipped and 3 is returned. */
+static int get_dir_index_using_name(struct super_block *s,
+				long long *next_block, unsigned int *next_offset,
+				long long index_start, unsigned int index_offset, int i_count,
+				const char *name, int size)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	int i, length = 0;
+	struct squashfs_dir_index *index;
+	char *str;
+
+	TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count);
+
+	str = kmalloc(sizeof(struct squashfs_dir_index) +
+		(SQUASHFS_NAME_LEN + 1) * 2, GFP_KERNEL);	/* one buffer: NUL-terminated copy of name, then an index entry plus its name */
+	if (str == NULL) {
+		ERROR("Failed to allocate squashfs_dir_index\n");
+		goto failure;
+	}
+
+	index = (struct squashfs_dir_index *) (str + SQUASHFS_NAME_LEN + 1);
+	strncpy(str, name, size);	/* the visible caller, squashfs_lookup, rejects len > SQUASHFS_NAME_LEN first */
+	str[size] = '\0';
+
+	for (i = 0; i < i_count; i++) {
+		if (msblk->swap) {
+			struct squashfs_dir_index sindex;
+			if (!squashfs_get_cached_block(s, &sindex, index_start, index_offset,
+				sizeof(sindex), &index_start, &index_offset))
+				break;	/* unreadable index: keep the last good position */
+			SQUASHFS_SWAP_DIR_INDEX(index, &sindex);
+		} else if (!squashfs_get_cached_block(s, index, index_start, index_offset,
+				sizeof(struct squashfs_dir_index), &index_start, &index_offset))
+			break;
+
+		if (!squashfs_get_cached_block(s, index->name, index_start, index_offset,
+					index->size + 1, &index_start, &index_offset))
+			break;	/* name not read: do not compare against stale buffer contents */
+		index->name[index->size + 1] = '\0';	/* the stored name is size + 1 bytes long */
+		if (strcmp(index->name, str) > 0)
+			break;
+
+		length = index->index;
+		*next_block = index->start_block + sblk->directory_table_start;
+	}
+
+	*next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE;
+	kfree(str);
+
+failure:
+	return length + 3;
+}
+
+		
+static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+	struct inode *i = file->f_dentry->d_inode;
+	struct squashfs_sb_info *msblk = i->i_sb->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	long long next_block = SQUASHFS_I(i)->start_block +
+		sblk->directory_table_start;
+	int next_offset = SQUASHFS_I(i)->offset, length = 0, dir_count;
+	struct squashfs_dir_header dirh;
+	struct squashfs_dir_entry *dire;
+
+	TRACE("Entered squashfs_readdir [%llx:%x]\n", next_block, next_offset);
+
+	dire = kmalloc(sizeof(struct squashfs_dir_entry) +
+		SQUASHFS_NAME_LEN + 1, GFP_KERNEL);
+	if (dire == NULL) {
+		ERROR("Failed to allocate squashfs_dir_entry\n");
+		goto finish;
+	}
+
+	while(file->f_pos < 3) {
+		char *name;
+		int size, i_ino;
+
+		if(file->f_pos == 0) {
+			name = ".";
+			size = 1;
+			i_ino = i->i_ino;
+		} else {
+			name = "..";
+			size = 2;
+			i_ino = SQUASHFS_I(i)->u.s2.parent_inode;
+		}
+		TRACE("Calling filldir(%x, %s, %d, %d, %d, %d)\n",
+				(unsigned int) dirent, name, size, (int)
+				file->f_pos, i_ino, squashfs_filetype_table[1]);
+
+		if (filldir(dirent, name, size, file->f_pos, i_ino,
+				squashfs_filetype_table[1]) < 0) {
+				TRACE("Filldir returned less than 0\n");
+			goto finish;
+		}
+		file->f_pos += size;
+	}
+
+	length = get_dir_index_using_offset(i->i_sb, &next_block, &next_offset,
+				SQUASHFS_I(i)->u.s2.directory_index_start,
+				SQUASHFS_I(i)->u.s2.directory_index_offset,
+				SQUASHFS_I(i)->u.s2.directory_index_count, file->f_pos);
+
+	while (length < i_size_read(i)) {
+		/* read directory header */
+		if (msblk->swap) {
+			struct squashfs_dir_header sdirh;
+			
+			if (!squashfs_get_cached_block(i->i_sb, &sdirh, next_block,
+					 next_offset, sizeof(sdirh), &next_block, &next_offset))
+				goto failed_read;
+
+			length += sizeof(sdirh);
+			SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh);
+		} else {
+			if (!squashfs_get_cached_block(i->i_sb, &dirh, next_block,
+					next_offset, sizeof(dirh), &next_block, &next_offset))
+				goto failed_read;
+
+			length += sizeof(dirh);
+		}
+
+		dir_count = dirh.count + 1;
+		while (dir_count--) {
+			if (msblk->swap) {
+				struct squashfs_dir_entry sdire;
+				if (!squashfs_get_cached_block(i->i_sb, &sdire, next_block,
+						next_offset, sizeof(sdire), &next_block, &next_offset))
+					goto failed_read;
+				
+				length += sizeof(sdire);
+				SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire);
+			} else {
+				if (!squashfs_get_cached_block(i->i_sb, dire, next_block,
+						next_offset, sizeof(*dire), &next_block, &next_offset))
+					goto failed_read;
+
+				length += sizeof(*dire);
+			}
+
+			if (!squashfs_get_cached_block(i->i_sb, dire->name, next_block,
+						next_offset, dire->size + 1, &next_block, &next_offset))
+				goto failed_read;
+
+			length += dire->size + 1;
+
+			if (file->f_pos >= length)
+				continue;
+
+			dire->name[dire->size + 1] = '\0';
+
+			TRACE("Calling filldir(%x, %s, %d, %d, %x:%x, %d, %d)\n",
+					(unsigned int) dirent, dire->name, dire->size + 1,
+					(int) file->f_pos, dirh.start_block, dire->offset,
+					dirh.inode_number + dire->inode_number,
+					squashfs_filetype_table[dire->type]);
+
+			if (filldir(dirent, dire->name, dire->size + 1, file->f_pos,
+					dirh.inode_number + dire->inode_number,
+					squashfs_filetype_table[dire->type]) < 0) {
+				TRACE("Filldir returned less than 0\n");
+				goto finish;
+			}
+			file->f_pos = length;
+		}
+	}
+
+finish:
+	kfree(dire);
+	return 0;
+
+failed_read:
+	ERROR("Unable to read directory block [%llx:%x]\n", next_block,
+		next_offset);
+	kfree(dire);
+	return 0;
+}
+
+
+static struct dentry *squashfs_lookup(struct inode *i, struct dentry *dentry,
+				struct nameidata *nd)
+{
+	const unsigned char *name = dentry->d_name.name;
+	int len = dentry->d_name.len;
+	struct inode *inode = NULL;
+	struct squashfs_sb_info *msblk = i->i_sb->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	long long next_block = SQUASHFS_I(i)->start_block +
+				sblk->directory_table_start;
+	int next_offset = SQUASHFS_I(i)->offset, length = 0, dir_count;
+	struct squashfs_dir_header dirh;
+	struct squashfs_dir_entry *dire;
+
+	TRACE("Entered squashfs_lookup [%llx:%x]\n", next_block, next_offset);
+
+	dire = kmalloc(sizeof(struct squashfs_dir_entry) +
+		SQUASHFS_NAME_LEN + 1, GFP_KERNEL);
+	if (dire == NULL) {
+		ERROR("Failed to allocate squashfs_dir_entry\n");
+		goto exit_lookup;
+	}
+
+	if (len > SQUASHFS_NAME_LEN)
+		goto exit_lookup;
+
+	length = get_dir_index_using_name(i->i_sb, &next_block, &next_offset,
+				SQUASHFS_I(i)->u.s2.directory_index_start,
+				SQUASHFS_I(i)->u.s2.directory_index_offset,
+				SQUASHFS_I(i)->u.s2.directory_index_count, name, len);
+
+	while (length < i_size_read(i)) {
+		/* read directory header */
+		if (msblk->swap) {
+			struct squashfs_dir_header sdirh;
+			if (!squashfs_get_cached_block(i->i_sb, &sdirh, next_block,
+					 next_offset, sizeof(sdirh), &next_block, &next_offset))
+				goto failed_read;
+
+			length += sizeof(sdirh);
+			SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh);
+		} else {
+			if (!squashfs_get_cached_block(i->i_sb, &dirh, next_block,
+					next_offset, sizeof(dirh), &next_block, &next_offset))
+				goto failed_read;
+
+			length += sizeof(dirh);
+		}
+
+		dir_count = dirh.count + 1;
+		while (dir_count--) {
+			if (msblk->swap) {
+				struct squashfs_dir_entry sdire;
+				if (!squashfs_get_cached_block(i->i_sb, &sdire, next_block,
+						next_offset, sizeof(sdire), &next_block, &next_offset))
+					goto failed_read;
+				
+				length += sizeof(sdire);
+				SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire);
+			} else {
+				if (!squashfs_get_cached_block(i->i_sb, dire, next_block,
+						next_offset, sizeof(*dire), &next_block, &next_offset))
+					goto failed_read;
+
+				length += sizeof(*dire);
+			}
+
+			if (!squashfs_get_cached_block(i->i_sb, dire->name, next_block,
+					next_offset, dire->size + 1, &next_block, &next_offset))
+				goto failed_read;
+
+			length += dire->size + 1;
+
+			if (name[0] < dire->name[0])
+				goto exit_lookup;
+
+			if ((len == dire->size + 1) && !strncmp(name, dire->name, len)) {
+				squashfs_inode_t ino = SQUASHFS_MKINODE(dirh.start_block,
+								dire->offset);
+
+				TRACE("calling squashfs_iget for directory entry %s, inode"
+					"  %x:%x, %d\n", name, dirh.start_block, dire->offset,
+					dirh.inode_number + dire->inode_number);
+
+				inode = squashfs_iget(i->i_sb, ino, dirh.inode_number + dire->inode_number);
+
+				goto exit_lookup;
+			}
+		}
+	}
+
+exit_lookup:
+	kfree(dire);
+	if (inode)
+		return d_splice_alias(inode, dentry);
+	d_add(dentry, inode);
+	return ERR_PTR(0);
+
+failed_read:
+	ERROR("Unable to read directory block [%llx:%x]\n", next_block,
+		next_offset);
+	goto exit_lookup;
+}
+
+/* Remount hook: force MS_RDONLY into *flags whatever was requested; always returns 0. */
+static int squashfs_remount(struct super_block *s, int *flags, char *data)
+{
+	*flags = *flags | MS_RDONLY;
+	return 0;
+}
+/* put_super hook: free everything hanging off s->s_fs_info, now including the inode lookup table, then the info block itself. */
+static void squashfs_put_super(struct super_block *s)
+{
+	if (s->s_fs_info) {
+		struct squashfs_sb_info *sbi = s->s_fs_info;
+		squashfs_cache_delete(sbi->block_cache);
+		squashfs_cache_delete(sbi->fragment_cache);
+		vfree(sbi->read_page);
+		kfree(sbi->uid);	/* the gid table shares this allocation */
+		kfree(sbi->inode_lookup_table);	/* was never freed on unmount, although fill_super's error path kfrees this field */
+		kfree(sbi->fragment_index);
+		kfree(sbi->fragment_index_2);
+		kfree(sbi->meta_index);
+		vfree(sbi->stream.workspace);
+		kfree(s->s_fs_info);
+		s->s_fs_info = NULL;
+	}
+}
+
+/* get_sb hook: hand off to get_sb_bdev() with squashfs_fill_super as the fill callback and return its result. */
+static int squashfs_get_sb(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super,
+				mnt);
+}
+
+
+static int __init init_squashfs_fs(void)
+{
+	int err = init_inodecache();
+	if (err)
+		goto out;
+
+	printk(KERN_INFO "squashfs: version 3.4 (2008/08/26) "
+		"Phillip Lougher\n");
+
+	err = register_filesystem(&squashfs_fs_type);
+	if (err)
+		destroy_inodecache();
+
+out:
+	return err;
+}
+
+/* Module exit: unregister the filesystem type, then destroy the inode cache. */
+static void __exit exit_squashfs_fs(void)
+{
+	unregister_filesystem(&squashfs_fs_type);
+	destroy_inodecache();
+}
+
+/* Slab cache from which squashfs_alloc_inode() takes every squashfs_inode_info. */
+static struct kmem_cache * squashfs_inode_cachep;
+
+/* alloc_inode hook: take a squashfs_inode_info from the slab cache and return its embedded VFS inode, or NULL if the allocation fails. */
+static struct inode *squashfs_alloc_inode(struct super_block *sb)
+{
+	struct squashfs_inode_info *info = kmem_cache_alloc(squashfs_inode_cachep, GFP_KERNEL);
+
+	return info != NULL ? &info->vfs_inode : NULL;
+}
+
+/* destroy_inode hook: return the squashfs_inode_info containing this inode to the slab cache. */
+static void squashfs_destroy_inode(struct inode *inode)
+{
+	kmem_cache_free(squashfs_inode_cachep, SQUASHFS_I(inode));
+}
+
+/* Slab constructor passed to kmem_cache_create(): initialises the VFS inode embedded in a cache object. */
+static void init_once(void *foo)
+{
+	struct inode *vfs = &((struct squashfs_inode_info *) foo)->vfs_inode;
+
+	inode_init_once(vfs);
+}
+ 
+/* Create the slab cache for squashfs inodes; returns 0, or -ENOMEM if the cache cannot be created. */
+static int __init init_inodecache(void)
+{
+	const unsigned long flags = SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT;
+
+	squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache",
+			sizeof(struct squashfs_inode_info), 0, flags, init_once);
+
+	return squashfs_inode_cachep ? 0 : -ENOMEM;
+}
+
+/* Destroy the slab cache created by init_inodecache(). */
+static void destroy_inodecache(void)
+{
+	kmem_cache_destroy(squashfs_inode_cachep);
+}
+
+/* Module entry/exit points and metadata. */
+module_init(init_squashfs_fs);
+module_exit(exit_squashfs_fs);
+MODULE_DESCRIPTION("squashfs 3.4, a compressed read-only filesystem");
+MODULE_AUTHOR("Phillip Lougher <phillip@lougher.demon.co.uk>");
+MODULE_LICENSE("GPL");
diff -Npur linux-2.6-block/fs/squashfs/squashfs.h linux-2.6-block-custom/fs/squashfs/squashfs.h
--- linux-2.6-block/fs/squashfs/squashfs.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/fs/squashfs/squashfs.h	2008-09-26 20:30:06.850751003 +0900
@@ -0,0 +1,86 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * squashfs.h
+ */
+
+#ifdef CONFIG_SQUASHFS_1_0_COMPATIBILITY
+#undef CONFIG_SQUASHFS_1_0_COMPATIBILITY
+#endif
+/* TRACE() is debug output; without SQUASHFS_TRACE it is an empty statement. */
+#ifdef SQUASHFS_TRACE
+#define TRACE(s, args...)	printk(KERN_NOTICE "SQUASHFS: "s, ## args)
+#else
+#define TRACE(s, args...)	do { } while (0)
+#endif
+
+#define ERROR(s, args...)	printk(KERN_ERR "SQUASHFS error: "s, ## args)
+/* Like ERROR(), but quiet when a variable named 'silent' in scope is non-zero. */
+#define SERROR(s, args...)	do { \
+				if (!silent) \
+				printk(KERN_ERR "SQUASHFS error: "s, ## args);\
+				} while(0)
+
+#define WARNING(s, args...)	printk(KERN_WARNING "SQUASHFS: "s, ## args)
+/* Map a VFS inode to the squashfs_inode_info that embeds it (vfs_inode). */
+static inline struct squashfs_inode_info *SQUASHFS_I(struct inode *inode)
+{
+	return container_of(inode, struct squashfs_inode_info, vfs_inode);
+}
+/* Compat builds get extern core helpers; otherwise SQSH_EXTERN is "static". */
+#if defined(CONFIG_SQUASHFS_1_0_COMPATIBILITY ) || defined(CONFIG_SQUASHFS_2_0_COMPATIBILITY)
+#define SQSH_EXTERN
+extern unsigned int squashfs_read_data(struct super_block *s, char *buffer,
+				long long index, unsigned int length,
+				long long *next_index, int srclength);
+extern int squashfs_get_cached_block(struct super_block *s, void *buffer,
+				long long block, unsigned int offset,
+				int length, long long *next_block,
+				unsigned int *next_offset);
+extern void release_cached_fragment(struct squashfs_sb_info *msblk, struct
+					squashfs_cache_entry *fragment);
+extern struct squashfs_cache_entry *get_cached_fragment(struct super_block
+					*s, long long start_block,
+					int length);
+extern struct inode *squashfs_iget(struct super_block *s, squashfs_inode_t inode, unsigned int inode_number);
+extern const struct address_space_operations squashfs_symlink_aops;
+extern const struct address_space_operations squashfs_aops;
+extern struct inode_operations squashfs_dir_inode_ops;
+#else
+#define SQSH_EXTERN static
+#endif
+/* 1.0 support probe; the inline stub always reports "unsupported" (0). */
+#ifdef CONFIG_SQUASHFS_1_0_COMPATIBILITY
+extern int squashfs_1_0_supported(struct squashfs_sb_info *msblk);
+#else
+static inline int squashfs_1_0_supported(struct squashfs_sb_info *msblk)
+{
+	return 0;
+}
+#endif
+/* 2.0 support probe: external when configured, else a stub returning 0. */
+#ifdef CONFIG_SQUASHFS_2_0_COMPATIBILITY
+extern int squashfs_2_0_supported(struct squashfs_sb_info *msblk);
+#else
+static inline int squashfs_2_0_supported(struct squashfs_sb_info *msblk)
+{
+	return 0;
+}
+#endif
diff -Npur linux-2.6-block/fs/squashfs/squashfs2_0.c linux-2.6-block-custom/fs/squashfs/squashfs2_0.c
--- linux-2.6-block/fs/squashfs/squashfs2_0.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/fs/squashfs/squashfs2_0.c	2008-09-26 20:30:06.850751003 +0900
@@ -0,0 +1,740 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * squashfs2_0.c
+ */
+
+#include <linux/squashfs_fs.h>
+#include <linux/module.h>
+#include <linux/zlib.h>
+#include <linux/fs.h>
+#include <linux/squashfs_fs_sb.h>
+#include <linux/squashfs_fs_i.h>
+
+#include "squashfs.h"
+static int squashfs_readdir_2(struct file *file, void *dirent, filldir_t filldir);
+static struct dentry *squashfs_lookup_2(struct inode *, struct dentry *,
+				struct nameidata *);
+/* Operation tables for 2.x directories; never written at run time, so const. */
+static const struct file_operations squashfs_dir_ops_2 = {
+	.read = generic_read_dir,
+	.readdir = squashfs_readdir_2
+};
+
+static const struct inode_operations squashfs_dir_inode_ops_2 = {
+	.lookup = squashfs_lookup_2
+};
+/* Maps a directory entry's type field to the DT_* value given to filldir. */
+static const unsigned char squashfs_filetype_table[] = {
+	DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK
+};
+
+/*
+ * Load the 2.x fragment index table into msblk->fragment_index_2, swapping
+ * each entry when msblk->swap is set.  Returns 1 on success, 0 on failure.
+ */
+static int read_fragment_index_table_2(struct super_block *s)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	unsigned int bytes = SQUASHFS_FRAGMENT_INDEX_BYTES_2(sblk->fragments);
+
+	msblk->fragment_index_2 = kmalloc(bytes, GFP_KERNEL);
+	if (msblk->fragment_index_2 == NULL) {
+		ERROR("Failed to allocate fragment index table\n");
+		return 0;
+	}
+	/* An empty table (bytes == 0) is not read at all. */
+	if (bytes && !squashfs_read_data(s, (char *) msblk->fragment_index_2,
+			sblk->fragment_table_start,
+			bytes | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, bytes)) {
+		ERROR("unable to read fragment index table\n");
+		return 0;
+	}
+	if (msblk->swap) {
+		int i;
+		unsigned int fragment;
+
+		for (i = 0; i < SQUASHFS_FRAGMENT_INDEXES_2(sblk->fragments);
+									i++) {
+			SQUASHFS_SWAP_FRAGMENT_INDEXES_2((&fragment),
+						&msblk->fragment_index_2[i], 1);
+			msblk->fragment_index_2[i] = fragment;
+		}
+	}
+
+	return 1;
+}
+
+
+/*
+ * Look up fragment number @fragment in the 2.x fragment table and store its
+ * start block and size through the out parameters.  Returns 1 on success,
+ * 0 if the number is out of range or the table entry cannot be read.
+ */
+static int get_fragment_location_2(struct super_block *s, unsigned int fragment,
+				long long *fragment_start_block,
+				unsigned int *fragment_size)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_fragment_entry_2 entry, raw;
+	long long start_block;
+	unsigned int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET_2(fragment);
+
+	/* @fragment comes from an on-disk inode: never index past the table. */
+	if (fragment >= msblk->sblk.fragments)
+		return 0;
+
+	start_block =
+		msblk->fragment_index_2[SQUASHFS_FRAGMENT_INDEX_2(fragment)];
+
+	/* Read into the scratch copy when the entry still has to be swapped. */
+	if (!squashfs_get_cached_block(s, msblk->swap ? &raw : &entry,
+				start_block, offset, sizeof(entry),
+				&start_block, &offset))
+		return 0;
+	if (msblk->swap) {
+		SQUASHFS_SWAP_FRAGMENT_ENTRY_2(&entry, &raw);
+	}
+
+	*fragment_start_block = entry.start_block;
+	*fragment_size = entry.size;
+	return 1;
+}
+
+
+/* Fill in the fields shared by every 2.x inode type from the base header. */
+static void squashfs_new_inode(struct squashfs_sb_info *msblk, struct inode *i,
+		struct squashfs_base_inode_header_2 *inodeb, unsigned int ino)
+{
+	struct squashfs_super_block *sblk = &msblk->sblk;
+
+	i->i_ino = ino;
+	i->i_mode = inodeb->mode;
+	i->i_nlink = 1;
+	i->i_size = 0;
+	i->i_atime.tv_sec = sblk->mkfs_time;
+	i->i_ctime.tv_sec = sblk->mkfs_time;
+	i->i_mtime.tv_sec = sblk->mkfs_time;
+	i->i_uid = msblk->uid[inodeb->uid];
+	/* a guid index of SQUASHFS_GUIDS means "gid is the same as uid" */
+	i->i_gid = (inodeb->guid == SQUASHFS_GUIDS) ?
+			i->i_uid : msblk->guid[inodeb->guid];
+}
+
+/* Fill VFS inode @i from on-disk 2.x inode @inode; returns 1, or 0 on error. */
+static int squashfs_read_inode_2(struct inode *i, squashfs_inode_t inode)
+{
+	struct super_block *s = i->i_sb;
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	unsigned int block = SQUASHFS_INODE_BLK(inode) +
+		sblk->inode_table_start;
+	unsigned int offset = SQUASHFS_INODE_OFFSET(inode);
+	unsigned int ino = SQUASHFS_MK_VFS_INODE(block -
+		sblk->inode_table_start, offset);
+	long long next_block;
+	unsigned int next_offset;
+	union squashfs_inode_header_2 id, sid;
+	struct squashfs_base_inode_header_2 *inodeb = &id.base,
+					  *sinodeb = &sid.base;
+
+	TRACE("Entered squashfs_read_inode_2\n");
+
+	if (msblk->swap) {
+		if (!squashfs_get_cached_block(s, (char *) sinodeb, block,
+					offset, sizeof(*sinodeb), &next_block,
+					&next_offset))
+			goto failed_read;
+		SQUASHFS_SWAP_BASE_INODE_HEADER_2(inodeb, sinodeb,
+					sizeof(*sinodeb));
+	} else
+		if (!squashfs_get_cached_block(s, (char *) inodeb, block,
+					offset, sizeof(*inodeb), &next_block,
+					&next_offset))
+			goto failed_read;
+
+	squashfs_new_inode(msblk, i, inodeb, ino);
+
+	switch(inodeb->inode_type) {
+		case SQUASHFS_FILE_TYPE: {
+			struct squashfs_reg_inode_header_2 *inodep = &id.reg;
+			struct squashfs_reg_inode_header_2 *sinodep = &sid.reg;
+			long long frag_blk;
+			unsigned int frag_size = 0;
+			/* re-read from the inode's start, now with the full header */
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, (char *)
+						sinodep, block, offset,
+						sizeof(*sinodep), &next_block,
+						&next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_REG_INODE_HEADER_2(inodep, sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, (char *)
+						inodep, block, offset,
+						sizeof(*inodep), &next_block,
+						&next_offset))
+					goto failed_read;
+
+			frag_blk = SQUASHFS_INVALID_BLK;
+			if (inodep->fragment != SQUASHFS_INVALID_FRAG &&
+					!get_fragment_location_2(s,
+					inodep->fragment, &frag_blk, &frag_size))
+				goto failed_read;
+				
+			i->i_size = inodep->file_size;
+			i->i_fop = &generic_ro_fops;
+			i->i_mode |= S_IFREG;
+			i->i_mtime.tv_sec = inodep->mtime;
+			i->i_atime.tv_sec = inodep->mtime;
+			i->i_ctime.tv_sec = inodep->mtime;
+			i->i_blocks = ((i->i_size - 1) >> 9) + 1; /* 512-byte units, rounded up */
+			SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk;
+			SQUASHFS_I(i)->u.s1.fragment_size = frag_size;
+			SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset;
+			SQUASHFS_I(i)->start_block = inodep->start_block;
+			SQUASHFS_I(i)->u.s1.block_list_start = next_block;
+			SQUASHFS_I(i)->offset = next_offset;
+			i->i_data.a_ops = &squashfs_aops;
+
+			TRACE("File inode %x:%x, start_block %x, "
+					"block_list_start %llx, offset %x\n",
+					SQUASHFS_INODE_BLK(inode), offset,
+					inodep->start_block, next_block,
+					next_offset);
+			break;
+		}
+		case SQUASHFS_DIR_TYPE: {
+			struct squashfs_dir_inode_header_2 *inodep = &id.dir;
+			struct squashfs_dir_inode_header_2 *sinodep = &sid.dir;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, (char *)
+						sinodep, block, offset,
+						sizeof(*sinodep), &next_block,
+						&next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_DIR_INODE_HEADER_2(inodep, sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, (char *)
+						inodep, block, offset,
+						sizeof(*inodep), &next_block,
+						&next_offset))
+					goto failed_read;
+
+			i->i_size = inodep->file_size;
+			i->i_op = &squashfs_dir_inode_ops_2;
+			i->i_fop = &squashfs_dir_ops_2;
+			i->i_mode |= S_IFDIR;
+			i->i_mtime.tv_sec = inodep->mtime;
+			i->i_atime.tv_sec = inodep->mtime;
+			i->i_ctime.tv_sec = inodep->mtime;
+			SQUASHFS_I(i)->start_block = inodep->start_block;
+			SQUASHFS_I(i)->offset = inodep->offset;
+			SQUASHFS_I(i)->u.s2.directory_index_count = 0;
+			SQUASHFS_I(i)->u.s2.parent_inode = 0;
+
+			TRACE("Directory inode %x:%x, start_block %x, offset "
+					"%x\n", SQUASHFS_INODE_BLK(inode),
+					offset, inodep->start_block,
+					inodep->offset);
+			break;
+		}
+		case SQUASHFS_LDIR_TYPE: {
+			struct squashfs_ldir_inode_header_2 *inodep = &id.ldir;
+			struct squashfs_ldir_inode_header_2 *sinodep = &sid.ldir;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, (char *)
+						sinodep, block, offset,
+						sizeof(*sinodep), &next_block,
+						&next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_LDIR_INODE_HEADER_2(inodep,
+						sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, (char *)
+						inodep, block, offset,
+						sizeof(*inodep), &next_block,
+						&next_offset))
+					goto failed_read;
+
+			i->i_size = inodep->file_size;
+			i->i_op = &squashfs_dir_inode_ops_2;
+			i->i_fop = &squashfs_dir_ops_2;
+			i->i_mode |= S_IFDIR;
+			i->i_mtime.tv_sec = inodep->mtime;
+			i->i_atime.tv_sec = inodep->mtime;
+			i->i_ctime.tv_sec = inodep->mtime;
+			SQUASHFS_I(i)->start_block = inodep->start_block;
+			SQUASHFS_I(i)->offset = inodep->offset;
+			SQUASHFS_I(i)->u.s2.directory_index_start = next_block;
+			SQUASHFS_I(i)->u.s2.directory_index_offset =
+								next_offset;
+			SQUASHFS_I(i)->u.s2.directory_index_count =
+								inodep->i_count;
+			SQUASHFS_I(i)->u.s2.parent_inode = 0;
+
+			TRACE("Long directory inode %x:%x, start_block %x, "
+					"offset %x\n",
+					SQUASHFS_INODE_BLK(inode), offset,
+					inodep->start_block, inodep->offset);
+			break;
+		}
+		case SQUASHFS_SYMLINK_TYPE: {
+			struct squashfs_symlink_inode_header_2 *inodep =
+								&id.symlink;
+			struct squashfs_symlink_inode_header_2 *sinodep =
+								&sid.symlink;
+	
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, (char *)
+						sinodep, block, offset,
+						sizeof(*sinodep), &next_block,
+						&next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_SYMLINK_INODE_HEADER_2(inodep,
+								sinodep);
+			} else
+				if (!squashfs_get_cached_block(s, (char *)
+						inodep, block, offset,
+						sizeof(*inodep), &next_block,
+						&next_offset))
+					goto failed_read;
+
+			i->i_size = inodep->symlink_size;
+			i->i_op = &page_symlink_inode_operations;
+			i->i_data.a_ops = &squashfs_symlink_aops;
+			i->i_mode |= S_IFLNK;
+			SQUASHFS_I(i)->start_block = next_block;
+			SQUASHFS_I(i)->offset = next_offset;
+
+			TRACE("Symbolic link inode %x:%x, start_block %llx, "
+					"offset %x\n",
+					SQUASHFS_INODE_BLK(inode), offset,
+					next_block, next_offset);
+			break;
+		 }
+		 case SQUASHFS_BLKDEV_TYPE:
+		 case SQUASHFS_CHRDEV_TYPE: {
+			struct squashfs_dev_inode_header_2 *inodep = &id.dev;
+			struct squashfs_dev_inode_header_2 *sinodep = &sid.dev;
+
+			if (msblk->swap) {
+				if (!squashfs_get_cached_block(s, (char *)
+						sinodep, block, offset,
+						sizeof(*sinodep), &next_block,
+						&next_offset))
+					goto failed_read;
+				SQUASHFS_SWAP_DEV_INODE_HEADER_2(inodep, sinodep);
+			} else	
+				if (!squashfs_get_cached_block(s, (char *)
+						inodep, block, offset,
+						sizeof(*inodep), &next_block,
+						&next_offset))
+					goto failed_read;
+
+			i->i_mode |= (inodeb->inode_type ==
+					SQUASHFS_CHRDEV_TYPE) ?  S_IFCHR :
+					S_IFBLK;
+			init_special_inode(i, i->i_mode,
+					old_decode_dev(inodep->rdev));
+
+			TRACE("Device inode %x:%x, rdev %x\n",
+					SQUASHFS_INODE_BLK(inode), offset,
+					inodep->rdev);
+			break;
+		 }
+		 case SQUASHFS_FIFO_TYPE:
+		 case SQUASHFS_SOCKET_TYPE: {
+
+			i->i_mode |= (inodeb->inode_type == SQUASHFS_FIFO_TYPE)
+							? S_IFIFO : S_IFSOCK;
+			init_special_inode(i, i->i_mode, 0);
+			break;
+		 }
+		 default:
+			ERROR("Unknown inode type %d in squashfs_iget!\n",
+					inodeb->inode_type);
+			goto failed_read1;
+	}
+	
+	return 1;
+
+failed_read:
+	ERROR("Unable to read inode [%x:%x]\n", block, offset);
+
+failed_read1:
+	return 0;
+}
+
+
+/*
+ * Use the directory index to skip ahead towards @f_pos: updates *next_block
+ * and *next_offset and returns the directory offset reached (0 if none).
+ */
+static int get_dir_index_using_offset(struct super_block *s, long long 
+				*next_block, unsigned int *next_offset,
+				long long index_start,
+				unsigned int index_offset, int i_count,
+				long long f_pos)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	struct squashfs_dir_index_2 index, raw;
+	int i, length = 0;
+
+	TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %d\n",
+					i_count, (unsigned int) f_pos);
+
+	if (f_pos == 0)
+		return 0;
+
+	for (i = 0; i < i_count; i++) {
+		/* A failed read ends the walk; the last good entry still stands. */
+		if (!squashfs_get_cached_block(s, msblk->swap ? &raw : &index,
+					index_start, index_offset,
+					sizeof(index), &index_start,
+					&index_offset))
+			break;
+		if (msblk->swap) {
+			SQUASHFS_SWAP_DIR_INDEX_2(&index, &raw);
+		}
+		if (index.index > f_pos)
+			break;
+
+		/* Step over the entry's name (index.size + 1 bytes). */
+		if (!squashfs_get_cached_block(s, NULL, index_start,
+					index_offset, index.size + 1,
+					&index_start, &index_offset))
+			break;
+
+		length = index.index;
+		*next_block = index.start_block + sblk->directory_table_start;
+	}
+
+	*next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE;
+	return length;
+}
+
+
+/*
+ * Use the directory index to skip ahead by name: stops at the first index
+ * entry sorting after @name and returns the directory offset reached.
+ */
+static int get_dir_index_using_name(struct super_block *s, long long
+				*next_block, unsigned int *next_offset,
+				long long index_start,
+				unsigned int index_offset, int i_count,
+				const char *name, int size)
+{
+	struct squashfs_sb_info *msblk = s->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	struct squashfs_dir_index_2 *index, raw;
+	int i, length = 0;
+	char *str;
+
+	TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count);
+
+	/* One buffer: NUL-terminated copy of @name, then an index entry + name. */
+	str = kmalloc(sizeof(struct squashfs_dir_index_2) +
+			(SQUASHFS_NAME_LEN + 1) * 2, GFP_KERNEL);
+	if (str == NULL) {
+		ERROR("Failed to allocate squashfs_dir_index\n");
+		return 0;
+	}
+
+	index = (struct squashfs_dir_index_2 *) (str + SQUASHFS_NAME_LEN + 1);
+	strncpy(str, name, size);
+	str[size] = '\0';
+
+	for (i = 0; i < i_count; i++) {
+		/* A failed read ends the walk; the last good entry still stands. */
+		if (!squashfs_get_cached_block(s, msblk->swap ? &raw : index,
+					index_start, index_offset,
+					sizeof(raw), &index_start,
+					&index_offset))
+			break;
+		if (msblk->swap) {
+			SQUASHFS_SWAP_DIR_INDEX_2(index, &raw);
+		}
+		if (!squashfs_get_cached_block(s, index->name, index_start,
+					index_offset, index->size + 1,
+					&index_start, &index_offset))
+			break;
+		index->name[index->size + 1] = '\0';
+		if (strcmp(index->name, str) > 0)
+			break;
+
+		length = index->index;
+		*next_block = index->start_block + sblk->directory_table_start;
+	}
+
+	*next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE;
+	kfree(str);
+	return length;
+}
+
+/* 2.x readdir: pass entries beyond file->f_pos to @filldir; always returns 0. */
+static int squashfs_readdir_2(struct file *file, void *dirent, filldir_t filldir)
+{
+	struct inode *i = file->f_dentry->d_inode;
+	struct squashfs_sb_info *msblk = i->i_sb->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	long long next_block = SQUASHFS_I(i)->start_block +
+		sblk->directory_table_start;
+	unsigned int next_offset = SQUASHFS_I(i)->offset; /* readers take unsigned int * */
+	int length = 0, dir_count;
+	struct squashfs_dir_header_2 dirh;
+	struct squashfs_dir_entry_2 *dire;
+
+	TRACE("Entered squashfs_readdir_2 [%llx:%x]\n", next_block, next_offset);
+
+	if (!(dire = kmalloc(sizeof(struct squashfs_dir_entry) +
+		SQUASHFS_NAME_LEN + 1, GFP_KERNEL))) {
+		ERROR("Failed to allocate squashfs_dir_entry\n");
+		goto finish;
+	}
+
+	length = get_dir_index_using_offset(i->i_sb, &next_block, &next_offset,
+				SQUASHFS_I(i)->u.s2.directory_index_start,
+				SQUASHFS_I(i)->u.s2.directory_index_offset,
+				SQUASHFS_I(i)->u.s2.directory_index_count,
+				file->f_pos);
+
+	while (length < i_size_read(i)) {
+		/* read directory header */
+		if (msblk->swap) {
+			struct squashfs_dir_header_2 sdirh;
+			
+			if (!squashfs_get_cached_block(i->i_sb, (char *) &sdirh,
+					next_block, next_offset, sizeof(sdirh),
+					&next_block, &next_offset))
+				goto failed_read;
+
+			length += sizeof(sdirh);
+			SQUASHFS_SWAP_DIR_HEADER_2(&dirh, &sdirh);
+		} else {
+			if (!squashfs_get_cached_block(i->i_sb, (char *) &dirh,
+					next_block, next_offset, sizeof(dirh),
+					&next_block, &next_offset))
+				goto failed_read;
+
+			length += sizeof(dirh);
+		}
+
+		dir_count = dirh.count + 1;
+		while (dir_count--) {
+			if (msblk->swap) {
+				struct squashfs_dir_entry_2 sdire;
+				if (!squashfs_get_cached_block(i->i_sb, (char *)
+						&sdire, next_block, next_offset,
+						sizeof(sdire), &next_block,
+						&next_offset))
+					goto failed_read;
+				
+				length += sizeof(sdire);
+				SQUASHFS_SWAP_DIR_ENTRY_2(dire, &sdire);
+			} else {
+				if (!squashfs_get_cached_block(i->i_sb, (char *)
+						dire, next_block, next_offset,
+						sizeof(*dire), &next_block,
+						&next_offset))
+					goto failed_read;
+
+				length += sizeof(*dire);
+			}
+
+			if (!squashfs_get_cached_block(i->i_sb, dire->name,
+						next_block, next_offset,
+						dire->size + 1, &next_block,
+						&next_offset))
+				goto failed_read;
+
+			length += dire->size + 1;
+
+			if (file->f_pos >= length)
+				continue;
+
+			dire->name[dire->size + 1] = '\0';
+
+			TRACE("Calling filldir(%x, %s, %d, %d, %x:%x, %d)\n",
+					(unsigned int) dirent, dire->name,
+					dire->size + 1, (int) file->f_pos,
+					dirh.start_block, dire->offset,
+					squashfs_filetype_table[dire->type]);
+
+			if (filldir(dirent, dire->name, dire->size + 1,
+					file->f_pos, SQUASHFS_MK_VFS_INODE(
+					dirh.start_block, dire->offset),
+					squashfs_filetype_table[dire->type])
+					< 0) {
+				TRACE("Filldir returned less than 0\n");
+				goto finish;
+			}
+			file->f_pos = length;
+		}
+	}
+
+finish:
+	kfree(dire);
+	return 0;
+
+failed_read:
+	ERROR("Unable to read directory block [%llx:%x]\n", next_block,
+		next_offset);
+	kfree(dire);
+	return 0;
+}
+
+/* 2.x lookup: find @dentry's name in directory @i and d_add() the result. */
+static struct dentry *squashfs_lookup_2(struct inode *i, struct dentry *dentry,
+				struct nameidata *nd)
+{
+	const unsigned char *name = dentry->d_name.name;
+	int len = dentry->d_name.len;
+	struct inode *inode = NULL;
+	struct squashfs_sb_info *msblk = i->i_sb->s_fs_info;
+	struct squashfs_super_block *sblk = &msblk->sblk;
+	long long next_block = SQUASHFS_I(i)->start_block +
+				sblk->directory_table_start;
+	unsigned int next_offset = SQUASHFS_I(i)->offset; /* readers take unsigned int * */
+	int length = 0, dir_count;
+	struct squashfs_dir_header_2 dirh;
+	struct squashfs_dir_entry_2 *dire;
+	int sorted = sblk->s_major == 2 && sblk->s_minor >= 1;
+
+	TRACE("Entered squashfs_lookup_2 [%llx:%x]\n", next_block, next_offset);
+
+	if (!(dire = kmalloc(sizeof(struct squashfs_dir_entry) +
+		SQUASHFS_NAME_LEN + 1, GFP_KERNEL))) {
+		ERROR("Failed to allocate squashfs_dir_entry\n");
+		return ERR_PTR(-ENOMEM); /* do not cache a negative dentry on OOM */
+	}
+
+	if (len > SQUASHFS_NAME_LEN)
+		goto exit_loop;
+
+	length = get_dir_index_using_name(i->i_sb, &next_block, &next_offset,
+				SQUASHFS_I(i)->u.s2.directory_index_start,
+				SQUASHFS_I(i)->u.s2.directory_index_offset,
+				SQUASHFS_I(i)->u.s2.directory_index_count, name,
+				len);
+
+	while (length < i_size_read(i)) {
+		/* read directory header */
+		if (msblk->swap) {
+			struct squashfs_dir_header_2 sdirh;
+			if (!squashfs_get_cached_block(i->i_sb, (char *) &sdirh,
+					next_block, next_offset, sizeof(sdirh),
+					&next_block, &next_offset))
+				goto failed_read;
+
+			length += sizeof(sdirh);
+			SQUASHFS_SWAP_DIR_HEADER_2(&dirh, &sdirh);
+		} else {
+			if (!squashfs_get_cached_block(i->i_sb, (char *) &dirh,
+					next_block, next_offset, sizeof(dirh),
+					&next_block, &next_offset))
+				goto failed_read;
+
+			length += sizeof(dirh);
+		}
+
+		dir_count = dirh.count + 1;
+		while (dir_count--) {
+			if (msblk->swap) {
+				struct squashfs_dir_entry_2 sdire;
+				if (!squashfs_get_cached_block(i->i_sb, (char *)
+						&sdire, next_block,next_offset,
+						sizeof(sdire), &next_block,
+						&next_offset))
+					goto failed_read;
+				
+				length += sizeof(sdire);
+				SQUASHFS_SWAP_DIR_ENTRY_2(dire, &sdire);
+			} else {
+				if (!squashfs_get_cached_block(i->i_sb, (char *)
+						dire, next_block,next_offset,
+						sizeof(*dire), &next_block,
+						&next_offset))
+					goto failed_read;
+
+				length += sizeof(*dire);
+			}
+
+			if (!squashfs_get_cached_block(i->i_sb, dire->name,
+					next_block, next_offset, dire->size + 1,
+					&next_block, &next_offset))
+				goto failed_read;
+
+			length += dire->size + 1;
+
+			if (sorted && name[0] < dire->name[0])
+				goto exit_loop;
+
+			if ((len == dire->size + 1) && !strncmp(name,
+						dire->name, len)) {
+				squashfs_inode_t ino =
+					SQUASHFS_MKINODE(dirh.start_block,
+					dire->offset);
+				unsigned int inode_number = SQUASHFS_MK_VFS_INODE(dirh.start_block,
+					dire->offset);
+
+				TRACE("calling squashfs_iget for directory "
+					"entry %s, inode %x:%x, %lld\n", name,
+					dirh.start_block, dire->offset, ino);
+
+				inode = squashfs_iget(i->i_sb, ino, inode_number);
+
+				goto exit_loop;
+			}
+		}
+	}
+
+exit_loop:
+	kfree(dire);
+	d_add(dentry, inode);
+	return NULL;
+
+failed_read:
+	ERROR("Unable to read directory block [%llx:%x]\n", next_block,
+		next_offset);
+	goto exit_loop;
+}
+
+/* Select the 2.x read routines and copy each *_2 superblock field into place. */
+int squashfs_2_0_supported(struct squashfs_sb_info *msblk)
+{
+	struct squashfs_super_block *sb = &msblk->sblk;
+
+	/* mirror the 2.x members into the unsuffixed ones (e.g. inode_table_start) */
+	sb->bytes_used = sb->bytes_used_2;
+	sb->uid_start = sb->uid_start_2;
+	sb->guid_start = sb->guid_start_2;
+	sb->inode_table_start = sb->inode_table_start_2;
+	sb->directory_table_start = sb->directory_table_start_2;
+	sb->fragment_table_start = sb->fragment_table_start_2;
+
+	msblk->read_fragment_index_table = read_fragment_index_table_2;
+	msblk->read_inode = squashfs_read_inode_2;
+	return 1;
+}
diff -Npur linux-2.6-block/include/asm-x86/suspend_32.h linux-2.6-block-custom/include/asm-x86/suspend_32.h
--- linux-2.6-block/include/asm-x86/suspend_32.h	2008-09-27 16:12:27.590897097 +0900
+++ linux-2.6-block-custom/include/asm-x86/suspend_32.h	2008-09-26 19:45:20.378787249 +0900
@@ -11,6 +11,9 @@
 
 static inline int arch_prepare_suspend(void) { return 0; }
 
+extern int toi_faulted;
+#define clear_toi_fault() do { toi_faulted = 0; } while (0)
+
 /* image of the saved processor state */
 struct saved_context {
 	u16 es, fs, gs, ss;
diff -Npur linux-2.6-block/include/asm-x86/suspend_64.h linux-2.6-block-custom/include/asm-x86/suspend_64.h
--- linux-2.6-block/include/asm-x86/suspend_64.h	2008-09-27 16:12:27.590897097 +0900
+++ linux-2.6-block-custom/include/asm-x86/suspend_64.h	2008-09-26 19:45:20.382755421 +0900
@@ -14,6 +14,9 @@ static inline int arch_prepare_suspend(v
 	return 0;
 }
 
+#define toi_faulted (0)
+#define clear_toi_fault() do { } while (0)
+
 /*
  * Image of the saved processor state, used by the low level ACPI suspend to
  * RAM code and by the low level hibernation code.
diff -Npur linux-2.6-block/include/linux/Kbuild linux-2.6-block-custom/include/linux/Kbuild
--- linux-2.6-block/include/linux/Kbuild	2008-09-27 16:12:58.922922956 +0900
+++ linux-2.6-block-custom/include/linux/Kbuild	2008-09-26 19:45:20.382755421 +0900
@@ -209,6 +209,7 @@ unifdef-y += filter.h
 unifdef-y += flat.h
 unifdef-y += futex.h
 unifdef-y += fs.h
+unifdef-y += freezer.h
 unifdef-y += gameport.h
 unifdef-y += generic_serial.h
 unifdef-y += hayesesp.h
diff -Npur linux-2.6-block/include/linux/buffer_head.h linux-2.6-block-custom/include/linux/buffer_head.h
--- linux-2.6-block/include/linux/buffer_head.h	2008-09-27 16:12:27.734897313 +0900
+++ linux-2.6-block-custom/include/linux/buffer_head.h	2008-09-26 19:45:20.382755421 +0900
@@ -171,6 +171,11 @@ wait_queue_head_t *bh_waitq_head(struct 
 int fsync_bdev(struct block_device *);
 struct super_block *freeze_bdev(struct block_device *);
 void thaw_bdev(struct block_device *, struct super_block *);
+#define FS_FREEZER_FUSE 1
+#define FS_FREEZER_NORMAL 2
+#define FS_FREEZER_ALL (FS_FREEZER_FUSE | FS_FREEZER_NORMAL)
+void freeze_filesystems(int which);
+void thaw_filesystems(int which);
 int fsync_super(struct super_block *);
 int fsync_no_super(struct block_device *);
 struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block,
diff -Npur linux-2.6-block/include/linux/dyn_pageflags.h linux-2.6-block-custom/include/linux/dyn_pageflags.h
--- linux-2.6-block/include/linux/dyn_pageflags.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/include/linux/dyn_pageflags.h	2008-09-26 19:46:40.226755217 +0900
@@ -0,0 +1,66 @@
+/*
+ * include/linux/dyn_pageflags.h
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham <nigel at tuxonice net>
+ *
+ * This file is released under the GPLv2.
+ *
+ * It implements support for dynamically allocated bitmaps that are
+ * used for temporary or infrequently used pageflags, in lieu of
+ * bits in the struct page flags entry.
+ */
+
+#ifndef DYN_PAGEFLAGS_H
+#define DYN_PAGEFLAGS_H
+
+#include <linux/mm.h>
+
+struct dyn_pageflags {
+	unsigned long ****bitmap; /* [pg_dat][zone][page_num] */
+	int sparse, initialised; /* NOTE(review): sparse presumably mirrors allocate_dyn_pageflags()'s argument - confirm */
+	struct list_head list; /* initialised by DYN_PAGEFLAGS_INIT */
+	spinlock_t struct_lock; /* initialised unlocked by DYN_PAGEFLAGS_INIT */
+};
+/* Static initialiser for a struct dyn_pageflags called 'name' (list + lock). */
+#define DYN_PAGEFLAGS_INIT(name) { \
+	.list = LIST_HEAD_INIT(name.list), \
+	.struct_lock = __SPIN_LOCK_UNLOCKED(name.struct_lock) \
+}
+
+#define DECLARE_DYN_PAGEFLAGS(name) \
+	struct dyn_pageflags name = DYN_PAGEFLAGS_INIT(name);
+
+#define BITMAP_FOR_EACH_SET(BITMAP, CTR) \
+	for (CTR = get_next_bit_on(BITMAP, max_pfn + 1); CTR <= max_pfn; \
+		CTR = get_next_bit_on(BITMAP, CTR))
+
+extern void clear_dyn_pageflags(struct dyn_pageflags *pagemap);
+extern int allocate_dyn_pageflags(struct dyn_pageflags *pagemap, int sparse);
+extern void free_dyn_pageflags(struct dyn_pageflags *pagemap);
+extern unsigned long get_next_bit_on(struct dyn_pageflags *bitmap,
+	unsigned long counter);
+
+extern int test_dynpageflag(struct dyn_pageflags *bitmap, struct page *page);
+/*
+ * In sparse bitmaps, setting a flag can fail (we can fail to allocate
+ * the page to store the bit). If this happens, we will BUG(). If you don't
+ * want this behaviour, don't allocate sparse pageflags.
+ */
+extern void set_dynpageflag(struct dyn_pageflags *bitmap, struct page *page);
+extern void clear_dynpageflag(struct dyn_pageflags *bitmap, struct page *page);
+extern void dump_pagemap(struct dyn_pageflags *pagemap);
+
+/*
+ * With the above functions declared, you can do...
+ * #define PagePageset1(page) (test_dynpageflag(&pageset1_map, page))
+ * #define SetPagePageset1(page) (set_dynpageflag(&pageset1_map, page))
+ * #define ClearPagePageset1(page) (clear_dynpageflag(&pageset1_map, page))
+ */
+
+extern void __init dyn_pageflags_init(void);
+extern void __init dyn_pageflags_use_kzalloc(void);
+
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+extern void dyn_pageflags_hotplug(struct zone *zone);
+#endif
+#endif
diff -Npur linux-2.6-block/include/linux/freezer.h linux-2.6-block-custom/include/linux/freezer.h
--- linux-2.6-block/include/linux/freezer.h	2008-09-27 16:12:27.958897081 +0900
+++ linux-2.6-block-custom/include/linux/freezer.h	2008-09-26 19:45:20.382755421 +0900
@@ -136,6 +136,19 @@ static inline void set_freezable_with_si
 	current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG);
 }
 
+extern int freezer_state;
+#define FREEZER_OFF 0
+#define FREEZER_FILESYSTEMS_FROZEN 1
+#define FREEZER_USERSPACE_FROZEN 2
+#define FREEZER_FULLY_ON 3
+/* Non-zero only when freezer_state equals FREEZER_FULLY_ON. */
+static inline int freezer_is_on(void)
+{
+	return (freezer_state == FREEZER_FULLY_ON);
+}
+
+extern void thaw_kernel_threads(void);
+
 /*
  * Freezer-friendly wrappers around wait_event_interruptible() and
  * wait_event_interruptible_timeout(), originally defined in <linux/wait.h>
@@ -178,6 +191,8 @@ static inline int freeze_processes(void)
 static inline void thaw_processes(void) {}
 
 static inline int try_to_freeze(void) { return 0; }
+static inline int freezer_is_on(void) { return 0; }
+static inline void thaw_kernel_threads(void) { }
 
 static inline void freezer_do_not_count(void) {}
 static inline void freezer_count(void) {}
diff -Npur linux-2.6-block/include/linux/fs.h linux-2.6-block-custom/include/linux/fs.h
--- linux-2.6-block/include/linux/fs.h	2008-09-27 16:12:58.938944196 +0900
+++ linux-2.6-block-custom/include/linux/fs.h	2008-09-26 19:45:20.386755580 +0900
@@ -8,6 +8,7 @@
 
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/freezer.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -98,6 +99,7 @@ extern int dir_notify_enable;
 #define FS_REQUIRES_DEV 1 
 #define FS_BINARY_MOUNTDATA 2
 #define FS_HAS_SUBTYPE 4
+#define FS_IS_FUSE	8	/* Fuse filesystem - bdev freeze these too */
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move()
 					 * during rename() internally.
@@ -130,6 +132,7 @@ extern int dir_notify_enable;
 #define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
 #define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
 #define MS_I_VERSION	(1<<23) /* Update inode I_version field */
+#define MS_FROZEN	(1<<24)	/* Frozen by freeze_filesystems() */
 #define MS_ACTIVE	(1<<30)
 #define MS_NOUSER	(1<<31)
 
@@ -1144,8 +1147,11 @@ enum {
 	SB_FREEZE_TRANS = 2,
 };
 
-#define vfs_check_frozen(sb, level) \
-	wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level)))
+#define vfs_check_frozen(sb, level) do { /* sleep until s_frozen < level */ \
+	freezer_do_not_count(); /* presumably: freezer ignores us while waiting - confirm */ \
+	wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))); \
+	freezer_count(); \
+} while (0)
 
 #define get_fs_excl() atomic_inc(&current->fs_excl)
 #define put_fs_excl() atomic_dec(&current->fs_excl)
diff -Npur linux-2.6-block/include/linux/kernel.h linux-2.6-block-custom/include/linux/kernel.h
--- linux-2.6-block/include/linux/kernel.h	2008-09-27 16:12:28.078893268 +0900
+++ linux-2.6-block-custom/include/linux/kernel.h	2008-09-26 19:45:20.386755580 +0900
@@ -165,6 +165,8 @@ extern int vsprintf(char *buf, const cha
 	__attribute__ ((format (printf, 2, 0)));
 extern int snprintf(char * buf, size_t size, const char * fmt, ...)
 	__attribute__ ((format (printf, 3, 4)));
+extern int snprintf_used(char *buffer, int buffer_size,
+		const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
 extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 	__attribute__ ((format (printf, 3, 0)));
 extern int scnprintf(char * buf, size_t size, const char * fmt, ...)
diff -Npur linux-2.6-block/include/linux/mm.h linux-2.6-block-custom/include/linux/mm.h
--- linux-2.6-block/include/linux/mm.h	2008-09-27 16:12:28.134897104 +0900
+++ linux-2.6-block-custom/include/linux/mm.h	2008-09-26 19:45:20.386755580 +0900
@@ -1264,6 +1264,7 @@ int drop_caches_sysctl_handler(struct ct
 					void __user *, size_t *, loff_t *);
 unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 			unsigned long lru_pages);
+void drop_pagecache(void);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
diff -Npur linux-2.6-block/include/linux/netlink.h linux-2.6-block-custom/include/linux/netlink.h
--- linux-2.6-block/include/linux/netlink.h	2008-09-27 16:12:28.222897185 +0900
+++ linux-2.6-block-custom/include/linux/netlink.h	2008-09-26 19:45:20.386755580 +0900
@@ -24,6 +24,8 @@
 /* leave room for NETLINK_DM (DM Events) */
 #define NETLINK_SCSITRANSPORT	18	/* SCSI Transports */
 #define NETLINK_ECRYPTFS	19
+#define NETLINK_TOI_USERUI	20	/* TuxOnIce's userui */
+#define NETLINK_TOI_USM		21	/* Userspace storage manager */
 
 #define MAX_LINKS 32		
 
diff -Npur linux-2.6-block/include/linux/squashfs_fs.h linux-2.6-block-custom/include/linux/squashfs_fs.h
--- linux-2.6-block/include/linux/squashfs_fs.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/include/linux/squashfs_fs.h	2008-09-26 20:30:06.850751003 +0900
@@ -0,0 +1,935 @@
+#ifndef SQUASHFS_FS
+#define SQUASHFS_FS
+
+/*
+ * Squashfs
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * squashfs_fs.h
+ */
+
+#ifndef CONFIG_SQUASHFS_2_0_COMPATIBILITY
+#define CONFIG_SQUASHFS_2_0_COMPATIBILITY
+#endif
+
+#define SQUASHFS_CACHED_FRAGMENTS	CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE	
+#define SQUASHFS_MAJOR			3
+#define SQUASHFS_MINOR			1
+#define SQUASHFS_MAGIC			0x73717368
+#define SQUASHFS_MAGIC_SWAP		0x68737173
+#define SQUASHFS_START			0
+
+/* size of metadata (inode and directory) blocks */
+#define SQUASHFS_METADATA_SIZE		8192
+#define SQUASHFS_METADATA_LOG		13
+
+/* default size of data blocks */
+#define SQUASHFS_FILE_SIZE		131072
+#define SQUASHFS_FILE_LOG		17
+
+#define SQUASHFS_FILE_MAX_SIZE		1048576
+
+/* Max number of uids and gids */
+#define SQUASHFS_UIDS			256
+#define SQUASHFS_GUIDS			255
+
+/* Max length of filename (not 255) */
+#define SQUASHFS_NAME_LEN		256
+
+#define SQUASHFS_INVALID		((long long) 0xffffffffffff)
+#define SQUASHFS_INVALID_FRAG		((unsigned int) 0xffffffff)
+#define SQUASHFS_INVALID_BLK		((long long) -1)
+#define SQUASHFS_USED_BLK		((long long) -2)
+
+/* Filesystem flags */
+#define SQUASHFS_NOI			0
+#define SQUASHFS_NOD			1
+#define SQUASHFS_CHECK			2
+#define SQUASHFS_NOF			3
+#define SQUASHFS_NO_FRAG		4
+#define SQUASHFS_ALWAYS_FRAG		5
+#define SQUASHFS_DUPLICATE		6
+#define SQUASHFS_EXPORT			7
+
+#define SQUASHFS_BIT(flag, bit)		(((flag) >> (bit)) & 1)	/* 1 if set */
+
+#define SQUASHFS_UNCOMPRESSED_INODES(flags)	SQUASHFS_BIT(flags, \
+						SQUASHFS_NOI)
+
+#define SQUASHFS_UNCOMPRESSED_DATA(flags)	SQUASHFS_BIT(flags, \
+						SQUASHFS_NOD)
+
+#define SQUASHFS_UNCOMPRESSED_FRAGMENTS(flags)	SQUASHFS_BIT(flags, \
+						SQUASHFS_NOF)
+
+#define SQUASHFS_NO_FRAGMENTS(flags)		SQUASHFS_BIT(flags, \
+						SQUASHFS_NO_FRAG)
+
+#define SQUASHFS_ALWAYS_FRAGMENTS(flags)	SQUASHFS_BIT(flags, \
+						SQUASHFS_ALWAYS_FRAG)
+
+#define SQUASHFS_DUPLICATES(flags)		SQUASHFS_BIT(flags, \
+						SQUASHFS_DUPLICATE)
+
+#define SQUASHFS_EXPORTABLE(flags)		SQUASHFS_BIT(flags, \
+						SQUASHFS_EXPORT)
+
+#define SQUASHFS_CHECK_DATA(flags)		SQUASHFS_BIT(flags, \
+						SQUASHFS_CHECK)
+
+#define SQUASHFS_MKFLAGS(noi, nod, check_data, nof, no_frag, always_frag, \
+		duplicate_checking, exportable)	((noi) | ((nod) << 1) | \
+		((check_data) << 2) | ((nof) << 3) | ((no_frag) << 4) | \
+		((always_frag) << 5) | ((duplicate_checking) << 6) | ((exportable) << 7))
+
+/* Inode (file) type codes */
+#define SQUASHFS_DIR_TYPE		1
+#define SQUASHFS_FILE_TYPE		2
+#define SQUASHFS_SYMLINK_TYPE		3
+#define SQUASHFS_BLKDEV_TYPE		4
+#define SQUASHFS_CHRDEV_TYPE		5
+#define SQUASHFS_FIFO_TYPE		6
+#define SQUASHFS_SOCKET_TYPE		7
+#define SQUASHFS_LDIR_TYPE		8
+#define SQUASHFS_LREG_TYPE		9
+
+/* 1.0 filesystem type definitions */
+#define SQUASHFS_TYPES			5
+#define SQUASHFS_IPC_TYPE		0
+
+/* Flag whether block is compressed or uncompressed, bit is set if block is
+ * uncompressed */
+#define SQUASHFS_COMPRESSED_BIT		(1 << 15)
+
+#define SQUASHFS_COMPRESSED_SIZE(B)	(((B) & ~SQUASHFS_COMPRESSED_BIT) ? \
+		(B) & ~SQUASHFS_COMPRESSED_BIT :  SQUASHFS_COMPRESSED_BIT)
+
+#define SQUASHFS_COMPRESSED(B)		(!((B) & SQUASHFS_COMPRESSED_BIT))
+
+#define SQUASHFS_COMPRESSED_BIT_BLOCK		(1 << 24)
+
+#define SQUASHFS_COMPRESSED_SIZE_BLOCK(B)	((B) & \
+	~SQUASHFS_COMPRESSED_BIT_BLOCK)
+
+#define SQUASHFS_COMPRESSED_BLOCK(B)	(!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK))
+
+/*
+ * Inode number ops.  Inodes consist of a compressed block number, and an
+ * uncompressed  offset within that block
+ */
+#define SQUASHFS_INODE_BLK(a)		((unsigned int) ((a) >> 16))
+
+#define SQUASHFS_INODE_OFFSET(a)	((unsigned int) ((a) & 0xffff))
+
+#define SQUASHFS_MKINODE(A, B)		((squashfs_inode_t)(((squashfs_inode_t) (A)\
+					<< 16) + (B)))
+
+/* Compute 32 bit VFS inode number from squashfs inode number */
+#define SQUASHFS_MK_VFS_INODE(a, b)	((unsigned int) (((a) << 8) + \
+					((b) >> 2) + 1))
+/* XXX */
+
+/* Translate between VFS mode and squashfs mode */
+#define SQUASHFS_MODE(a)		((a) & 0xfff)
+
+/* fragment and fragment table defines */
+#define SQUASHFS_FRAGMENT_BYTES(A)	((A) * sizeof(struct squashfs_fragment_entry))
+
+#define SQUASHFS_FRAGMENT_INDEX(A)	(SQUASHFS_FRAGMENT_BYTES(A) / \
+					SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_FRAGMENT_INDEX_OFFSET(A)	(SQUASHFS_FRAGMENT_BYTES(A) % \
+						SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_FRAGMENT_INDEXES(A)	((SQUASHFS_FRAGMENT_BYTES(A) + \
+					SQUASHFS_METADATA_SIZE - 1) / \
+					SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_FRAGMENT_INDEX_BYTES(A)	(SQUASHFS_FRAGMENT_INDEXES(A) *\
+						sizeof(long long))
+
+/* inode lookup table defines */
+#define SQUASHFS_LOOKUP_BYTES(A)	((A) * sizeof(squashfs_inode_t))
+
+#define SQUASHFS_LOOKUP_BLOCK(A)		(SQUASHFS_LOOKUP_BYTES(A) / \
+						SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_LOOKUP_BLOCK_OFFSET(A)		(SQUASHFS_LOOKUP_BYTES(A) % \
+						SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_LOOKUP_BLOCKS(A)	((SQUASHFS_LOOKUP_BYTES(A) + \
+					SQUASHFS_METADATA_SIZE - 1) / \
+					SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_LOOKUP_BLOCK_BYTES(A)	(SQUASHFS_LOOKUP_BLOCKS(A) *\
+					sizeof(long long))
+
+/* cached data constants for filesystem */
+#define SQUASHFS_CACHED_BLKS		8
+
+#define SQUASHFS_MAX_FILE_SIZE_LOG	64
+
+#define SQUASHFS_MAX_FILE_SIZE		((long long) 1 << \
+					(SQUASHFS_MAX_FILE_SIZE_LOG - 2))
+
+#define SQUASHFS_MARKER_BYTE		0xff
+
+/* meta index cache */
+#define SQUASHFS_META_INDEXES	(SQUASHFS_METADATA_SIZE / sizeof(unsigned int))
+#define SQUASHFS_META_ENTRIES	31
+#define SQUASHFS_META_NUMBER	8
+#define SQUASHFS_SLOTS		4
+
+struct meta_entry {
+	long long		data_block;
+	unsigned int		index_block;
+	unsigned short		offset;
+	unsigned short		pad;
+};
+
+struct meta_index {
+	unsigned int		inode_number;
+	unsigned int		offset;
+	unsigned short		entries;
+	unsigned short		skip;
+	unsigned short		locked;
+	unsigned short		pad;
+	struct meta_entry	meta_entry[SQUASHFS_META_ENTRIES];
+};
+
+
+/*
+ * definitions for structures on disk
+ */
+
+typedef long long		squashfs_block_t;
+typedef long long		squashfs_inode_t;
+
+struct squashfs_super_block {
+	unsigned int		s_magic;
+	unsigned int		inodes;
+	unsigned int		bytes_used_2;
+	unsigned int		uid_start_2;
+	unsigned int		guid_start_2;
+	unsigned int		inode_table_start_2;
+	unsigned int		directory_table_start_2;
+	unsigned int		s_major:16;
+	unsigned int		s_minor:16;
+	unsigned int		block_size_1:16;
+	unsigned int		block_log:16;
+	unsigned int		flags:8;
+	unsigned int		no_uids:8;
+	unsigned int		no_guids:8;
+	unsigned int		mkfs_time /* time of filesystem creation */;
+	squashfs_inode_t	root_inode;
+	unsigned int		block_size;
+	unsigned int		fragments;
+	unsigned int		fragment_table_start_2;
+	long long		bytes_used;
+	long long		uid_start;
+	long long		guid_start;
+	long long		inode_table_start;
+	long long		directory_table_start;
+	long long		fragment_table_start;
+	long long		lookup_table_start;
+} __attribute__ ((packed));
+
+struct squashfs_dir_index {
+	unsigned int		index;
+	unsigned int		start_block;
+	unsigned char		size;
+	unsigned char		name[0];
+} __attribute__ ((packed));
+
+#define SQUASHFS_BASE_INODE_HEADER		\
+	unsigned int		inode_type:4;	\
+	unsigned int		mode:12;	\
+	unsigned int		uid:8;		\
+	unsigned int		guid:8;		\
+	unsigned int		mtime;		\
+	unsigned int 		inode_number;
+
+struct squashfs_base_inode_header {
+	SQUASHFS_BASE_INODE_HEADER;
+} __attribute__ ((packed));
+
+struct squashfs_ipc_inode_header {
+	SQUASHFS_BASE_INODE_HEADER;
+	unsigned int		nlink;
+} __attribute__ ((packed));
+
+struct squashfs_dev_inode_header {
+	SQUASHFS_BASE_INODE_HEADER;
+	unsigned int		nlink;
+	unsigned short		rdev;
+} __attribute__ ((packed));
+	
+struct squashfs_symlink_inode_header {
+	SQUASHFS_BASE_INODE_HEADER;
+	unsigned int		nlink;
+	unsigned short		symlink_size;
+	char			symlink[0];
+} __attribute__ ((packed));
+
+struct squashfs_reg_inode_header {
+	SQUASHFS_BASE_INODE_HEADER;
+	squashfs_block_t	start_block;
+	unsigned int		fragment;
+	unsigned int		offset;
+	unsigned int		file_size;
+	unsigned short		block_list[0];
+} __attribute__ ((packed));
+
+struct squashfs_lreg_inode_header {
+	SQUASHFS_BASE_INODE_HEADER;
+	unsigned int		nlink;
+	squashfs_block_t	start_block;
+	unsigned int		fragment;
+	unsigned int		offset;
+	long long		file_size;
+	unsigned short		block_list[0];
+} __attribute__ ((packed));
+
+struct squashfs_dir_inode_header {
+	SQUASHFS_BASE_INODE_HEADER;
+	unsigned int		nlink;
+	unsigned int		file_size:19;
+	unsigned int		offset:13;
+	unsigned int		start_block;
+	unsigned int		parent_inode;
+} __attribute__  ((packed));
+
+struct squashfs_ldir_inode_header {
+	SQUASHFS_BASE_INODE_HEADER;
+	unsigned int		nlink;
+	unsigned int		file_size:27;
+	unsigned int		offset:13;
+	unsigned int		start_block;
+	unsigned int		i_count:16;
+	unsigned int		parent_inode;
+	struct squashfs_dir_index	index[0];
+} __attribute__  ((packed));
+
+union squashfs_inode_header {
+	struct squashfs_base_inode_header	base;
+	struct squashfs_dev_inode_header	dev;
+	struct squashfs_symlink_inode_header	symlink;
+	struct squashfs_reg_inode_header	reg;
+	struct squashfs_lreg_inode_header	lreg;
+	struct squashfs_dir_inode_header	dir;
+	struct squashfs_ldir_inode_header	ldir;
+	struct squashfs_ipc_inode_header	ipc;
+};
+	
+struct squashfs_dir_entry {
+	unsigned int		offset:13;
+	unsigned int		type:3;
+	unsigned int		size:8;
+	int			inode_number:16;
+	char			name[0];
+} __attribute__ ((packed));
+
+struct squashfs_dir_header {
+	unsigned int		count:8;
+	unsigned int		start_block;
+	unsigned int		inode_number;
+} __attribute__ ((packed));
+
+struct squashfs_fragment_entry {
+	long long		start_block;
+	unsigned int		size;
+	unsigned int		unused;
+} __attribute__ ((packed));
+
+extern int squashfs_uncompress_block(void *d, int dstlen, void *s, int srclen);
+extern int squashfs_uncompress_init(void);
+extern int squashfs_uncompress_exit(void);
+
+/*
+ * macros to convert each packed bitfield structure from little endian to big
+ * endian and vice versa.  These are needed when creating or using a filesystem
+ * on a machine with different byte ordering to the target architecture.
+ *
+ */
+
+#define SQUASHFS_SWAP_START \
+	int bits;\
+	int b_pos;\
+	unsigned long long val;\
+	unsigned char *s;\
+	unsigned char *d;
+
+#define SQUASHFS_SWAP_SUPER_BLOCK(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_super_block));\
+	SQUASHFS_SWAP((s)->s_magic, d, 0, 32);\
+	SQUASHFS_SWAP((s)->inodes, d, 32, 32);\
+	SQUASHFS_SWAP((s)->bytes_used_2, d, 64, 32);\
+	SQUASHFS_SWAP((s)->uid_start_2, d, 96, 32);\
+	SQUASHFS_SWAP((s)->guid_start_2, d, 128, 32);\
+	SQUASHFS_SWAP((s)->inode_table_start_2, d, 160, 32);\
+	SQUASHFS_SWAP((s)->directory_table_start_2, d, 192, 32);\
+	SQUASHFS_SWAP((s)->s_major, d, 224, 16);\
+	SQUASHFS_SWAP((s)->s_minor, d, 240, 16);\
+	SQUASHFS_SWAP((s)->block_size_1, d, 256, 16);\
+	SQUASHFS_SWAP((s)->block_log, d, 272, 16);\
+	SQUASHFS_SWAP((s)->flags, d, 288, 8);\
+	SQUASHFS_SWAP((s)->no_uids, d, 296, 8);\
+	SQUASHFS_SWAP((s)->no_guids, d, 304, 8);\
+	SQUASHFS_SWAP((s)->mkfs_time, d, 312, 32);\
+	SQUASHFS_SWAP((s)->root_inode, d, 344, 64);\
+	SQUASHFS_SWAP((s)->block_size, d, 408, 32);\
+	SQUASHFS_SWAP((s)->fragments, d, 440, 32);\
+	SQUASHFS_SWAP((s)->fragment_table_start_2, d, 472, 32);\
+	SQUASHFS_SWAP((s)->bytes_used, d, 504, 64);\
+	SQUASHFS_SWAP((s)->uid_start, d, 568, 64);\
+	SQUASHFS_SWAP((s)->guid_start, d, 632, 64);\
+	SQUASHFS_SWAP((s)->inode_table_start, d, 696, 64);\
+	SQUASHFS_SWAP((s)->directory_table_start, d, 760, 64);\
+	SQUASHFS_SWAP((s)->fragment_table_start, d, 824, 64);\
+	SQUASHFS_SWAP((s)->lookup_table_start, d, 888, 64);\
+}
+
+#define SQUASHFS_SWAP_BASE_INODE_CORE(s, d, n)\
+	SQUASHFS_MEMSET(s, d, n);\
+	SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\
+	SQUASHFS_SWAP((s)->mode, d, 4, 12);\
+	SQUASHFS_SWAP((s)->uid, d, 16, 8);\
+	SQUASHFS_SWAP((s)->guid, d, 24, 8);\
+	SQUASHFS_SWAP((s)->mtime, d, 32, 32);\
+	SQUASHFS_SWAP((s)->inode_number, d, 64, 32);
+
+#define SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, n) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE(s, d, n)\
+}
+
+#define SQUASHFS_SWAP_IPC_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \
+			sizeof(struct squashfs_ipc_inode_header))\
+	SQUASHFS_SWAP((s)->nlink, d, 96, 32);\
+}
+
+#define SQUASHFS_SWAP_DEV_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \
+			sizeof(struct squashfs_dev_inode_header)); \
+	SQUASHFS_SWAP((s)->nlink, d, 96, 32);\
+	SQUASHFS_SWAP((s)->rdev, d, 128, 16);\
+}
+
+#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \
+			sizeof(struct squashfs_symlink_inode_header));\
+	SQUASHFS_SWAP((s)->nlink, d, 96, 32);\
+	SQUASHFS_SWAP((s)->symlink_size, d, 128, 16);\
+}
+
+#define SQUASHFS_SWAP_REG_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \
+			sizeof(struct squashfs_reg_inode_header));\
+	SQUASHFS_SWAP((s)->start_block, d, 96, 64);\
+	SQUASHFS_SWAP((s)->fragment, d, 160, 32);\
+	SQUASHFS_SWAP((s)->offset, d, 192, 32);\
+	SQUASHFS_SWAP((s)->file_size, d, 224, 32);\
+}
+
+#define SQUASHFS_SWAP_LREG_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \
+			sizeof(struct squashfs_lreg_inode_header));\
+	SQUASHFS_SWAP((s)->nlink, d, 96, 32);\
+	SQUASHFS_SWAP((s)->start_block, d, 128, 64);\
+	SQUASHFS_SWAP((s)->fragment, d, 192, 32);\
+	SQUASHFS_SWAP((s)->offset, d, 224, 32);\
+	SQUASHFS_SWAP((s)->file_size, d, 256, 64);\
+}
+
+#define SQUASHFS_SWAP_DIR_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \
+			sizeof(struct squashfs_dir_inode_header));\
+	SQUASHFS_SWAP((s)->nlink, d, 96, 32);\
+	SQUASHFS_SWAP((s)->file_size, d, 128, 19);\
+	SQUASHFS_SWAP((s)->offset, d, 147, 13);\
+	SQUASHFS_SWAP((s)->start_block, d, 160, 32);\
+	SQUASHFS_SWAP((s)->parent_inode, d, 192, 32);\
+}
+
+#define SQUASHFS_SWAP_LDIR_INODE_HEADER(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \
+			sizeof(struct squashfs_ldir_inode_header));\
+	SQUASHFS_SWAP((s)->nlink, d, 96, 32);\
+	SQUASHFS_SWAP((s)->file_size, d, 128, 27);\
+	SQUASHFS_SWAP((s)->offset, d, 155, 13);\
+	SQUASHFS_SWAP((s)->start_block, d, 168, 32);\
+	SQUASHFS_SWAP((s)->i_count, d, 200, 16);\
+	SQUASHFS_SWAP((s)->parent_inode, d, 216, 32);\
+}
+
+#define SQUASHFS_SWAP_DIR_INDEX(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_index));\
+	SQUASHFS_SWAP((s)->index, d, 0, 32);\
+	SQUASHFS_SWAP((s)->start_block, d, 32, 32);\
+	SQUASHFS_SWAP((s)->size, d, 64, 8);\
+}
+
+#define SQUASHFS_SWAP_DIR_HEADER(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_header));\
+	SQUASHFS_SWAP((s)->count, d, 0, 8);\
+	SQUASHFS_SWAP((s)->start_block, d, 8, 32);\
+	SQUASHFS_SWAP((s)->inode_number, d, 40, 32);\
+}
+
+#define SQUASHFS_SWAP_DIR_ENTRY(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_entry));\
+	SQUASHFS_SWAP((s)->offset, d, 0, 13);\
+	SQUASHFS_SWAP((s)->type, d, 13, 3);\
+	SQUASHFS_SWAP((s)->size, d, 16, 8);\
+	SQUASHFS_SWAP((s)->inode_number, d, 24, 16);\
+}
+
+#define SQUASHFS_SWAP_FRAGMENT_ENTRY(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_fragment_entry));\
+	SQUASHFS_SWAP((s)->start_block, d, 0, 64);\
+	SQUASHFS_SWAP((s)->size, d, 64, 32);\
+}
+
+#define SQUASHFS_SWAP_INODE_T(s, d) SQUASHFS_SWAP_LONG_LONGS(s, d, 1)
+
+#define SQUASHFS_SWAP_SHORTS(s, d, n) { /* fill s[0..n-1] from 16-bit fields in d */\
+	int entry;\
+	int bit_position;\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, (n) * 2);\
+	for(entry = 0, bit_position = 0; entry < (n); entry++, bit_position += \
+			16)\
+		SQUASHFS_SWAP((s)[entry], d, bit_position, 16);\
+}
+
+#define SQUASHFS_SWAP_INTS(s, d, n) { /* fill s[0..n-1] from 32-bit fields in d */\
+	int entry;\
+	int bit_position;\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, (n) * 4);\
+	for(entry = 0, bit_position = 0; entry < (n); entry++, bit_position += \
+			32)\
+		SQUASHFS_SWAP((s)[entry], d, bit_position, 32);\
+}
+
+#define SQUASHFS_SWAP_LONG_LONGS(s, d, n) { /* fill s[0..n-1] from 64-bit fields in d */\
+	int entry;\
+	int bit_position;\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, (n) * 8);\
+	for(entry = 0, bit_position = 0; entry < (n); entry++, bit_position += \
+			64)\
+		SQUASHFS_SWAP((s)[entry], d, bit_position, 64);\
+}
+
+#define SQUASHFS_SWAP_DATA(s, d, n, bits) { /* fill s[0..n-1] from bits-wide fields in d */\
+	int entry;\
+	int bit_position;\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, (n) * (bits) / 8);\
+	for(entry = 0, bit_position = 0; entry < (n); entry++, bit_position += \
+			(bits))\
+		SQUASHFS_SWAP((s)[entry], d, bit_position, (bits));\
+}
+
+#define SQUASHFS_SWAP_FRAGMENT_INDEXES(s, d, n) SQUASHFS_SWAP_LONG_LONGS(s, d, n)
+#define SQUASHFS_SWAP_LOOKUP_BLOCKS(s, d, n) SQUASHFS_SWAP_LONG_LONGS(s, d, n)
+
+#ifdef CONFIG_SQUASHFS_1_0_COMPATIBILITY
+
+struct squashfs_base_inode_header_1 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:4; /* index into uid table */
+	unsigned int		guid:4; /* index into guid table */
+} __attribute__ ((packed));
+
+struct squashfs_ipc_inode_header_1 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:4; /* index into uid table */
+	unsigned int		guid:4; /* index into guid table */
+	unsigned int		type:4;
+	unsigned int		offset:4;
+} __attribute__ ((packed));
+
+struct squashfs_dev_inode_header_1 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:4; /* index into uid table */
+	unsigned int		guid:4; /* index into guid table */
+	unsigned short		rdev;
+} __attribute__ ((packed));
+	
+struct squashfs_symlink_inode_header_1 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:4; /* index into uid table */
+	unsigned int		guid:4; /* index into guid table */
+	unsigned short		symlink_size;
+	char			symlink[0];
+} __attribute__ ((packed));
+
+struct squashfs_reg_inode_header_1 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:4; /* index into uid table */
+	unsigned int		guid:4; /* index into guid table */
+	unsigned int		mtime;
+	unsigned int		start_block;
+	unsigned int		file_size:32;
+	unsigned short		block_list[0];
+} __attribute__ ((packed));
+
+struct squashfs_dir_inode_header_1 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:4; /* index into uid table */
+	unsigned int		guid:4; /* index into guid table */
+	unsigned int		file_size:19;
+	unsigned int		offset:13;
+	unsigned int		mtime;
+	unsigned int		start_block:24;
+} __attribute__  ((packed));
+
+union squashfs_inode_header_1 {
+	struct squashfs_base_inode_header_1	base;
+	struct squashfs_dev_inode_header_1	dev;
+	struct squashfs_symlink_inode_header_1	symlink;
+	struct squashfs_reg_inode_header_1	reg;
+	struct squashfs_dir_inode_header_1	dir;
+	struct squashfs_ipc_inode_header_1	ipc;
+};
+
+#define SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, n) \
+	SQUASHFS_MEMSET(s, d, n);\
+	SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\
+	SQUASHFS_SWAP((s)->mode, d, 4, 12);\
+	SQUASHFS_SWAP((s)->uid, d, 16, 4);\
+	SQUASHFS_SWAP((s)->guid, d, 20, 4);
+
+#define SQUASHFS_SWAP_BASE_INODE_HEADER_1(s, d, n) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, n)\
+}
+
+#define SQUASHFS_SWAP_IPC_INODE_HEADER_1(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \
+			sizeof(struct squashfs_ipc_inode_header_1));\
+	SQUASHFS_SWAP((s)->type, d, 24, 4);\
+	SQUASHFS_SWAP((s)->offset, d, 28, 4);\
+}
+
+#define SQUASHFS_SWAP_DEV_INODE_HEADER_1(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \
+			sizeof(struct squashfs_dev_inode_header_1));\
+	SQUASHFS_SWAP((s)->rdev, d, 24, 16);\
+}
+
+#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER_1(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \
+			sizeof(struct squashfs_symlink_inode_header_1));\
+	SQUASHFS_SWAP((s)->symlink_size, d, 24, 16);\
+}
+
+#define SQUASHFS_SWAP_REG_INODE_HEADER_1(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \
+			sizeof(struct squashfs_reg_inode_header_1));\
+	SQUASHFS_SWAP((s)->mtime, d, 24, 32);\
+	SQUASHFS_SWAP((s)->start_block, d, 56, 32);\
+	SQUASHFS_SWAP((s)->file_size, d, 88, 32);\
+}
+
+#define SQUASHFS_SWAP_DIR_INODE_HEADER_1(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \
+			sizeof(struct squashfs_dir_inode_header_1));\
+	SQUASHFS_SWAP((s)->file_size, d, 24, 19);\
+	SQUASHFS_SWAP((s)->offset, d, 43, 13);\
+	SQUASHFS_SWAP((s)->mtime, d, 56, 32);\
+	SQUASHFS_SWAP((s)->start_block, d, 88, 24);\
+}
+
+#endif
+
+#ifdef CONFIG_SQUASHFS_2_0_COMPATIBILITY
+
+struct squashfs_dir_index_2 {
+	unsigned int		index:27;
+	unsigned int		start_block:29;
+	unsigned char		size;
+	unsigned char		name[0];
+} __attribute__ ((packed));
+
+struct squashfs_base_inode_header_2 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:8; /* index into uid table */
+	unsigned int		guid:8; /* index into guid table */
+} __attribute__ ((packed));
+
+struct squashfs_ipc_inode_header_2 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:8; /* index into uid table */
+	unsigned int		guid:8; /* index into guid table */
+} __attribute__ ((packed));
+
+struct squashfs_dev_inode_header_2 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:8; /* index into uid table */
+	unsigned int		guid:8; /* index into guid table */
+	unsigned short		rdev;
+} __attribute__ ((packed));
+	
+struct squashfs_symlink_inode_header_2 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:8; /* index into uid table */
+	unsigned int		guid:8; /* index into guid table */
+	unsigned short		symlink_size;
+	char			symlink[0];
+} __attribute__ ((packed));
+
+struct squashfs_reg_inode_header_2 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:8; /* index into uid table */
+	unsigned int		guid:8; /* index into guid table */
+	unsigned int		mtime;
+	unsigned int		start_block;
+	unsigned int		fragment;
+	unsigned int		offset;
+	unsigned int		file_size:32;
+	unsigned short		block_list[0];
+} __attribute__ ((packed));
+
+struct squashfs_dir_inode_header_2 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:8; /* index into uid table */
+	unsigned int		guid:8; /* index into guid table */
+	unsigned int		file_size:19;
+	unsigned int		offset:13;
+	unsigned int		mtime;
+	unsigned int		start_block:24;
+} __attribute__  ((packed));
+
+struct squashfs_ldir_inode_header_2 {
+	unsigned int		inode_type:4;
+	unsigned int		mode:12; /* protection */
+	unsigned int		uid:8; /* index into uid table */
+	unsigned int		guid:8; /* index into guid table */
+	unsigned int		file_size:27;
+	unsigned int		offset:13;
+	unsigned int		mtime;
+	unsigned int		start_block:24;
+	unsigned int		i_count:16;
+	struct squashfs_dir_index_2	index[0];
+} __attribute__  ((packed));
+
+union squashfs_inode_header_2 {
+	struct squashfs_base_inode_header_2	base;
+	struct squashfs_dev_inode_header_2	dev;
+	struct squashfs_symlink_inode_header_2	symlink;
+	struct squashfs_reg_inode_header_2	reg;
+	struct squashfs_dir_inode_header_2	dir;
+	struct squashfs_ldir_inode_header_2	ldir;
+	struct squashfs_ipc_inode_header_2	ipc;
+};
+	
+struct squashfs_dir_header_2 {
+	unsigned int		count:8;
+	unsigned int		start_block:24;
+} __attribute__ ((packed));
+
+struct squashfs_dir_entry_2 {
+	unsigned int		offset:13;
+	unsigned int		type:3;
+	unsigned int		size:8;
+	char			name[0];
+} __attribute__ ((packed));
+
+struct squashfs_fragment_entry_2 {
+	unsigned int		start_block;
+	unsigned int		size;
+} __attribute__ ((packed));
+
+#define SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, n)\
+	SQUASHFS_MEMSET(s, d, n);	/* zero n bytes of s first */\
+	SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\
+	SQUASHFS_SWAP((s)->mode, d, 4, 12);\
+	SQUASHFS_SWAP((s)->uid, d, 16, 8);\
+	SQUASHFS_SWAP((s)->guid, d, 24, 8);
+
+#define SQUASHFS_SWAP_BASE_INODE_HEADER_2(s, d, n) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, n)\
+}
+
+#define SQUASHFS_SWAP_IPC_INODE_HEADER_2(s, d) \
+	SQUASHFS_SWAP_BASE_INODE_HEADER_2(s, d, sizeof(struct squashfs_ipc_inode_header_2))
+
+#define SQUASHFS_SWAP_DEV_INODE_HEADER_2(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \
+			sizeof(struct squashfs_dev_inode_header_2)); \
+	SQUASHFS_SWAP((s)->rdev, d, 32, 16);\
+}
+
+#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER_2(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \
+			sizeof(struct squashfs_symlink_inode_header_2));\
+	SQUASHFS_SWAP((s)->symlink_size, d, 32, 16);\
+}
+
+#define SQUASHFS_SWAP_REG_INODE_HEADER_2(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \
+			sizeof(struct squashfs_reg_inode_header_2));\
+	SQUASHFS_SWAP((s)->mtime, d, 32, 32);\
+	SQUASHFS_SWAP((s)->start_block, d, 64, 32);\
+	SQUASHFS_SWAP((s)->fragment, d, 96, 32);\
+	SQUASHFS_SWAP((s)->offset, d, 128, 32);\
+	SQUASHFS_SWAP((s)->file_size, d, 160, 32);\
+}
+
+#define SQUASHFS_SWAP_DIR_INODE_HEADER_2(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \
+			sizeof(struct squashfs_dir_inode_header_2));\
+	SQUASHFS_SWAP((s)->file_size, d, 32, 19);\
+	SQUASHFS_SWAP((s)->offset, d, 51, 13);\
+	SQUASHFS_SWAP((s)->mtime, d, 64, 32);\
+	SQUASHFS_SWAP((s)->start_block, d, 96, 24);\
+}
+
+#define SQUASHFS_SWAP_LDIR_INODE_HEADER_2(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \
+			sizeof(struct squashfs_ldir_inode_header_2));\
+	SQUASHFS_SWAP((s)->file_size, d, 32, 27);\
+	SQUASHFS_SWAP((s)->offset, d, 59, 13);\
+	SQUASHFS_SWAP((s)->mtime, d, 72, 32);\
+	SQUASHFS_SWAP((s)->start_block, d, 104, 24);\
+	SQUASHFS_SWAP((s)->i_count, d, 128, 16);\
+}
+
+#define SQUASHFS_SWAP_DIR_INDEX_2(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_index_2));\
+	SQUASHFS_SWAP((s)->index, d, 0, 27);\
+	SQUASHFS_SWAP((s)->start_block, d, 27, 29);\
+	SQUASHFS_SWAP((s)->size, d, 56, 8);\
+}
+#define SQUASHFS_SWAP_DIR_HEADER_2(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_header_2));\
+	SQUASHFS_SWAP((s)->count, d, 0, 8);\
+	SQUASHFS_SWAP((s)->start_block, d, 8, 24);\
+}
+
+#define SQUASHFS_SWAP_DIR_ENTRY_2(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_entry_2));\
+	SQUASHFS_SWAP((s)->offset, d, 0, 13);\
+	SQUASHFS_SWAP((s)->type, d, 13, 3);\
+	SQUASHFS_SWAP((s)->size, d, 16, 8);\
+}
+
+#define SQUASHFS_SWAP_FRAGMENT_ENTRY_2(s, d) {\
+	SQUASHFS_SWAP_START\
+	SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_fragment_entry_2));\
+	SQUASHFS_SWAP((s)->start_block, d, 0, 32);\
+	SQUASHFS_SWAP((s)->size, d, 32, 32);\
+}
+
+#define SQUASHFS_SWAP_FRAGMENT_INDEXES_2(s, d, n) SQUASHFS_SWAP_INTS(s, d, n)
+
+/* fragment and fragment table defines */
+#define SQUASHFS_FRAGMENT_BYTES_2(A)	((A) * sizeof(struct squashfs_fragment_entry_2))
+
+#define SQUASHFS_FRAGMENT_INDEX_2(A)	(SQUASHFS_FRAGMENT_BYTES_2(A) / \
+					SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_FRAGMENT_INDEX_OFFSET_2(A)	(SQUASHFS_FRAGMENT_BYTES_2(A) % \
+						SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_FRAGMENT_INDEXES_2(A)	((SQUASHFS_FRAGMENT_BYTES_2(A) + \
+					SQUASHFS_METADATA_SIZE - 1) / \
+					SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_FRAGMENT_INDEX_BYTES_2(A)	(SQUASHFS_FRAGMENT_INDEXES_2(A) *\
+						sizeof(int))
+
+#endif
+
+#ifdef __KERNEL__
+
+/*
+ * macros used to swap each structure entry, taking into account
+ * bitfields and different bitfield placing conventions on differing
+ * architectures
+ */
+
+#include <asm/byteorder.h>
+
+#ifdef __BIG_ENDIAN
+	/* convert from little endian to big endian */
+#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, \
+		tbits, b_pos)
+#else
+	/* convert from big endian to little endian */
+#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, \
+		tbits, 64 - (tbits) - b_pos)
+#endif
+
+#define _SQUASHFS_SWAP(value, p, pos, tbits, SHIFT) { /* uses SQUASHFS_SWAP_START locals */\
+	b_pos = (pos) % 8;		/* bit offset inside the first byte */\
+	val = 0;\
+	s = (unsigned char *)(p) + ((pos) / 8);	/* first byte of the field */\
+	d = ((unsigned char *) &val) + 7;	/* fill val from byte 7 down */\
+	for(bits = 0; bits < ((tbits) + b_pos); bits += 8) \
+		*d-- = *s++;\
+	value = (val >> (SHIFT))/* & ((1 << tbits) - 1)*/;\
+}
+
+#define SQUASHFS_MEMSET(s, d, n)	memset(s, 0, n);
+
+#endif
+#endif
diff -Npur linux-2.6-block/include/linux/squashfs_fs_i.h linux-2.6-block-custom/include/linux/squashfs_fs_i.h
--- linux-2.6-block/include/linux/squashfs_fs_i.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/include/linux/squashfs_fs_i.h	2008-09-26 20:30:06.850751003 +0900
@@ -0,0 +1,45 @@
+#ifndef SQUASHFS_FS_I
+#define SQUASHFS_FS_I
+/*
+ * Squashfs
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * squashfs_fs_i.h
+ */
+
+struct squashfs_inode_info {
+	long long	start_block;
+	unsigned int	offset;
+	union {
+		struct {
+			long long	fragment_start_block;
+			unsigned int	fragment_size;
+			unsigned int	fragment_offset;
+			long long	block_list_start;
+		} s1;
+		struct {
+			long long	directory_index_start;
+			unsigned int	directory_index_offset;
+			unsigned int	directory_index_count;
+			unsigned int	parent_inode;
+		} s2;
+	} u;
+	struct inode	vfs_inode;
+};
+#endif
diff -Npur linux-2.6-block/include/linux/squashfs_fs_sb.h linux-2.6-block-custom/include/linux/squashfs_fs_sb.h
--- linux-2.6-block/include/linux/squashfs_fs_sb.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/include/linux/squashfs_fs_sb.h	2008-09-26 20:30:06.850751003 +0900
@@ -0,0 +1,79 @@
+#ifndef SQUASHFS_FS_SB
+#define SQUASHFS_FS_SB
+/*
+ * Squashfs
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * squashfs_fs_sb.h
+ */
+
+#include <linux/squashfs_fs.h>
+
+struct squashfs_cache_entry {
+	long long	block;
+	int		length;
+	int		locked;
+	long long	next_index;
+	char		pending;
+	char		error;
+	int		waiting;
+	wait_queue_head_t	wait_queue;
+	char		*data;
+};
+
+struct squashfs_cache {
+	char *name;
+	int entries;
+	int block_size;
+	int next_blk;
+	int waiting;
+	int unused_blks;
+	int use_vmalloc;
+	spinlock_t lock;
+	wait_queue_head_t wait_queue;
+	struct squashfs_cache_entry entry[0];
+};
+
+struct squashfs_sb_info {
+	struct squashfs_super_block	sblk;
+	int			devblksize;
+	int			devblksize_log2;
+	int			swap;
+	struct squashfs_cache	*block_cache;
+	struct squashfs_cache	*fragment_cache;
+	int			next_meta_index;
+	unsigned int		*uid;
+	unsigned int		*guid;
+	long long		*fragment_index;
+	unsigned int		*fragment_index_2;
+	char			*read_page;
+	struct mutex		read_data_mutex;
+	struct mutex		read_page_mutex;
+	struct mutex		meta_index_mutex;
+	struct meta_index	*meta_index;
+	z_stream		stream;
+	long long		*inode_lookup_table;
+	int			(*read_inode)(struct inode *i,  squashfs_inode_t \
+				inode);
+	long long		(*read_blocklist)(struct inode *inode, int \
+				index, int readahead_blks, char *block_list, \
+				unsigned short **block_p, unsigned int *bsize);
+	int			(*read_fragment_index_table)(struct super_block *s);
+};
+#endif
diff -Npur linux-2.6-block/include/linux/suspend.h linux-2.6-block-custom/include/linux/suspend.h
--- linux-2.6-block/include/linux/suspend.h	2008-09-27 16:12:28.422899070 +0900
+++ linux-2.6-block-custom/include/linux/suspend.h	2008-09-26 19:45:20.390755670 +0900
@@ -280,4 +280,69 @@ static inline void register_nosave_regio
 
 extern struct mutex pm_mutex;
 
+enum {
+	TOI_CAN_HIBERNATE,
+	TOI_CAN_RESUME,
+	TOI_RESUME_DEVICE_OK,
+	TOI_NORESUME_SPECIFIED,
+	TOI_SANITY_CHECK_PROMPT,
+	TOI_CONTINUE_REQ,
+	TOI_RESUMED_BEFORE,
+	TOI_BOOT_TIME,
+	TOI_NOW_RESUMING,
+	TOI_IGNORE_LOGLEVEL,
+	TOI_TRYING_TO_RESUME,
+	TOI_LOADING_ALT_IMAGE,
+	TOI_STOP_RESUME,
+	TOI_IO_STOPPED,
+	TOI_NOTIFIERS_PREPARE,
+	TOI_CLUSTER_MODE,
+};
+
+#ifdef CONFIG_TOI
+
+/* Used in init dir files */
+extern unsigned long toi_state;
+#define set_toi_state(bit) (set_bit(bit, &toi_state))
+#define clear_toi_state(bit) (clear_bit(bit, &toi_state))
+#define test_toi_state(bit) (test_bit(bit, &toi_state))
+extern int toi_running;
+
+#else /* !CONFIG_TOI */
+
+#define toi_state		(0)
+#define set_toi_state(bit) do { } while (0)
+#define clear_toi_state(bit) do { } while (0)
+#define test_toi_state(bit) (0)
+#define toi_running (0)
+#endif /* CONFIG_TOI */
+
+#ifdef CONFIG_HIBERNATION
+#ifdef CONFIG_TOI
+extern void toi_try_resume(void);
+#else
+#define toi_try_resume() do { } while (0)
+#endif
+
+extern int resume_attempted;
+extern int software_resume(void);
+
+static inline void check_resume_attempted(void)
+{
+	if (resume_attempted)
+		return;
+
+	software_resume();
+}
+#else
+#define check_resume_attempted() do { } while (0)
+#define resume_attempted (0)
+#endif
+
+#ifdef CONFIG_PRINTK_NOSAVE
+#define POSS_NOSAVE __nosavedata
+#else
+#define POSS_NOSAVE
+#endif
+
 #endif /* _LINUX_SUSPEND_H */
diff -Npur linux-2.6-block/include/linux/swap.h linux-2.6-block-custom/include/linux/swap.h
--- linux-2.6-block/include/linux/swap.h	2008-09-27 16:12:28.422899070 +0900
+++ linux-2.6-block-custom/include/linux/swap.h	2008-09-26 19:45:20.390755670 +0900
@@ -164,6 +164,7 @@ extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
 extern long nr_swap_pages;
 extern unsigned int nr_free_buffer_pages(void);
+extern unsigned int nr_unallocated_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
 
 /* Definition of global_page_state not available yet */
@@ -187,6 +188,8 @@ extern unsigned long try_to_free_mem_cgr
 							gfp_t gfp_mask);
 extern int __isolate_lru_page(struct page *page, int mode);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
+extern void shrink_one_zone(struct zone *zone, unsigned long desired_size,
+		int ps_wanted);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
 extern long vm_total_pages;
@@ -353,5 +356,10 @@ static inline swp_entry_t get_swap_page(
 #define disable_swap_token() do { } while(0)
 
 #endif /* CONFIG_SWAP */
+
+/* For TuxOnIce - unlink LRU pages while saving separately */
+void unlink_lru_lists(void);
+void relink_lru_lists(void);
+
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff -Npur linux-2.6-block/include/linux/thinkpad_ec.h linux-2.6-block-custom/include/linux/thinkpad_ec.h
--- linux-2.6-block/include/linux/thinkpad_ec.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/include/linux/thinkpad_ec.h	2008-09-26 19:45:38.667755679 +0900
@@ -0,0 +1,47 @@
+/*
+ *  thinkpad_ec.h - interface to ThinkPad embedded controller LPC3 functions
+ *
+ *  Copyright (C) 2005 Shem Multinymous <multinymous@gmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _THINKPAD_EC_H
+#define _THINKPAD_EC_H
+
+#ifdef __KERNEL__
+
+#define TP_CONTROLLER_ROW_LEN 16
+
+/* EC transactions input and output (possibly partial) vectors of 16 bytes. */
+struct thinkpad_ec_row {
+	u16 mask; /* bitmap of which entries of val[] are meaningful */
+	u8 val[TP_CONTROLLER_ROW_LEN];
+};
+
+extern int __must_check thinkpad_ec_lock(void);
+extern int __must_check thinkpad_ec_try_lock(void);
+extern void thinkpad_ec_unlock(void);
+
+extern int thinkpad_ec_read_row(const struct thinkpad_ec_row *args,
+				struct thinkpad_ec_row *data);
+extern int thinkpad_ec_try_read_row(const struct thinkpad_ec_row *args,
+				    struct thinkpad_ec_row *mask);
+extern int thinkpad_ec_prefetch_row(const struct thinkpad_ec_row *args);
+extern void thinkpad_ec_invalidate(void);
+
+
+#endif /* __KERNEL__ */
+#endif /* _THINKPAD_EC_H */
diff -Npur linux-2.6-block/init/do_mounts.c linux-2.6-block-custom/init/do_mounts.c
--- linux-2.6-block/init/do_mounts.c	2008-09-27 16:12:58.954943506 +0900
+++ linux-2.6-block-custom/init/do_mounts.c	2008-09-26 19:45:20.390755670 +0900
@@ -404,6 +404,8 @@ void __init prepare_namespace(void)
 	if (is_floppy && rd_doload && rd_load_disk(0))
 		ROOT_DEV = Root_RAM0;
 
+	check_resume_attempted();
+
 	mount_root();
 out:
 	sys_mount(".", "/", NULL, MS_MOVE, NULL);
diff -Npur linux-2.6-block/init/do_mounts_initrd.c linux-2.6-block-custom/init/do_mounts_initrd.c
--- linux-2.6-block/init/do_mounts_initrd.c	2008-09-27 16:12:28.922922396 +0900
+++ linux-2.6-block-custom/init/do_mounts_initrd.c	2008-09-26 19:45:20.390755670 +0900
@@ -6,6 +6,7 @@
 #include <linux/romfs_fs.h>
 #include <linux/initrd.h>
 #include <linux/sched.h>
+#include <linux/suspend.h>
 #include <linux/freezer.h>
 
 #include "do_mounts.h"
@@ -68,6 +69,11 @@ static void __init handle_initrd(void)
 
 	current->flags &= ~PF_FREEZER_SKIP;
 
+	if (!resume_attempted)
+		printk(KERN_ERR "TuxOnIce: No attempt was made to resume from "
+				"any image that might exist.\n");
+	clear_toi_state(TOI_BOOT_TIME);
+
 	/* move initrd to rootfs' /old */
 	sys_fchdir(old_fd);
 	sys_mount("/", ".", NULL, MS_MOVE, NULL);
diff -Npur linux-2.6-block/init/do_mounts_rd.c linux-2.6-block-custom/init/do_mounts_rd.c
--- linux-2.6-block/init/do_mounts_rd.c	2008-09-27 16:12:28.922922396 +0900
+++ linux-2.6-block-custom/init/do_mounts_rd.c	2008-09-26 20:30:06.854752140 +0900
@@ -5,6 +5,7 @@
 #include <linux/ext2_fs.h>
 #include <linux/romfs_fs.h>
 #include <linux/cramfs_fs.h>
+#include <linux/squashfs_fs.h>
 #include <linux/initrd.h>
 #include <linux/string.h>
 
@@ -37,6 +38,7 @@ static int __init crd_load(int in_fd, in
  * numbers could not be found.
  *
  * We currently check for the following magic numbers:
+ *      squashfs
  * 	minix
  * 	ext2
  *	romfs
@@ -51,6 +53,7 @@ identify_ramdisk_image(int fd, int start
 	struct ext2_super_block *ext2sb;
 	struct romfs_super_block *romfsb;
 	struct cramfs_super *cramfsb;
+	struct squashfs_super_block *squashfsb;
 	int nblocks = -1;
 	unsigned char *buf;
 
@@ -62,6 +65,7 @@ identify_ramdisk_image(int fd, int start
 	ext2sb = (struct ext2_super_block *) buf;
 	romfsb = (struct romfs_super_block *) buf;
 	cramfsb = (struct cramfs_super *) buf;
+	squashfsb = (struct squashfs_super_block *) buf;
 	memset(buf, 0xe5, size);
 
 	/*
@@ -99,6 +103,18 @@ identify_ramdisk_image(int fd, int start
 		goto done;
 	}
 
+	/* squashfs is at block zero too */
+	if (squashfsb->s_magic == SQUASHFS_MAGIC) {
+		printk(KERN_NOTICE
+		       "RAMDISK: squashfs filesystem found at block %d\n",
+		       start_block);
+		if (squashfsb->s_major < 3)
+			nblocks = (squashfsb->bytes_used_2+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS;
+		else
+			nblocks = (squashfsb->bytes_used+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS;
+		goto done;
+	}
+
 	/*
 	 * Read block 1 to test for minix and ext2 superblock
 	 */
diff -Npur linux-2.6-block/init/main.c linux-2.6-block-custom/init/main.c
--- linux-2.6-block/init/main.c	2008-09-27 16:12:28.926910333 +0900
+++ linux-2.6-block-custom/init/main.c	2008-09-26 19:45:20.390755670 +0900
@@ -57,6 +57,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/device.h>
 #include <linux/kthread.h>
+#include <linux/dyn_pageflags.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
@@ -607,6 +608,7 @@ asmlinkage void __init start_kernel(void
 	softirq_init();
 	timekeeping_init();
 	time_init();
+	dyn_pageflags_init();
 	sched_clock_init();
 	profile_init();
 	if (!irqs_disabled())
@@ -648,6 +650,7 @@ asmlinkage void __init start_kernel(void
 	enable_debug_pagealloc();
 	cpu_hotplug_init();
 	kmem_cache_init();
+	dyn_pageflags_use_kzalloc();
 	debug_objects_mem_init();
 	idr_init_cache();
 	setup_per_cpu_pageset();
diff -Npur linux-2.6-block/kernel/power/Kconfig linux-2.6-block-custom/kernel/power/Kconfig
--- linux-2.6-block/kernel/power/Kconfig	2008-09-27 16:12:28.990905827 +0900
+++ linux-2.6-block-custom/kernel/power/Kconfig	2008-09-26 19:45:20.394755549 +0900
@@ -34,6 +34,18 @@ config PM_VERBOSE
 	---help---
 	This option enables verbose messages from the Power Management code.
 
+config PRINTK_NOSAVE
+	depends on PM && PM_DEBUG
+	bool "Preserve printk data from boot kernel when resuming."
+	default n
+	---help---
+	This option gives printk data and the associated variables the
+	attribute __nosavedata, which means that they will not be saved as
+	part of the image. The net effect is that after resuming, your
+	dmesg will show the messages from prior to the atomic restore,
+	instead of the messages from the resumed kernel. This may be
+	useful for debugging hibernation.
+
 config CAN_PM_TRACE
 	def_bool y
 	depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
@@ -179,6 +191,272 @@ config PM_STD_PARTITION
 	  suspended image to. It will simply pick the first available swap 
 	  device.
 
+menuconfig TOI_CORE
+	tristate "Enhanced Hibernation (TuxOnIce)"
+	depends on HIBERNATION
+	default y
+	---help---
+	  TuxOnIce is the 'new and improved' suspend support.
+	  
+	  See the TuxOnIce home page (tuxonice.net)
+	  for FAQs, HOWTOs and other documentation.
+
+	comment "Image Storage (you need at least one allocator)"
+		depends on TOI_CORE
+	
+	config TOI_FILE
+		tristate "File Allocator"
+		depends on TOI_CORE
+		default y
+		---help---
+		  This option enables support for storing an image in a
+		  simple file. This should be possible, but we're still
+		  testing it.
+
+	config TOI_SWAP
+		tristate "Swap Allocator"
+		depends on TOI_CORE && SWAP
+		default y
+		---help---
+		  This option enables support for storing an image in your
+		  swap space.
+
+	comment "General Options"
+		depends on TOI_CORE
+
+	config TOI_DEFAULT_PRE_HIBERNATE
+		string "Default pre-hibernate command"
+		depends on TOI_CORE
+		---help---
+		  This entry allows you to specify a command to be run prior
+		  to starting a hibernation cycle. If this command returns
+		  a non-zero result code, hibernating will be aborted. If
+		  you're starting hibernation via the hibernate script,
+		  this value should probably be blank.
+
+	config TOI_DEFAULT_POST_HIBERNATE
+		string "Default post-resume command"
+		depends on TOI_CORE
+		---help---
+		  This entry allows you to specify a command to be run after
+		  completing a hibernation cycle. The return code of this
+		  command is ignored. If you're starting hibernation via the
+		  hibernate script, this value should probably be blank.
+
+	config TOI_CRYPTO
+		tristate "Compression support"
+		depends on TOI_CORE && CRYPTO
+		default y
+		---help---
+		  This option adds support for using cryptoapi compression
+		  algorithms. Compression is particularly useful as
+		  the LZF support that comes with the TuxOnIce patch can double
+		  your suspend and resume speed.
+
+		  You probably want this, so say Y here.
+
+	comment "No compression support available without Cryptoapi support."
+		depends on TOI_CORE && !CRYPTO
+
+	config TOI_USERUI
+		tristate "Userspace User Interface support"
+		depends on TOI_CORE && NET && (VT || SERIAL_CONSOLE)
+		default y
+		---help---
+		  This option enables support for a userspace based user interface
+		  to TuxOnIce, which allows you to have a nice display while suspending
+		  and resuming, and also enables features such as pressing escape to
+		  cancel a cycle or interactive debugging.
+
+	config TOI_USERUI_DEFAULT_PATH
+		string "Default userui program location"
+		default "/usr/local/sbin/tuxonice_fbsplash"
+		depends on TOI_USERUI
+		---help---
+		  This entry allows you to specify a default path to the userui binary.
+
+	config TOI_KEEP_IMAGE
+		bool "Allow Keep Image Mode"
+		depends on TOI_CORE
+		---help---
+		  This option allows you to keep an image and reuse it. It is intended
+		  __ONLY__ for use with systems where all filesystems are mounted read-
+		  only (kiosks, for example). To use it, compile this option in and boot
+		  normally. Set the KEEP_IMAGE flag in /sys/power/tuxonice and suspend.
+		  When you resume, the image will not be removed. You will be unable to turn
+		  off swap partitions (assuming you are using the swap allocator), but future
+		  suspends simply do a power-down. The image can be updated using the
+		  kernel command line parameter suspend_act= to turn off the keep image
+		  bit. Keep image mode is a little less user friendly on purpose - it
+		  should not be used without thought!
+
+	config TOI_REPLACE_SWSUSP
+		bool "Replace swsusp by default"
+		default y
+		depends on TOI_CORE
+		---help---
+		  TuxOnIce can replace swsusp. This option makes that the default state,
+		  requiring you to echo 0 > /sys/power/tuxonice/replace_swsusp if you want
+		  to use the vanilla kernel functionality. Note that your initrd/ramfs will
+		  need to do this before trying to resume, too.
+		  With overriding swsusp enabled, echoing disk to /sys/power/state will
+		  start a TuxOnIce cycle. If resume= doesn't specify an allocator and both
+		  the swap and file allocators are compiled in, the swap allocator will be
+		  used by default.
+
+	menuconfig TOI_CLUSTER
+		tristate "Cluster support"
+		default n
+		depends on TOI_CORE && NET && BROKEN
+		---help---
+		  Support for linking multiple machines in a cluster so that they suspend
+		  and resume together.
+
+	config TOI_DEFAULT_CLUSTER_INTERFACE
+		string "Default cluster interface"
+		depends on TOI_CLUSTER
+		---help---
+		  The default interface on which to communicate with other nodes in
+		  the cluster.
+		  
+		  If no value is set here, cluster support will be disabled by default.
+
+	config TOI_DEFAULT_CLUSTER_KEY
+		string "Default cluster key"
+		default "Default"
+		depends on TOI_CLUSTER
+		---help---
+		  The default key used by this node. All nodes in the same cluster
+		  have the same key. Multiple clusters may coexist on the same lan
+		  by using different values for this key.
+
+	config TOI_CLUSTER_IMAGE_TIMEOUT
+		int "Timeout when checking for image"
+		default 15
+		depends on TOI_CLUSTER
+		---help---
+		  Timeout (seconds) before continuing to boot when waiting to see
+		  whether other nodes might have an image. Set to -1 to wait
+		  indefinitely. If WAIT_UNTIL_NODES is non zero, we might continue
+		  booting sooner than this timeout.
+
+	config TOI_CLUSTER_WAIT_UNTIL_NODES
+		int "Nodes without image before continuing"
+		default 0
+		depends on TOI_CLUSTER
+		---help---
+		  When booting and no image is found, we wait to see if other nodes
+		  have an image before continuing to boot. This value lets us
+		  continue after seeing a certain number of nodes without an image,
+		  instead of continuing to wait for the timeout. Set to 0 to only
+		  use the timeout.
+
+	config TOI_DEFAULT_CLUSTER_PRE_HIBERNATE
+		string "Default pre-hibernate script"
+		depends on TOI_CLUSTER
+		---help---
+		  The default script to be called when starting to hibernate.
+
+	config TOI_DEFAULT_CLUSTER_POST_HIBERNATE
+		string "Default post-hibernate script"
+		depends on TOI_CLUSTER
+		---help---
+		  The default script to be called after resuming from hibernation.
+
+	config TOI_CHECKSUM
+		bool "Checksum pageset2"
+		default y
+		depends on TOI_CORE
+		select CRYPTO
+		select CRYPTO_ALGAPI
+		select CRYPTO_MD4
+		---help---
+		  Adds support for checksumming pageset2 pages, to ensure you really get an
+		  atomic copy. Since some filesystems (XFS especially) change metadata even
+		  when there's no other activity, we need this to check for pages that have
+		  been changed while we were saving the page cache. If your debugging output
+		  always says no pages were resaved, you may be able to safely disable this
+		  option.
+
+	config TOI_DEFAULT_WAIT
+		int "Default waiting time for emergency boot messages"
+		default "25"
+		range -1 32768
+		depends on TOI_CORE
+		help
+		  TuxOnIce can display warnings very early in the process of resuming,
+		  if (for example) it appears that you have booted a kernel that doesn't
+		  match an image on disk. It can then give you the opportunity to either
+		  continue booting that kernel, or reboot the machine. This option can be
+		  used to control how long to wait in such circumstances. -1 means wait
+		  forever. 0 means don't wait at all (do the default action, which will
+		  generally be to continue booting and remove the image). Values of 1 or
+		  more indicate a number of seconds (up to 255) to wait before doing the
+		  default.
+
+	config  TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE
+		int "Default extra pages allowance"
+		default "500"
+		range 500 32768
+		depends on TOI_CORE
+		help
+		  This value controls the default for the allowance TuxOnIce makes for
+		  drivers to allocate extra memory during the atomic copy. The default
+		  value of 500 will be okay if you're not using DRI. If you are using
+		  DRI, the easiest way to find what value to use is to try to hibernate
+		  and look at how many pages were actually needed in the sysfs entry
+		  /sys/power/tuxonice/debug_info (first number on the last line), adding
+		  a little extra because the value is not always the same.
+
+	config	TOI_PAGEFLAGS_TEST
+		tristate "Test pageflags"
+		default n
+		depends on TOI_CORE
+		help
+		  Test pageflags.
+
+config TOI_PAGEFLAGS_EXPORTS
+	bool
+	depends on TOI_PAGEFLAGS_TEST=m
+	default y
+
+config TOI_USERUI_EXPORTS
+	bool
+	depends on TOI_USERUI=m
+	default y
+
+config TOI_SWAP_EXPORTS
+	bool
+	depends on TOI_SWAP=m
+	default y
+
+config TOI_FILE_EXPORTS
+	bool
+	depends on TOI_FILE=m
+	default y
+
+config TOI_CRYPTO_EXPORTS
+	bool
+	depends on TOI_CRYPTO=m
+	default y
+
+config TOI_CORE_EXPORTS
+	bool
+	depends on TOI_CORE=m
+	default y
+
+config TOI_EXPORTS
+	bool
+	depends on TOI_SWAP_EXPORTS || TOI_FILE_EXPORTS || \
+		TOI_CRYPTO_EXPORTS || TOI_CLUSTER=m || \
+		TOI_USERUI_EXPORTS || TOI_PAGEFLAGS_EXPORTS
+	default y
+
+config TOI
+	bool
+	depends on TOI_CORE!=n
+	default y
+
 config APM_EMULATION
 	tristate "Advanced Power Management Emulation"
 	depends on PM && SYS_SUPPORTS_APM_EMULATION
diff -Npur linux-2.6-block/kernel/power/Makefile linux-2.6-block-custom/kernel/power/Makefile
--- linux-2.6-block/kernel/power/Makefile	2008-09-27 16:12:28.990905827 +0900
+++ linux-2.6-block-custom/kernel/power/Makefile	2008-09-26 19:45:20.394755549 +0900
@@ -4,6 +4,37 @@ EXTRA_CFLAGS	+=	-DDEBUG
 endif
 
 obj-y				:= main.o
+
+tuxonice_core-objs := tuxonice_modules.o tuxonice_sysfs.o tuxonice_highlevel.o \
+		tuxonice_io.o tuxonice_pagedir.o tuxonice_prepare_image.o \
+		tuxonice_extent.o tuxonice_pageflags.o tuxonice_ui.o \
+		tuxonice_power_off.o tuxonice_atomic_copy.o
+
+obj-$(CONFIG_TOI)		+= tuxonice_builtin.o
+
+ifdef CONFIG_PM_DEBUG
+tuxonice_core-objs		+= tuxonice_alloc.o
+endif
+
+ifdef CONFIG_TOI_CHECKSUM
+tuxonice_core-objs		+= tuxonice_checksum.o
+endif
+
+ifdef CONFIG_NET
+tuxonice_core-objs		+= tuxonice_storage.o tuxonice_netlink.o
+endif
+
+obj-$(CONFIG_TOI_CORE)		+= tuxonice_core.o
+obj-$(CONFIG_TOI_CRYPTO)	+= tuxonice_compress.o
+
+obj-$(CONFIG_TOI_SWAP)		+= tuxonice_block_io.o tuxonice_swap.o
+obj-$(CONFIG_TOI_FILE)		+= tuxonice_block_io.o tuxonice_file.o
+obj-$(CONFIG_TOI_CLUSTER)	+= tuxonice_cluster.o
+
+obj-$(CONFIG_TOI_USERUI)	+= tuxonice_userui.o
+
+obj-$(CONFIG_TOI_PAGEFLAGS_TEST)	+= toi_pageflags_test.o
+
 obj-$(CONFIG_PM_SLEEP)		+= process.o console.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o disk.o snapshot.o swap.o user.o
 
diff -Npur linux-2.6-block/kernel/power/disk.c linux-2.6-block-custom/kernel/power/disk.c
--- linux-2.6-block/kernel/power/disk.c	2008-09-27 16:12:28.990905827 +0900
+++ linux-2.6-block-custom/kernel/power/disk.c	2008-09-26 19:45:20.394755549 +0900
@@ -25,9 +25,11 @@
 
 #include "power.h"
 
+#include "tuxonice.h"
+#include "tuxonice_builtin.h"
 
 static int noresume = 0;
-static char resume_file[256] = CONFIG_PM_STD_PARTITION;
+char resume_file[256] = CONFIG_PM_STD_PARTITION;
 dev_t swsusp_resume_device;
 sector_t swsusp_resume_block;
 
@@ -105,7 +107,7 @@ static int hibernation_test(int level) {
  *	hibernation
  */
 
-static int platform_begin(int platform_mode)
+int platform_begin(int platform_mode)
 {
 	return (platform_mode && hibernation_ops) ?
 		hibernation_ops->begin() : 0;
@@ -116,7 +118,7 @@ static int platform_begin(int platform_m
  *	working state
  */
 
-static void platform_end(int platform_mode)
+void platform_end(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->end();
@@ -127,7 +129,7 @@ static void platform_end(int platform_mo
  *	platform driver if so configured and return an error code if it fails
  */
 
-static int platform_pre_snapshot(int platform_mode)
+int platform_pre_snapshot(int platform_mode)
 {
 	return (platform_mode && hibernation_ops) ?
 		hibernation_ops->pre_snapshot() : 0;
@@ -138,7 +140,7 @@ static int platform_pre_snapshot(int pla
  *	of operation using the platform driver (called with interrupts disabled)
  */
 
-static void platform_leave(int platform_mode)
+void platform_leave(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->leave();
@@ -149,7 +151,7 @@ static void platform_leave(int platform_
  *	using the platform driver (must be called after platform_prepare())
  */
 
-static void platform_finish(int platform_mode)
+void platform_finish(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->finish();
@@ -161,7 +163,7 @@ static void platform_finish(int platform
  *	called, platform_restore_cleanup() must be called.
  */
 
-static int platform_pre_restore(int platform_mode)
+int platform_pre_restore(int platform_mode)
 {
 	return (platform_mode && hibernation_ops) ?
 		hibernation_ops->pre_restore() : 0;
@@ -174,7 +176,7 @@ static int platform_pre_restore(int plat
  *	regardless of the result of platform_pre_restore().
  */
 
-static void platform_restore_cleanup(int platform_mode)
+void platform_restore_cleanup(int platform_mode)
 {
 	if (platform_mode && hibernation_ops)
 		hibernation_ops->restore_cleanup();
@@ -508,6 +510,11 @@ int hibernate(void)
 {
 	int error;
 
+#ifdef CONFIG_TOI
+	if (test_action_state(TOI_REPLACE_SWSUSP))
+		return toi_try_hibernate(1);
+#endif
+
 	mutex_lock(&pm_mutex);
 	/* The snapshot device should not be opened while we're running */
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
@@ -580,10 +587,21 @@ int hibernate(void)
  *
  */
 
-static int software_resume(void)
+int software_resume(void)
 {
 	int error;
 	unsigned int flags;
+	resume_attempted = 1;
+
+#ifdef CONFIG_TOI
+	/*
+	 * We can't know (until an image header - if any - is loaded), whether
+	 * we did override swsusp. We therefore ensure that both are tried.
+	 */
+	if (test_action_state(TOI_REPLACE_SWSUSP))
+		printk(KERN_INFO "Replacing swsusp.\n");
+	toi_try_resume();
+#endif
 
 	/*
 	 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
@@ -596,6 +614,7 @@ static int software_resume(void)
 	 * here to avoid lockdep complaining.
 	 */
 	mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
+
 	if (!swsusp_resume_device) {
 		if (!strlen(resume_file)) {
 			mutex_unlock(&pm_mutex);
@@ -667,9 +686,6 @@ static int software_resume(void)
 	return error;
 }
 
-late_initcall(software_resume);
-
-
 static const char * const hibernation_modes[] = {
 	[HIBERNATION_PLATFORM]	= "platform",
 	[HIBERNATION_SHUTDOWN]	= "shutdown",
@@ -882,6 +898,7 @@ static int __init resume_offset_setup(ch
 static int __init noresume_setup(char *str)
 {
 	noresume = 1;
+	set_toi_state(TOI_NORESUME_SPECIFIED);
 	return 1;
 }
 
diff -Npur linux-2.6-block/kernel/power/power.h linux-2.6-block-custom/kernel/power/power.h
--- linux-2.6-block/kernel/power/power.h	2008-09-27 16:12:28.994926590 +0900
+++ linux-2.6-block-custom/kernel/power/power.h	2008-09-26 19:45:20.394755549 +0900
@@ -1,7 +1,16 @@
+/*
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ */
+
+#ifndef KERNEL_POWER_POWER_H
+#define KERNEL_POWER_POWER_H
+
 #include <linux/suspend.h>
 #include <linux/suspend_ioctls.h>
 #include <linux/utsname.h>
 #include <linux/freezer.h>
+#include "tuxonice.h"
+#include "tuxonice_builtin.h"
 
 struct swsusp_info {
 	struct new_utsname	uts;
@@ -21,18 +30,22 @@ struct swsusp_info {
 extern int arch_hibernation_header_save(void *addr, unsigned int max_size);
 extern int arch_hibernation_header_restore(void *addr);
 
-static inline int init_header_complete(struct swsusp_info *info)
+static inline int init_swsusp_header_complete(struct swsusp_info *info)
 {
 	return arch_hibernation_header_save(info, MAX_ARCH_HEADER_SIZE);
 }
 
-static inline char *check_image_kernel(struct swsusp_info *info)
+static inline char *check_swsusp_image_kernel(struct swsusp_info *info)
 {
 	return arch_hibernation_header_restore(info) ?
 			"architecture specific data" : NULL;
 }
+#else
+extern char *check_swsusp_image_kernel(struct swsusp_info *info);
 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
+extern int init_swsusp_header(struct swsusp_info *info);
 
+extern char resume_file[256];
 /*
  * Keep some memory free so that I/O operations can succeed without paging
  * [Might this be more than 4 MB?]
@@ -63,6 +76,8 @@ static struct kobj_attribute _name##_att
 	.store	= _name##_store,		\
 }
 
+extern struct pbe *restore_pblist;
+
 /* Preferred image size in bytes (default 500 MB) */
 extern unsigned long image_size;
 extern int in_suspend;
@@ -223,3 +238,26 @@ static inline void suspend_thaw_processe
 {
 }
 #endif
+
+extern struct page *saveable_page(unsigned long pfn);
+#ifdef CONFIG_HIGHMEM
+extern struct page *saveable_highmem_page(unsigned long pfn);
+#else
+static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
+#endif
+
+#define PBES_PER_PAGE (PAGE_SIZE / sizeof(struct pbe))
+extern struct list_head nosave_regions;
+
+/**
+ *	This structure represents a range of page frames the contents of which
+ *	should not be saved during the suspend.
+ */
+
+struct nosave_region {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long end_pfn;
+};
+
+#endif
diff -Npur linux-2.6-block/kernel/power/process.c linux-2.6-block-custom/kernel/power/process.c
--- linux-2.6-block/kernel/power/process.c	2008-09-27 16:12:28.994926590 +0900
+++ linux-2.6-block-custom/kernel/power/process.c	2008-09-26 19:45:20.394755549 +0900
@@ -13,6 +13,10 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/freezer.h>
+#include <linux/buffer_head.h>
+
+int freezer_state;
+EXPORT_SYMBOL(freezer_state);
 
 /* 
  * Timeout for stopping processes
@@ -201,7 +205,8 @@ static int try_to_freeze_tasks(bool sig_
 		do_each_thread(g, p) {
 			task_lock(p);
 			if (freezing(p) && !freezer_should_skip(p))
-				printk(KERN_ERR " %s\n", p->comm);
+				printk(KERN_ERR " %s (%d) failed to freeze.\n",
+						p->comm, p->pid);
 			cancel_freezing(p);
 			task_unlock(p);
 		} while_each_thread(g, p);
@@ -221,17 +226,25 @@ int freeze_processes(void)
 {
 	int error;
 
-	printk("Freezing user space processes ... ");
+	printk(KERN_INFO "Stopping fuse filesystems.\n");
+	freeze_filesystems(FS_FREEZER_FUSE);
+	freezer_state = FREEZER_FILESYSTEMS_FROZEN;
+	printk(KERN_INFO "Freezing user space processes ... ");
 	error = try_to_freeze_tasks(true);
 	if (error)
 		goto Exit;
-	printk("done.\n");
+	printk(KERN_CONT "done.\n");
 
-	printk("Freezing remaining freezable tasks ... ");
+	sys_sync();
+	printk(KERN_INFO "Stopping normal filesystems.\n");
+	freeze_filesystems(FS_FREEZER_NORMAL);
+	freezer_state = FREEZER_USERSPACE_FROZEN;
+	printk(KERN_INFO "Freezing remaining freezable tasks ... ");
 	error = try_to_freeze_tasks(false);
 	if (error)
 		goto Exit;
 	printk("done.");
+	freezer_state = FREEZER_FULLY_ON;
  Exit:
 	BUG_ON(in_atomic());
 	printk("\n");
@@ -257,11 +270,35 @@ static void thaw_tasks(bool nosig_only)
 
 void thaw_processes(void)
 {
-	printk("Restarting tasks ... ");
-	thaw_tasks(true);
+	int old_state = freezer_state;
+
+	if (old_state == FREEZER_OFF)
+		return;
+
+	/*
+	 * Change state beforehand because thawed tasks might submit I/O
+	 * immediately.
+	 */
+	freezer_state = FREEZER_OFF;
+
+	printk(KERN_INFO "Restarting all filesystems ...\n");
+	thaw_filesystems(FS_FREEZER_ALL);
+
+	printk(KERN_INFO "Restarting tasks ... ");
+
+	if (old_state == FREEZER_FULLY_ON)
+		thaw_tasks(true);
 	thaw_tasks(false);
 	schedule();
 	printk("done.\n");
 }
 
 EXPORT_SYMBOL(refrigerator);
+
+void thaw_kernel_threads(void)
+{
+	freezer_state = FREEZER_USERSPACE_FROZEN;
+	printk(KERN_INFO "Restarting normal filesystems.\n");
+	thaw_filesystems(FS_FREEZER_NORMAL);
+	thaw_tasks(true);	/* nosig_only */
+}
diff -Npur linux-2.6-block/kernel/power/snapshot.c linux-2.6-block-custom/kernel/power/snapshot.c
--- linux-2.6-block/kernel/power/snapshot.c	2008-09-27 16:12:28.994926590 +0900
+++ linux-2.6-block-custom/kernel/power/snapshot.c	2008-09-26 19:45:20.398754871 +0900
@@ -33,6 +33,7 @@
 #include <asm/io.h>
 
 #include "power.h"
+#include "tuxonice_builtin.h"
 
 static int swsusp_page_is_free(struct page *);
 static void swsusp_set_page_forbidden(struct page *);
@@ -44,6 +45,12 @@ static void swsusp_unset_page_forbidden(
  * directly to their "original" page frames.
  */
 struct pbe *restore_pblist;
+int resume_attempted;
+EXPORT_SYMBOL_GPL(resume_attempted);
+
+#ifdef CONFIG_TOI
+#include "tuxonice_pagedir.h"
+#endif
 
 /* Pointer to an auxiliary buffer (1 page) */
 static void *buffer;
@@ -86,6 +93,11 @@ static void *get_image_page(gfp_t gfp_ma
 
 unsigned long get_safe_page(gfp_t gfp_mask)
 {
+#ifdef CONFIG_TOI
+	if (toi_running)
+		return toi_get_nonconflicting_page();
+#endif
+
 	return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
 }
 
@@ -561,18 +573,8 @@ static unsigned long memory_bm_next_pfn(
 	return bb->start_pfn + bit;
 }
 
-/**
- *	This structure represents a range of page frames the contents of which
- *	should not be saved during the suspend.
- */
-
-struct nosave_region {
-	struct list_head list;
-	unsigned long start_pfn;
-	unsigned long end_pfn;
-};
-
-static LIST_HEAD(nosave_regions);
+LIST_HEAD(nosave_regions);
+EXPORT_SYMBOL_GPL(nosave_regions);
 
 /**
  *	register_nosave_region - register a range of page frames the contents
@@ -809,7 +811,7 @@ static unsigned int count_free_highmem_p
  *	and it isn't a part of a free chunk of pages.
  */
 
-static struct page *saveable_highmem_page(unsigned long pfn)
+struct page *saveable_highmem_page(unsigned long pfn)
 {
 	struct page *page;
 
@@ -851,8 +853,6 @@ unsigned int count_highmem_pages(void)
 	}
 	return n;
 }
-#else
-static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
 #endif /* CONFIG_HIGHMEM */
 
 /**
@@ -864,7 +864,7 @@ static inline void *saveable_highmem_pag
  *	a free chunk of pages.
  */
 
-static struct page *saveable_page(unsigned long pfn)
+struct page *saveable_page(unsigned long pfn)
 {
 	struct page *page;
 
@@ -1198,6 +1198,11 @@ asmlinkage int swsusp_save(void)
 {
 	unsigned int nr_pages, nr_highmem;
 
+#ifdef CONFIG_TOI
+	if (toi_running)
+		return toi_post_context_save();
+#endif
+
 	printk(KERN_INFO "PM: Creating hibernation image: \n");
 
 	drain_local_pages(NULL);
@@ -1238,14 +1243,14 @@ asmlinkage int swsusp_save(void)
 }
 
 #ifndef CONFIG_ARCH_HIBERNATION_HEADER
-static int init_header_complete(struct swsusp_info *info)
+int init_swsusp_header_complete(struct swsusp_info *info)
 {
 	memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
 	info->version_code = LINUX_VERSION_CODE;
 	return 0;
 }
 
-static char *check_image_kernel(struct swsusp_info *info)
+char *check_swsusp_image_kernel(struct swsusp_info *info)
 {
 	if (info->version_code != LINUX_VERSION_CODE)
 		return "kernel version";
@@ -1259,6 +1264,7 @@ static char *check_image_kernel(struct s
 		return "machine";
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(check_swsusp_image_kernel);
 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
 
 unsigned long snapshot_get_image_size(void)
@@ -1266,7 +1272,7 @@ unsigned long snapshot_get_image_size(vo
 	return nr_copy_pages + nr_meta_pages + 1;
 }
 
-static int init_header(struct swsusp_info *info)
+int init_swsusp_header(struct swsusp_info *info)
 {
 	memset(info, 0, sizeof(struct swsusp_info));
 	info->num_physpages = num_physpages;
@@ -1274,7 +1280,7 @@ static int init_header(struct swsusp_inf
 	info->pages = snapshot_get_image_size();
 	info->size = info->pages;
 	info->size <<= PAGE_SHIFT;
-	return init_header_complete(info);
+	return init_swsusp_header_complete(info);
 }
 
 /**
@@ -1330,7 +1336,7 @@ int snapshot_read_next(struct snapshot_h
 	if (!handle->offset) {
 		int error;
 
-		error = init_header((struct swsusp_info *)buffer);
+		error = init_swsusp_header((struct swsusp_info *)buffer);
 		if (error)
 			return error;
 		handle->buffer = buffer;
@@ -1427,7 +1433,7 @@ static int check_header(struct swsusp_in
 {
 	char *reason;
 
-	reason = check_image_kernel(info);
+	reason = check_swsusp_image_kernel(info);
 	if (!reason && info->num_physpages != num_physpages)
 		reason = "memory size";
 	if (reason) {
diff -Npur linux-2.6-block/kernel/power/tuxonice.h linux-2.6-block-custom/kernel/power/tuxonice.h
--- linux-2.6-block/kernel/power/tuxonice.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice.h	2008-09-26 19:48:23.971773229 +0900
@@ -0,0 +1,210 @@
+/*
+ * kernel/power/tuxonice.h
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations used throughout swsusp.
+ *
+ */
+
+#ifndef KERNEL_POWER_TOI_H
+#define KERNEL_POWER_TOI_H
+
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/suspend.h>
+#include <linux/dyn_pageflags.h>
+#include <linux/fs.h>
+#include <linux/kmod.h>
+#include <asm/setup.h>
+#include "tuxonice_pageflags.h"
+
+#define TOI_CORE_VERSION "3.0-rc7"
+
+#define MY_BOOT_KERNEL_DATA_VERSION 1
+
+struct toi_boot_kernel_data {
+	int version;
+	int size;
+	unsigned long toi_action;
+	unsigned long toi_debug_state;
+	int toi_default_console_level;
+	int toi_io_time[2][2];
+	char toi_nosave_commandline[COMMAND_LINE_SIZE];
+};
+
+extern struct toi_boot_kernel_data toi_bkd;
+
+/* Location of boot kernel data struct in kernel being resumed */
+extern unsigned long boot_kernel_data_buffer;
+
+/*		 == Action states == 		*/
+
+enum {
+	TOI_REBOOT,
+	TOI_PAUSE,
+	TOI_LOGALL,
+	TOI_CAN_CANCEL,
+	TOI_KEEP_IMAGE,
+	TOI_FREEZER_TEST,
+	TOI_SINGLESTEP,
+	TOI_PAUSE_NEAR_PAGESET_END,
+	TOI_TEST_FILTER_SPEED,
+	TOI_TEST_BIO,
+	TOI_NO_PAGESET2,
+	TOI_PM_PREPARE_CONSOLE,
+	TOI_IGNORE_ROOTFS,
+	TOI_REPLACE_SWSUSP,
+	TOI_PAGESET2_FULL,
+	TOI_ABORT_ON_RESAVE_NEEDED,
+	TOI_NO_MULTITHREADED_IO,
+	TOI_NO_DIRECT_LOAD,
+	TOI_LATE_CPU_HOTPLUG,
+	TOI_GET_MAX_MEM_ALLOCD,
+	TOI_NO_FLUSHER_THREAD,
+};
+
+#define clear_action_state(bit) (test_and_clear_bit(bit, &toi_bkd.toi_action))
+#define test_action_state(bit) (test_bit(bit, &toi_bkd.toi_action))
+
+/*		 == Result states == 		*/
+
+enum {
+	TOI_ABORTED,
+	TOI_ABORT_REQUESTED,
+	TOI_NOSTORAGE_AVAILABLE,
+	TOI_INSUFFICIENT_STORAGE,
+	TOI_FREEZING_FAILED,
+	TOI_KEPT_IMAGE,
+	TOI_WOULD_EAT_MEMORY,
+	TOI_UNABLE_TO_FREE_ENOUGH_MEMORY,
+	TOI_PM_SEM,
+	TOI_DEVICE_REFUSED,
+	TOI_EXTRA_PAGES_ALLOW_TOO_SMALL,
+	TOI_UNABLE_TO_PREPARE_IMAGE,
+	TOI_FAILED_MODULE_INIT,
+	TOI_FAILED_MODULE_CLEANUP,
+	TOI_FAILED_IO,
+	TOI_OUT_OF_MEMORY,
+	TOI_IMAGE_ERROR,
+	TOI_PLATFORM_PREP_FAILED,
+	TOI_CPU_HOTPLUG_FAILED,
+	TOI_ARCH_PREPARE_FAILED,
+	TOI_RESAVE_NEEDED,
+	TOI_CANT_SUSPEND,
+	TOI_NOTIFIERS_PREPARE_FAILED,
+	TOI_PRE_SNAPSHOT_FAILED,
+	TOI_PRE_RESTORE_FAILED,
+};
+
+extern unsigned long toi_result;
+
+#define set_result_state(bit) (test_and_set_bit(bit, &toi_result))
+#define set_abort_result(bit) (test_and_set_bit(TOI_ABORTED, &toi_result), \
+				test_and_set_bit(bit, &toi_result))
+#define clear_result_state(bit) (test_and_clear_bit(bit, &toi_result))
+#define test_result_state(bit) (test_bit(bit, &toi_result))
+
+/*	 == Debug sections and levels == 	*/
+
+/* debugging levels. */
+enum {
+	TOI_STATUS = 0,
+	TOI_ERROR = 2,
+	TOI_LOW,
+	TOI_MEDIUM,
+	TOI_HIGH,
+	TOI_VERBOSE,
+};
+
+enum {
+	TOI_ANY_SECTION,
+	TOI_EAT_MEMORY,
+	TOI_IO,
+	TOI_HEADER,
+	TOI_WRITER,
+	TOI_MEMORY,
+};
+
+#define set_debug_state(bit) (test_and_set_bit(bit, &toi_bkd.toi_debug_state))
+#define clear_debug_state(bit) \
+	(test_and_clear_bit(bit, &toi_bkd.toi_debug_state))
+#define test_debug_state(bit) (test_bit(bit, &toi_bkd.toi_debug_state))
+
+/*		== Steps in hibernating ==	*/
+
+enum {
+	STEP_HIBERNATE_PREPARE_IMAGE,
+	STEP_HIBERNATE_SAVE_IMAGE,
+	STEP_HIBERNATE_POWERDOWN,
+	STEP_RESUME_CAN_RESUME,
+	STEP_RESUME_LOAD_PS1,
+	STEP_RESUME_DO_RESTORE,
+	STEP_RESUME_READ_PS2,
+	STEP_RESUME_GO,
+	STEP_RESUME_ALT_IMAGE,
+	STEP_CLEANUP,
+	STEP_QUIET_CLEANUP
+};
+
+/*		== TuxOnIce states ==
+	(see also include/linux/suspend.h)	*/
+
+#define get_toi_state()  (toi_state)
+#define restore_toi_state(saved_state) \
+	do { toi_state = saved_state; } while (0)
+
+/*		== Module support ==		*/
+
+struct toi_core_fns {
+	int (*post_context_save)(void);
+	unsigned long (*get_nonconflicting_page)(void);
+	int (*try_hibernate)(int have_pmsem);
+	void (*try_resume)(void);
+};
+
+extern struct toi_core_fns *toi_core_fns;
+
+/*		== All else ==			*/
+#define KB(x) ((x) << (PAGE_SHIFT - 10))
+#define MB(x) ((x) >> (20 - PAGE_SHIFT))
+
+extern int toi_start_anything(int toi_or_resume);
+extern void toi_finish_anything(int toi_or_resume);
+
+extern int save_image_part1(void);
+extern int toi_atomic_restore(void);
+
+extern int _toi_try_hibernate(int have_pmsem);
+extern void __toi_try_resume(void);
+
+extern int __toi_post_context_save(void);
+
+extern unsigned int nr_hibernates;
+extern char alt_resume_param[256];
+
+extern void copyback_post(void);
+extern int toi_hibernate(void);
+extern long extra_pd1_pages_used;
+
+#define SECTOR_SIZE 512
+
+extern void toi_early_boot_message(int can_erase_image, int default_answer,
+	char *warning_reason, ...);
+
+static inline int load_direct(struct page *page)
+{
+	return test_action_state(TOI_NO_DIRECT_LOAD) ? 0 :
+		PagePageset1Copy(page);
+}
+
+extern int pre_resume_freeze(void);
+extern int do_check_can_resume(void);
+extern int do_toi_step(int step);
+extern int toi_launch_userspace_program(char *command, int channel_no,
+		enum umh_wait wait);
+
+extern char *tuxonice_signature;
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_alloc.c linux-2.6-block-custom/kernel/power/tuxonice_alloc.c
--- linux-2.6-block/kernel/power/tuxonice_alloc.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_alloc.c	2008-09-26 19:48:23.971773229 +0900
@@ -0,0 +1,293 @@
+/*
+ * kernel/power/tuxonice_alloc.c
+ *
+ * Copyright (C) 2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#ifdef CONFIG_PM_DEBUG
+#include <linux/slab.h>
+#include <linux/module.h>
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+
+#define TOI_ALLOC_PATHS 39
+
+DEFINE_MUTEX(toi_alloc_mutex);
+
+static struct toi_module_ops toi_alloc_ops;
+
+static int toi_fail_num;
+static atomic_t toi_alloc_count[TOI_ALLOC_PATHS],
+		toi_free_count[TOI_ALLOC_PATHS],
+		toi_test_count[TOI_ALLOC_PATHS],
+		toi_fail_count[TOI_ALLOC_PATHS];
+int toi_cur_allocd[TOI_ALLOC_PATHS], toi_max_allocd[TOI_ALLOC_PATHS];
+int cur_allocd, max_allocd;
+
+static char *toi_alloc_desc[TOI_ALLOC_PATHS] = {
+	"", /* 0 */
+	"get_io_info_struct",
+	"extent",
+	"extent (loading chain)",
+	"userui channel",
+	"userui arg", /* 5 */
+	"attention list metadata",
+	"extra pagedir memory metadata",
+	"bdev metadata",
+	"extra pagedir memory",
+	"header_locations_read", /* 10 */
+	"bio queue",
+	"prepare_readahead",
+	"i/o buffer",
+	"writer buffer in bio_init",
+	"checksum buffer", /* 15 */
+	"compression buffer",
+	"filewriter signature op",
+	"set resume param alloc1",
+	"set resume param alloc2",
+	"debugging info buffer", /* 20 */
+	"check can resume buffer",
+	"write module config buffer",
+	"read module config buffer",
+	"write image header buffer",
+	"read pageset1 buffer", /* 25 */
+	"get_have_image_data buffer",
+	"checksum page",
+	"worker rw loop",
+	"get nonconflicting page",
+	"ps1 load addresses", /* 30 */
+	"remove swap image",
+	"swap image exists",
+	"swap parse sig location",
+	"sysfs kobj",
+	"swap mark resume attempted buffer", /* 35 */
+	"cluster member",
+	"boot kernel data buffer",
+	"setting swap signature"
+};
+
+#define MIGHT_FAIL(FAIL_NUM, FAIL_VAL) \
+	do { \
+		BUG_ON((FAIL_NUM) >= TOI_ALLOC_PATHS); \
+		/* One-shot: the request is cleared as soon as it fires. */ \
+		if ((FAIL_NUM) == toi_fail_num) { \
+			atomic_inc(&toi_test_count[FAIL_NUM]); \
+			toi_fail_num = 0; \
+			return (FAIL_VAL); \
+		} \
+	} while (0)
+
+static void alloc_update_stats(int fail_num, void *result)
+{
+	int path;
+
+	if (!result) {
+		atomic_inc(&toi_fail_count[fail_num]);
+		return;
+	}
+	atomic_inc(&toi_alloc_count[fail_num]);
+	if (likely(!test_action_state(TOI_GET_MAX_MEM_ALLOCD)))
+		return;
+
+	mutex_lock(&toi_alloc_mutex);
+	toi_cur_allocd[fail_num]++;
+	if (unlikely(++cur_allocd > max_allocd)) {
+		/* New overall peak: remember each path's current count. */
+		for (path = 0; path < TOI_ALLOC_PATHS; path++)
+			toi_max_allocd[path] = toi_cur_allocd[path];
+		max_allocd = cur_allocd;
+	}
+	mutex_unlock(&toi_alloc_mutex);
+}
+
+static void free_update_stats(int fail_num)
+{
+	BUG_ON(fail_num >= TOI_ALLOC_PATHS);
+	atomic_inc(&toi_free_count[fail_num]);
+	if (unlikely(test_action_state(TOI_GET_MAX_MEM_ALLOCD))) {
+		mutex_lock(&toi_alloc_mutex);
+		cur_allocd--;
+		toi_cur_allocd[fail_num]--;
+		mutex_unlock(&toi_alloc_mutex);
+	}
+}
+
+void *toi_kzalloc(int fail_num, size_t size, gfp_t flags)
+{
+	void *result;
+
+	if (toi_alloc_ops.enabled)
+		MIGHT_FAIL(fail_num, NULL);
+	result = kzalloc(size, flags);
+	if (toi_alloc_ops.enabled)
+		alloc_update_stats(fail_num, result);
+	return result;
+}
+
+unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
+		unsigned int order)
+{
+	unsigned long result;
+
+	if (toi_alloc_ops.enabled)
+		MIGHT_FAIL(fail_num, 0);
+	result = __get_free_pages(mask, order);
+	if (toi_alloc_ops.enabled)
+		alloc_update_stats(fail_num, (void *) result);
+	return result;
+}
+
+struct page *toi_alloc_page(int fail_num, gfp_t mask)
+{
+	struct page *result;
+
+	if (toi_alloc_ops.enabled)
+		MIGHT_FAIL(fail_num, NULL);	/* may return NULL here */
+	result = alloc_page(mask);
+	if (toi_alloc_ops.enabled)
+		alloc_update_stats(fail_num, result);
+	return result;
+}
+
+unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask)
+{
+	unsigned long result;
+
+	if (toi_alloc_ops.enabled)
+		MIGHT_FAIL(fail_num, 0);
+	result = get_zeroed_page(mask);
+	if (toi_alloc_ops.enabled)
+		alloc_update_stats(fail_num, (void *) result);
+	return result;
+}
+
+void toi_kfree(int fail_num, const void *arg)
+{
+	if (arg && toi_alloc_ops.enabled)
+		free_update_stats(fail_num);
+
+	kfree(arg);
+}
+
+void toi_free_page(int fail_num, unsigned long virt)
+{
+	if (virt && toi_alloc_ops.enabled)
+		free_update_stats(fail_num);
+
+	free_page(virt);
+}
+
+void toi__free_page(int fail_num, struct page *page)
+{
+	if (page && toi_alloc_ops.enabled)
+		free_update_stats(fail_num);
+
+	__free_page(page);
+}
+
+void toi_free_pages(int fail_num, struct page *page, int order)
+{
+	if (page && toi_alloc_ops.enabled)
+		free_update_stats(fail_num);
+
+	__free_pages(page, order);
+}
+
+void toi_alloc_print_debug_stats(void)
+{
+	int i, header_done = 0;
+
+	if (!toi_alloc_ops.enabled)
+		return;
+
+	for (i = 0; i < TOI_ALLOC_PATHS; i++)
+		if (atomic_read(&toi_alloc_count[i]) !=
+		    atomic_read(&toi_free_count[i])) {
+			if (!header_done) {
+				printk(KERN_INFO "Idx  Allocs   Frees   Tests "
+					"  Fails Max     Description\n");
+				header_done = 1;
+			}
+
+			printk(KERN_INFO "%3d %7d %7d %7d %7d %7d %s\n", i,
+				atomic_read(&toi_alloc_count[i]),
+				atomic_read(&toi_free_count[i]),
+				atomic_read(&toi_test_count[i]),
+				atomic_read(&toi_fail_count[i]),
+				toi_max_allocd[i],
+				toi_alloc_desc[i]);
+		}
+}
+EXPORT_SYMBOL_GPL(toi_alloc_print_debug_stats);
+
+static int toi_alloc_initialise(int starting_cycle)
+{
+	int i;
+
+	if (starting_cycle && toi_alloc_ops.enabled) {
+		for (i = 0; i < TOI_ALLOC_PATHS; i++) {
+			atomic_set(&toi_alloc_count[i], 0);
+			atomic_set(&toi_free_count[i], 0);
+			atomic_set(&toi_test_count[i], 0);
+			atomic_set(&toi_fail_count[i], 0);
+			toi_cur_allocd[i] = 0;
+			toi_max_allocd[i] = 0;
+		}
+		max_allocd = 0;
+		cur_allocd = 0;
+	}
+
+	return 0;	/* nothing here can fail */
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+	{ TOI_ATTR("failure_test", SYSFS_RW),
+	  SYSFS_INT(&toi_fail_num, 0, 99, 0)
+	},
+
+	{ TOI_ATTR("find_max_mem_allocated", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_GET_MAX_MEM_ALLOCD, 0)
+	},
+
+	{ TOI_ATTR("enabled", SYSFS_RW),
+	  SYSFS_INT(&toi_alloc_ops.enabled, 0, 1, 0)
+	}
+};
+
+static struct toi_module_ops toi_alloc_ops = {
+	.type					= MISC_HIDDEN_MODULE,
+	.name					= "allocation debugging",
+	.directory				= "alloc",
+	.module					= THIS_MODULE,
+	.early					= 1,
+	.initialise				= toi_alloc_initialise,
+
+	.sysfs_data		= sysfs_params,
+	.num_sysfs_entries	= sizeof(sysfs_params) /
+		sizeof(struct toi_sysfs_data),
+};
+
+int toi_alloc_init(void)
+{
+	int result = toi_register_module(&toi_alloc_ops);
+	toi_alloc_ops.enabled = 0;
+	return result;
+}
+
+void toi_alloc_exit(void)
+{
+	toi_unregister_module(&toi_alloc_ops);
+}
+#ifdef CONFIG_TOI_EXPORTS
+EXPORT_SYMBOL_GPL(toi_kzalloc);
+EXPORT_SYMBOL_GPL(toi_get_free_pages);
+EXPORT_SYMBOL_GPL(toi_get_zeroed_page);
+EXPORT_SYMBOL_GPL(toi_kfree);
+EXPORT_SYMBOL_GPL(toi_free_page);
+EXPORT_SYMBOL_GPL(toi__free_page);
+EXPORT_SYMBOL_GPL(toi_alloc_page);
+#endif
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_alloc.h linux-2.6-block-custom/kernel/power/tuxonice_alloc.h
--- linux-2.6-block/kernel/power/tuxonice_alloc.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_alloc.h	2008-09-26 19:48:23.975782537 +0900
@@ -0,0 +1,51 @@
+/*
+ * kernel/power/tuxonice_alloc.h
+ *
+ * Copyright (C) 2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#define TOI_WAIT_GFP (GFP_KERNEL | __GFP_NOWARN)
+#define TOI_ATOMIC_GFP (GFP_ATOMIC | __GFP_NOWARN)
+
+#ifdef CONFIG_PM_DEBUG
+extern void *toi_kzalloc(int fail_num, size_t size, gfp_t flags);
+extern void toi_kfree(int fail_num, const void *arg);
+
+extern unsigned long toi_get_free_pages(int fail_num, gfp_t mask,
+		unsigned int order);
+#define toi_get_free_page(FAIL_NUM, MASK) toi_get_free_pages(FAIL_NUM, MASK, 0)
+extern unsigned long toi_get_zeroed_page(int fail_num, gfp_t mask);
+extern void toi_free_page(int fail_num, unsigned long buf);
+extern void toi__free_page(int fail_num, struct page *page);
+extern void toi_free_pages(int fail_num, struct page *page, int order);
+extern struct page *toi_alloc_page(int fail_num, gfp_t mask);
+extern int toi_alloc_init(void);
+extern void toi_alloc_exit(void);
+
+extern void toi_alloc_print_debug_stats(void);
+
+#else /* CONFIG_PM_DEBUG */
+
+#define toi_kzalloc(FAIL, SIZE, FLAGS) (kzalloc(SIZE, FLAGS))
+#define toi_kfree(FAIL, ALLOCN) (kfree(ALLOCN))
+
+#define toi_get_free_pages(FAIL, FLAGS, ORDER) __get_free_pages(FLAGS, ORDER)
+#define toi_get_free_page(FAIL, FLAGS) __get_free_page(FLAGS)
+#define toi_get_zeroed_page(FAIL, FLAGS) get_zeroed_page(FLAGS)
+#define toi_free_page(FAIL, ALLOCN) do { free_page(ALLOCN); } while (0)
+#define toi__free_page(FAIL, PAGE) __free_page(PAGE)
+#define toi_free_pages(FAIL, PAGE, ORDER) __free_pages(PAGE, ORDER)
+#define toi_alloc_page(FAIL, MASK) alloc_page(MASK)
+static inline int toi_alloc_init(void)
+{
+	return 0;
+}
+
+static inline void toi_alloc_exit(void) { }
+
+static inline void toi_alloc_print_debug_stats(void) { }
+
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_atomic_copy.c linux-2.6-block-custom/kernel/power/tuxonice_atomic_copy.c
--- linux-2.6-block/kernel/power/tuxonice_atomic_copy.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_atomic_copy.c	2008-09-26 19:48:23.975782537 +0900
@@ -0,0 +1,384 @@
+/*
+ * kernel/power/tuxonice_atomic_copy.c
+ *
+ * Copyright 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ * Copyright (C) 2006 Red Hat, inc.
+ *
+ * Distributed under GPLv2.
+ *
+ * Routines for doing the atomic save/restore.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/cpu.h>
+#include <linux/freezer.h>
+#include <linux/console.h>
+#include "tuxonice.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_ui.h"
+#include "power.h"
+#include "tuxonice_io.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_alloc.h"
+
+long extra_pd1_pages_used;
+
+/**
+ * free_pbe_list: Free page backup entries used by the atomic copy code.
+ *
+ * Normally, this function isn't used. If, however, we need to abort before
+ * doing the atomic copy, we use this to free the pbes previously allocated.
+ **/
+static void free_pbe_list(struct pbe **list, int highmem)
+{
+	while (*list) {
+		struct pbe *cur, *next_page;
+		struct page *page;
+		int i;
+
+		/* In the highmem case *list is a struct page to be mapped. */
+		if (highmem) {
+			page = (struct page *) *list;
+			cur = (struct pbe *) kmap(page);
+		} else {
+			page = virt_to_page(*list);
+			cur = *list;
+		}
+
+		/* Free what each pbe's ->address refers to, then move on. */
+		for (i = 0; cur && i < PBES_PER_PAGE; i++) {
+			if (highmem)
+				toi__free_page(29, cur->address);
+			else
+				toi_free_page(29,
+					(unsigned long) cur->address);
+			cur = cur->next;
+		}
+
+		/*
+		 * Whatever the walk stopped on (possibly NULL) becomes the
+		 * head for the next pass.
+		 */
+		next_page = cur;
+		if (highmem)
+			kunmap(page);
+
+		/* Finally release the page that held the pbes themselves. */
+		toi__free_page(29, page);
+		*list = next_page;
+	}
+}
+
+/**
+ * copyback_post: Post atomic-restore actions.
+ *
+ * After doing the atomic restore, we have a few more things to do:
+ * 1) We want to retain some values across the restore, so we now copy
+ * these from the nosave variables to the normal ones.
+ * 2) Set the status flags.
+ * 3) Resume devices.
+ * 4) Tell userui so it can redraw & restore settings.
+ * 5) Reread the page cache.
+ **/
+
+void copyback_post(void)
+{
+	struct toi_boot_kernel_data *bkd =
+		(struct toi_boot_kernel_data *) boot_kernel_data_buffer;
+
+	/*
+	 * The boot kernel's data may be larger (newer version) or
+	 * smaller (older version) than ours. Copy the minimum
+	 * of the two sizes, so that we don't overwrite valid values
+	 * from pre-atomic copy.
+	 */
+
+	memcpy(&toi_bkd, (char *) boot_kernel_data_buffer,
+			min_t(int, sizeof(struct toi_boot_kernel_data),
+				bkd->size));
+
+	if (toi_activate_storage(1))
+		panic("Failed to reactivate our storage.");
+
+	toi_ui_post_atomic_restore();
+
+	toi_cond_pause(1, "About to reload secondary pagedir.");
+
+	if (read_pageset2(0))
+		panic("Unable to successfully reread the page cache.");
+
+	/*
+	 * If the user wants to sleep again after resuming from full-off,
+	 * it's most likely to be in order to suspend to ram, so we'll
+	 * do this check after loading pageset2, to give them the fastest
+	 * wakeup when they are ready to use the computer again.
+	 */
+	toi_check_resleep();
+}
+
+/**
+ * toi_copy_pageset1: Do the atomic copy of pageset1.
+ *
+ * Make the atomic copy of pageset1. We can't use copy_page (as we once did)
+ * because we can't be sure what side effects it has. On my old Duron, with
+ * 3DNOW, kernel_fpu_begin increments preempt count, making our preempt
+ * count at resume time 4 instead of 3.
+ *
+ * We don't want to call kmap_atomic unconditionally because it has the side
+ * effect of incrementing the preempt count, which will leave it one too high
+ * post resume (the page containing the preempt count will be copied after
+ * it's incremented). This is essentially the same problem.
+ **/
+
+void toi_copy_pageset1(void)
+{
+	int i;
+	unsigned long source_index, dest_index;
+
+	source_index = get_next_bit_on(&pageset1_map, max_pfn + 1);
+	dest_index = get_next_bit_on(&pageset1_copy_map, max_pfn + 1);
+
+	for (i = 0; i < pagedir1.size; i++) {
+		unsigned long *origvirt, *copyvirt;
+		struct page *origpage, *copypage;
+		int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1,
+		    was_present;
+
+		origpage = pfn_to_page(source_index);
+		copypage = pfn_to_page(dest_index);
+
+		origvirt = PageHighMem(origpage) ?
+			kmap_atomic(origpage, KM_USER0) :
+			page_address(origpage);
+
+		copyvirt = PageHighMem(copypage) ?
+			kmap_atomic(copypage, KM_USER1) :
+			page_address(copypage);
+
+		was_present = kernel_page_present(origpage);
+		if (!was_present)
+			kernel_map_pages(origpage, 1, 1);
+
+		while (loop >= 0) {
+			*(copyvirt + loop) = *(origvirt + loop);
+			loop--;
+		}
+
+		if (!was_present)
+			kernel_map_pages(origpage, 1, 0);
+
+		if (PageHighMem(origpage))
+			kunmap_atomic(origvirt, KM_USER0);
+
+		if (PageHighMem(copypage))
+			kunmap_atomic(copyvirt, KM_USER1);
+
+		source_index = get_next_bit_on(&pageset1_map, source_index);
+		dest_index = get_next_bit_on(&pageset1_copy_map, dest_index);
+	}
+}
+
+/**
+ * __toi_post_context_save: Steps after saving the cpu context.
+ *
+ * Steps taken after saving the CPU state to make the actual
+ * atomic copy.
+ *
+ * Called from swsusp_save in snapshot.c via toi_post_context_save.
+ **/
+
+int __toi_post_context_save(void)
+{
+	long old_ps1_size = pagedir1.size;
+
+	check_checksums();
+	free_checksum_pages();
+
+	toi_recalculate_image_contents(1);
+
+	extra_pd1_pages_used = pagedir1.size - old_ps1_size;
+
+	/* Allowance is printed as %lu: don't let a negative delta wrap. */
+	if (extra_pd1_pages_used > 0 &&
+	    extra_pd1_pages_used > extra_pd1_pages_allowance) {
+		printk(KERN_INFO "Pageset1 has grown by %ld pages. "
+			"extra_pages_allowance is currently only %lu.\n",
+			extra_pd1_pages_used, extra_pd1_pages_allowance);
+		set_abort_result(TOI_EXTRA_PAGES_ALLOW_TOO_SMALL);
+		return -1;
+	}
+
+	if (!test_action_state(TOI_TEST_FILTER_SPEED) &&
+	    !test_action_state(TOI_TEST_BIO))
+		toi_copy_pageset1();
+
+	return 0;
+}
+
+/**
+ * toi_hibernate: High level code for doing the atomic copy.
+ *
+ * High-level code which prepares to do the atomic copy. Loosely based
+ * on the swsusp version, but with the following twists:
+ * - We set toi_running so the swsusp code uses our code paths.
+ * - We give better feedback regarding what goes wrong if there is a problem.
+ * - We use an extra function to call the assembly, just in case this code
+ *   is in a module (return address).
+ **/
+
+int toi_hibernate(void)
+{
+	int error;
+
+	toi_running = 1; /* For the swsusp code we use :< */
+
+	error = toi_lowlevel_builtin();
+
+	toi_running = 0;
+	return error;
+}
+
+/**
+ * toi_atomic_restore: Prepare to do the atomic restore.
+ *
+ * Get ready to do the atomic restore. This part gets us into the same
+ * state we are in prior to calling do_toi_lowlevel while
+ * hibernating: hot-unplugging secondary cpus and freezing processes,
+ * before starting the thread that will do the restore.
+ **/
+
+int toi_atomic_restore(void)
+{
+	int error;
+
+	toi_running = 1;
+
+	toi_prepare_status(DONT_CLEAR_BAR,	"Atomic restore.");
+
+	if (add_boot_kernel_data_pbe())
+		goto Failed;
+
+	if (toi_go_atomic(PMSG_QUIESCE, 0))
+		goto Failed;
+
+	/* We'll ignore saved state, but this gets preempt count (etc) right */
+	save_processor_state();
+
+	error = swsusp_arch_resume();
+	/*
+	 * Code below is only ever reached in case of failure. Otherwise
+	 * execution continues at place where swsusp_arch_suspend was called.
+	 *
+	 * We don't know whether it's safe to continue (this shouldn't happen),
+	 * so lets err on the side of caution.
+	 */
+	BUG();
+
+Failed:
+	free_pbe_list(&restore_pblist, 0);
+#ifdef CONFIG_HIGHMEM
+	free_pbe_list(&restore_highmem_pblist, 1);
+#endif
+	if (test_action_state(TOI_PM_PREPARE_CONSOLE))
+		pm_restore_console();
+	toi_running = 0;
+	return 1;
+}
+
+int toi_go_atomic(pm_message_t state, int suspend_time)
+{
+	toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy/restore.");
+
+	if (suspend_time && toi_platform_begin()) {
+		set_abort_result(TOI_PLATFORM_PREP_FAILED);
+		toi_end_atomic(ATOMIC_STEP_PLATFORM_END, suspend_time, 0);
+		return 1;
+	}
+
+	suspend_console();
+
+	if (device_suspend(state)) {
+		set_abort_result(TOI_DEVICE_REFUSED);
+		toi_end_atomic(ATOMIC_STEP_RESUME_CONSOLE, suspend_time, 1);
+		return 1;
+	}
+
+	if (suspend_time && toi_platform_pre_snapshot()) {
+		set_abort_result(TOI_PRE_SNAPSHOT_FAILED);
+		toi_end_atomic(ATOMIC_STEP_PLATFORM_FINISH, suspend_time, 0);
+		return 1;
+	}
+
+	if (!suspend_time && toi_platform_pre_restore()) {
+		set_abort_result(TOI_PRE_RESTORE_FAILED);
+		toi_end_atomic(ATOMIC_STEP_DEVICE_RESUME, suspend_time, 0);
+		return 1;
+	}
+
+	if (test_action_state(TOI_LATE_CPU_HOTPLUG)) {
+		if (disable_nonboot_cpus()) {
+			set_abort_result(TOI_CPU_HOTPLUG_FAILED);
+			toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG,
+					suspend_time, 0);
+			return 1;
+		}
+	}
+
+	if (suspend_time && arch_prepare_suspend()) {
+		set_abort_result(TOI_ARCH_PREPARE_FAILED);
+		toi_end_atomic(ATOMIC_STEP_CPU_HOTPLUG, suspend_time, 0);
+		return 1;
+	}
+
+	device_pm_lock();
+	local_irq_disable();
+
+	/* At this point, device_suspend() has been called, but *not*
+	 * device_power_down(). We *must* device_power_down() now.
+	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
+	 * become desynchronized with the actual state of the hardware
+	 * at resume time, and evil weirdness ensues.
+	 */
+
+	if (device_power_down(state)) {
+		set_abort_result(TOI_DEVICE_REFUSED);
+		toi_end_atomic(ATOMIC_STEP_IRQS, suspend_time, 0);
+		return 1;
+	}
+
+	return 0;
+}
+
+void toi_end_atomic(int stage, int suspend_time, int error)
+{
+	switch (stage) {	/* every case falls through to the next */
+	case ATOMIC_ALL_STEPS:
+		if (!suspend_time)
+			toi_platform_leave();
+		device_power_up(error ? PMSG_RECOVER :
+			(suspend_time ? PMSG_THAW : PMSG_RESTORE));
+	case ATOMIC_STEP_IRQS:
+		local_irq_enable();
+		device_pm_unlock();
+	case ATOMIC_STEP_CPU_HOTPLUG:
+		if (test_action_state(TOI_LATE_CPU_HOTPLUG))
+			enable_nonboot_cpus();
+	case ATOMIC_STEP_PLATFORM_FINISH:
+		toi_platform_finish();
+	case ATOMIC_STEP_DEVICE_RESUME:
+		device_resume(error ? PMSG_RECOVER :
+			(suspend_time ? PMSG_THAW : PMSG_RESTORE));
+	case ATOMIC_STEP_RESUME_CONSOLE:
+		resume_console();
+	case ATOMIC_STEP_PLATFORM_END:
+		toi_platform_end();
+
+		toi_prepare_status(DONT_CLEAR_BAR, "Post atomic.");
+	}
+}
diff -Npur linux-2.6-block/kernel/power/tuxonice_atomic_copy.h linux-2.6-block-custom/kernel/power/tuxonice_atomic_copy.h
--- linux-2.6-block/kernel/power/tuxonice_atomic_copy.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_atomic_copy.h	2008-09-26 19:48:23.975782537 +0900
@@ -0,0 +1,22 @@
+/*
+ * kernel/power/tuxonice_atomic_copy.h
+ *
+ * Copyright 2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * Distributed under GPLv2.
+ *
+ * Routines for doing the atomic save/restore.
+ */
+
+enum {	/* entry points into the toi_end_atomic() switch, in order */
+	ATOMIC_ALL_STEPS,		/* device_power_up(), then all below */
+	ATOMIC_STEP_IRQS,		/* irqs back on, device pm unlocked */
+	ATOMIC_STEP_CPU_HOTPLUG,	/* nonboot cpus, if late hotplug set */
+	ATOMIC_STEP_PLATFORM_FINISH,	/* toi_platform_finish() */
+	ATOMIC_STEP_DEVICE_RESUME,	/* device_resume() */
+	ATOMIC_STEP_RESUME_CONSOLE,	/* resume_console() */
+	ATOMIC_STEP_PLATFORM_END,	/* toi_platform_end() + status */
+};
+
+int toi_go_atomic(pm_message_t state, int toi_time);
+void toi_end_atomic(int stage, int toi_time, int error);
diff -Npur linux-2.6-block/kernel/power/tuxonice_block_io.c linux-2.6-block-custom/kernel/power/tuxonice_block_io.c
--- linux-2.6-block/kernel/power/tuxonice_block_io.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_block_io.c	2008-09-26 19:48:23.979754551 +0900
@@ -0,0 +1,1193 @@
+/*
+ * kernel/power/tuxonice_block_io.c
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains block io functions for TuxOnIce. These are
+ * used by the swapwriter and it is planned that they will also
+ * be used by the NFSwriter.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/syscalls.h>
+#include <linux/suspend.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_block_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+
+
+#if 0	/* debug variant: PR_DEBUG prints only while pr_index is below 20 */
+static int pr_index;
+
+static inline void reset_pr_index(void)
+{
+	pr_index = 0;
+}
+
+#define PR_DEBUG(a, b...) do { \
+	if (pr_index < 20) \
+		printk(a, ##b); \
+} while (0)
+
+static inline void inc_pr_index(void)
+{
+	pr_index++;
+}
+#else	/* variant in use: all three helpers expand to empty statements */
+#define PR_DEBUG(a, b...) do { } while (0)
+#define reset_pr_index() do { } while (0)
+#define inc_pr_index do { } while (0)
+#endif	/* NOTE(review): callers write "inc_pr_index;" with no parentheses */
+
+#define TARGET_OUTSTANDING_IO 16384
+
+#define MEASURE_MUTEX_CONTENTION
+#ifndef MEASURE_MUTEX_CONTENTION
+#define my_mutex_lock(index, the_lock) mutex_lock(the_lock)
+#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
+#else
+/* Lock counts: [0 = read, 1 = write][0 = contended, 1 = free][cpu] */
+unsigned long mutex_times[2][2][NR_CPUS];
+#define my_mutex_lock(index, the_lock) do { \
+	int have_mutex; \
+	have_mutex = mutex_trylock(the_lock); \
+	if (!have_mutex) { \
+		mutex_lock(the_lock); \
+		mutex_times[index][0][smp_processor_id()]++; \
+	} else { \
+		mutex_times[index][1][smp_processor_id()]++; \
+	} \
+} while (0)
+
+#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
+#endif
+
+static int target_outstanding_io = 1024;	/* exposed via sysfs_params */
+static int max_outstanding_writes, max_outstanding_reads; /* peaks: submit() */
+
+static struct page *bio_queue_head, *bio_queue_tail; /* linked by ->private */
+static DEFINE_SPINLOCK(bio_queue_lock);	/* guards the bio_queue list */
+
+static int free_mem_throttle;	/* only ever raised, by set_throttle() */
+static int more_readahead = 1;	/* cleared when readahead should stop */
+static struct page *readahead_list_head, *readahead_list_tail;
+
+static struct page *waiting_on;	/* page do_bio_wait() waits on, if any */
+
+static atomic_t toi_io_in_progress;	/* bios submitted, not yet completed */
+static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait);
+
+static int extra_page_forward;	/* one-shot flag used by go_next_page() */
+
+static int current_stream;
+/* 0 = Header, 1 = Pageset1, 2 = Pageset2, 3 = End of PS1 */
+struct hibernate_extent_iterate_saved_state toi_writer_posn_save[4];
+
+/* Pointer to current entry being loaded/saved. */
+struct hibernate_extent_iterate_state toi_writer_posn;
+
+/* Not static, so that the allocators can setup and complete
+ * writing the header */
+char *toi_writer_buffer;
+int toi_writer_buffer_posn;	/* byte offset within toi_writer_buffer */
+
+static struct toi_bdev_info *toi_devinfo;	/* set by toi_set_devinfo() */
+
+DEFINE_MUTEX(toi_bio_mutex);	/* taken around page reads and writes */
+
+static struct task_struct *toi_queue_flusher;	/* toi_bio_initialise() */
+static int toi_bio_queue_flush_pages(int dedicated_thread);
+
+/**
+ * set_throttle: Set the point where we pause to avoid oom.
+ *
+ * Initially, this value is zero, but when we first fail to allocate memory,
+ * we set it (plus a buffer) and thereafter throttle i/o once that limit is
+ * reached.
+ */
+
+static void set_throttle(void)
+{
+	int candidate = 256 + nr_unallocated_buffer_pages();
+	/* The throttle point is only ever raised, never lowered. */
+	if (free_mem_throttle < candidate)
+		free_mem_throttle = candidate;
+}
+
+#define NUM_REASONS 10
+static atomic_t reasons[NUM_REASONS];	/* do_bio_wait() counts per reason */
+static char *reason_name[NUM_REASONS] = {
+	"readahead not ready",		/* 0: toi_bio_get_next_page_read() */
+	"bio allocation",		/* 1: submit() */
+	"io_struct allocation",		/* 2 */
+	"submit buffer",		/* 3 */
+	"synchronous I/O",		/* 4: toi_do_io() */
+	"bio mutex when reading",	/* 5 */
+	"bio mutex when writing",	/* 6 */
+	"toi_bio_get_new_page",		/* 7: toi_bio_get_new_page() */
+	"memory low",			/* 8: throttle_if_memory_low() */
+	"readahead buffer allocation"	/* 9: toi_start_one_readahead() */
+};
+
+/**
+ * do_bio_wait: Wait for some TuxOnIce i/o to complete.
+ *
+ * @reason: Index into reasons[] of the counter to bump for this wait.
+ * Waits on the waiting_on page if one is set, else on the i/o wait queue.
+ */
+static void do_bio_wait(int reason)
+{
+	struct page *target = waiting_on;
+
+	/* waiting_on can be reset under us on SMP, so use the snapshot */
+	if (!target) {
+		atomic_inc(&reasons[reason]);
+		wait_event(num_in_progress_wait,
+			!atomic_read(&toi_io_in_progress) ||
+			nr_unallocated_buffer_pages() > free_mem_throttle);
+		return;
+	}
+
+	if (PageLocked(target)) {
+		wait_on_page_bit(target, PG_locked);
+		atomic_inc(&reasons[reason]);
+	}
+}
+
+static void throttle_if_memory_low(void)
+{
+	int avail;
+
+	/* Wait ("memory low") while short of memory with i/o still pending */
+	for (avail = nr_unallocated_buffer_pages();
+	     unlikely(avail < free_mem_throttle) &&
+			atomic_read(&toi_io_in_progress);
+	     avail = nr_unallocated_buffer_pages())
+		do_bio_wait(8);
+}
+
+/**
+ * toi_finish_all_io: Wait until toi_io_in_progress has dropped to zero.
+ */
+static void toi_finish_all_io(void)
+{
+	wait_event(num_in_progress_wait, !atomic_read(&toi_io_in_progress));
+}
+
+/**
+ * toi_end_bio: bio completion function.
+ *
+ * @bio: bio that has completed.
+ * @err: Error value. Yes, like end_swap_bio_read, we ignore it.
+ *
+ * Function called by block driver from interrupt context when I/O is completed.
+ * Nearly the fs/buffer.c version, but we want to do our cleanup too. We only
+ * free pages if they were buffers used when writing the image.
+ */
+static void toi_end_bio(struct bio *bio, int err)
+{
+	struct page *page = bio->bi_io_vec[0].bv_page;
+
+	BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
+
+	unlock_page(page);	/* locked in toi_do_io() */
+	bio_put(bio);		/* pairs with the bio_get() in submit() */
+
+	if (waiting_on == page)
+		waiting_on = NULL;
+
+	put_page(page);		/* pairs with the get_page() in toi_do_io() */
+
+	if (bio->bi_private)	/* non-zero free_group given to submit() */
+		toi__free_page((int) ((unsigned long) bio->bi_private) , page);
+
+	bio_put(bio);		/* presumably drops the last reference */
+
+	atomic_dec(&toi_io_in_progress);
+
+	wake_up(&num_in_progress_wait);
+}
+
+/**
+ *	submit - submit BIO request.
+ *	@writing: READ or WRITE.
+ *	@free_group: Stored in bi_private for toi_end_bio() (0 = don't free).
+ * 	Based on Patrick's pmdisk code from long ago:
+ *	"Straight from the textbook - allocate and initialize the bio.
+ *	If we're writing, make sure the page is marked as dirty.
+ *	Then submit it and carry on."
+ *
+ *	With a twist, though - we handle block_size != PAGE_SIZE.
+ *	Caller has already checked that our page is not fragmented.
+ */
+static int submit(int writing, struct block_device *dev, sector_t first_block,
+		struct page *page, int free_group)
+{
+	struct bio *bio = NULL;
+	int cur_outstanding_io;
+
+	throttle_if_memory_low();
+
+	while (!bio) {	/* retry until a bio can be allocated */
+		bio = bio_alloc(TOI_ATOMIC_GFP, 1);
+		if (!bio) {
+			set_throttle();
+			do_bio_wait(1);	/* "bio allocation" */
+		}
+	}
+
+	bio->bi_bdev = dev;
+	bio->bi_sector = first_block;
+	bio->bi_private = (void *) ((unsigned long) free_group);
+	bio->bi_end_io = toi_end_bio;
+
+	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+		printk(KERN_INFO "ERROR: adding page to bio at %lld\n",
+				(unsigned long long) first_block);
+		bio_put(bio);
+		return -EFAULT;
+	}
+
+	bio_get(bio);	/* extra reference; toi_end_bio() calls bio_put() twice */
+
+	cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress);
+	if (writing) {	/* track the peaks reported in the debug stats */
+		if (cur_outstanding_io > max_outstanding_writes)
+			max_outstanding_writes = cur_outstanding_io;
+	} else {
+		if (cur_outstanding_io > max_outstanding_reads)
+			max_outstanding_reads = cur_outstanding_io;
+	}
+
+
+	if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED))) {
+		/* Fake having done the hard work */
+		set_bit(BIO_UPTODATE, &bio->bi_flags);
+		toi_end_bio(bio, 0);	/* complete at once, no real i/o */
+	} else
+		submit_bio(writing | (1 << BIO_RW_SYNC), bio);
+
+	return 0;
+}
+
+/**
+ * toi_do_io: Prepare to do some i/o on a page and submit or batch it.
+ *
+ * @writing: Whether reading or writing.
+ * @bdev: The block device which we're using.
+ * @block0: The first sector we're reading or writing.
+ * @page: The page on which I/O is being done.
+ * @is_readahead: Whether to append the page to the readahead list.
+ * @syncio: Whether the i/o is being done synchronously.
+ *
+ * Prepare and start a read or write operation. @free_group goes to submit().
+ *
+ * Note that we always work with our own page. If writing, we might be given a
+ * compression buffer that will immediately be used to start compressing the
+ * next page. For reading, we do readahead and therefore don't know the final
+ * address where the data needs to go.
+ */
+static int toi_do_io(int writing, struct block_device *bdev, long block0,
+	struct page *page, int is_readahead, int syncio, int free_group)
+{
+	page->private = 0;	/* ->private is the list link; none yet */
+
+	/* Do here so we don't race against toi_bio_get_next_page_read */
+	lock_page(page);
+
+	if (is_readahead) {	/* append to the readahead list */
+		if (readahead_list_head)
+			readahead_list_tail->private = (unsigned long) page;
+		else
+			readahead_list_head = page;
+
+		readahead_list_tail = page;
+	}
+
+	/* Done before submitting to avoid races. */
+	if (syncio)
+		waiting_on = page;
+
+	/* Submit the page */
+	get_page(page);
+
+	if (submit(writing, bdev, block0, page, free_group))
+		return -EFAULT;	/* NOTE(review): page still locked + held */
+
+	if (syncio)
+		do_bio_wait(4);	/* "synchronous I/O" */
+
+	return 0;
+}
+
+/**
+ * toi_bdev_page_io: Simpler interface to do direct i/o on a single page.
+ *
+ * @writing: Whether reading or writing.
+ * @bdev: Block device on which we're operating.
+ * @pos: Sector at which page to read starts.
+ * @page: Page to be read/written.
+ *
+ * We used to use bread here, but it doesn't correctly handle
+ * blocksize != PAGE_SIZE. Now we call toi_do_io() with syncio set (and
+ * neither readahead nor a free group), so the call waits on the page.
+ */
+static int toi_bdev_page_io(int writing, struct block_device *bdev,
+		long pos, struct page *page)
+{
+	return toi_do_io(writing, bdev, pos, page, 0, 1, 0);
+}
+
+/**
+ * toi_bio_memory_needed: Report amount of memory needed for block i/o.
+ *
+ * The estimate is target_outstanding_io transactions, each costing one
+ * page plus a struct request and a struct bio.
+ */
+static int toi_bio_memory_needed(void)
+{
+	return (sizeof(struct bio) + sizeof(struct request) + PAGE_SIZE) *
+			target_outstanding_io;
+}
+
+/*
+ * toi_bio_print_debug_stats: Write block i/o statistics into @buffer (of
+ * @size bytes): peak reads/writes in flight, the memory estimate, mutex
+ * contention counts (if measured) and the throttle point.
+ */
+static int toi_bio_print_debug_stats(char *buffer, int size)
+{
+	int len = snprintf_used(buffer, size, "- Max outstanding reads %d. Max "
+			"writes %d.\n", max_outstanding_reads,
+			max_outstanding_writes);
+
+	len += snprintf_used(buffer + len, size - len,
+		"  Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n",
+		target_outstanding_io,
+		PAGE_SIZE, (unsigned int) sizeof(struct request),
+		(unsigned int) sizeof(struct bio), toi_bio_memory_needed());
+
+#ifdef MEASURE_MUTEX_CONTENTION
+	{
+	int i;
+
+	len += snprintf_used(buffer + len, size - len,
+		"  Mutex contention while reading:\n  Contended      Free\n");
+
+	for_each_online_cpu(i)	/* index 0 = locks taken for reading */
+		len += snprintf_used(buffer + len, size - len,
+		"  %9lu %9lu\n",
+		mutex_times[0][0][i], mutex_times[0][1][i]);
+
+	len += snprintf_used(buffer + len, size - len,
+		"  Mutex contention while writing:\n  Contended      Free\n");
+
+	for_each_online_cpu(i)	/* index 1 = locks taken for writing */
+		len += snprintf_used(buffer + len, size - len,
+		"  %9lu %9lu\n",
+		mutex_times[1][0][i], mutex_times[1][1][i]);
+
+	}
+#endif
+
+	return len + snprintf_used(buffer + len, size - len,
+		"  Free mem throttle point reached %d.\n", free_mem_throttle);
+}
+
+/**
+ * toi_set_devinfo: Set the bdev info used for i/o.
+ *
+ * @info: Pointer to array of struct toi_bdev_info - the list of
+ * bdevs and blocks on them in which the image is stored.
+ *
+ * Only the pointer is recorded; the array is later indexed by the current
+ * extent chain number. Sort of like putting a tape in the cassette player.
+ */
+static void toi_set_devinfo(struct toi_bdev_info *info)
+{
+	toi_devinfo = info;
+}
+
+/**
+ * dump_block_chains: Print the contents of the bdev info array.
+ */
+static void dump_block_chains(void)
+{
+	int chain, slot;
+
+	/* One line per non-empty chain, listing its extents in order. */
+	for (chain = 0; chain < toi_writer_posn.num_chains; chain++) {
+		struct hibernate_extent *ext =
+			toi_writer_posn.chains[chain].first;
+
+		if (ext) {
+			printk(KERN_INFO "Chain %d:", chain);
+
+			for (; ext; ext = ext->next)
+				printk(" [%lu-%lu]%s", ext->start, ext->end,
+						ext->next ? "," : "");
+			printk("\n");
+		}
+	}
+
+	/* Then each of the four saved positions. */
+	for (slot = 0; slot < 4; slot++) {
+		struct hibernate_extent_iterate_saved_state *saved =
+			&toi_writer_posn_save[slot];
+
+		printk(KERN_INFO "Posn %d: Chain %d, extent %d, offset %lu.\n",
+				slot, saved->chain_num, saved->extent_num,
+				saved->offset);
+	}
+}
+
+/**
+ * go_next_page: Skip blocks to the start of the next page.
+ * Returns 0, or -ENODATA once the end of the extents has been reached.
+ * Go forward one page, or two if extra_page_forward is set. It only gets
+ * set at the start of reading the image header, to skip the first page
+ * of the header, which is read without using the extent chains.
+ */
+static int go_next_page(int writing)
+{
+	int i, max = (toi_writer_posn.current_chain == -1) ? 1 :
+	  toi_devinfo[toi_writer_posn.current_chain].blocks_per_page;
+
+	for (i = 0; i < max; i++)	/* one step per block in a page */
+		toi_extent_state_next(&toi_writer_posn);
+
+	if (toi_extent_state_eof(&toi_writer_posn)) {
+		/* Don't complain if readahead falls off the end */
+		if (writing) {
+			printk(KERN_INFO "Extent state eof. "
+				"Expected compression ratio too optimistic?\n");
+			dump_block_chains();
+		}
+		return -ENODATA;
+	}
+
+	if (extra_page_forward) {
+		extra_page_forward = 0;	/* one-shot: cleared before recursing */
+		return go_next_page(writing);
+	}
+
+	return 0;
+}
+
+/**
+ * set_extra_page_forward: Make us skip an extra page on next go_next_page.
+ *
+ * Used in reading header, to jump to 2nd page after getting 1st page
+ * direct from image header.
+ */
+static void set_extra_page_forward(void)
+{
+	extra_page_forward = 1;	/* cleared again by go_next_page() */
+}
+
+/**
+ * toi_bio_rw_page: Do i/o on the next disk page in the image.
+ *
+ * @writing: Whether reading or writing.
+ * @page: Page to do i/o on.
+ * @is_readahead: Whether to add the page to the readahead list.
+ * @free_group: Passed through to toi_do_io().
+ * Submit a page for reading or writing, possibly readahead.
+ */
+static int toi_bio_rw_page(int writing, struct page *page,
+		int is_readahead, int free_group)
+{
+	struct toi_bdev_info *dev_info;
+	int result;
+
+	if (go_next_page(writing)) {
+		printk(KERN_INFO "Failed to advance a page in the extent "
+				"data.\n");
+		return -ENODATA;
+	}
+
+	if (current_stream == 0 && writing &&	/* header write ... */
+		toi_writer_posn.current_chain ==
+			toi_writer_posn_save[2].chain_num &&
+		toi_writer_posn.current_offset ==
+			toi_writer_posn_save[2].offset) { /* ... at saved posn 2 */
+		dump_block_chains();
+		BUG();
+	}
+
+	dev_info = &toi_devinfo[toi_writer_posn.current_chain];
+
+	result = toi_do_io(writing, dev_info->bdev,
+		toi_writer_posn.current_offset <<
+			dev_info->bmap_shift,
+		page, is_readahead, 0, free_group);
+
+	if (result) {
+		more_readahead = 0;
+		return result;
+	}
+
+	if (!writing) {
+		int compare_to = 0;
+
+		switch (current_stream) { /* saved posn where readahead stops */
+		case 0:
+			compare_to = 2;
+			break;
+		case 1:
+			compare_to = 3;
+			break;
+		case 2:
+			compare_to = 1;
+			break;
+		}
+
+		if (toi_writer_posn.current_chain ==
+				toi_writer_posn_save[compare_to].chain_num &&
+		    toi_writer_posn.current_offset ==
+				toi_writer_posn_save[compare_to].offset)
+			more_readahead = 0;
+	}
+	return 0;
+}
+
+/**
+ * toi_rw_init: Prepare to read or write a stream in the image.
+ *
+ * @writing: Whether reading or writing.
+ * @stream number: Section of the image being processed.
+ */
+static int toi_rw_init(int writing, int stream_number)
+{
+	/* Stream 0 starts from the beginning; others from a saved position. */
+	if (!stream_number)
+		toi_extent_state_goto_start(&toi_writer_posn);
+	else
+		toi_extent_state_restore(&toi_writer_posn,
+				&toi_writer_posn_save[stream_number]);
+
+	current_stream = stream_number;
+	more_readahead = 1;
+	reset_pr_index();
+
+	/* A reader starts at the end of the buffer, a writer at its start. */
+	toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE;
+	toi_writer_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
+	return toi_writer_buffer ? 0 : -ENOMEM;
+}
+
+/**
+ * toi_read_header_init: Prepare to read the image header.
+ *
+ * Allocate the writer buffer (group 11) and allow readahead again.
+ */
+static void toi_read_header_init(void)
+{
+	toi_writer_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
+	more_readahead = 1;	/* NOTE(review): allocation result unchecked */
+}
+
+/*
+ * toi_bio_queue_write
+ */
+static void toi_bio_queue_write(char **full_buffer)
+{
+	unsigned long irq_flags;
+	struct page *queued = virt_to_page(*full_buffer);
+
+	/* page->private is the "next" link; this page becomes the tail. */
+	queued->private = 0;
+
+	spin_lock_irqsave(&bio_queue_lock, irq_flags);
+	if (bio_queue_head)
+		bio_queue_tail->private = (unsigned long) queued;
+	else
+		bio_queue_head = queued;
+	bio_queue_tail = queued;
+	spin_unlock_irqrestore(&bio_queue_lock, irq_flags);
+
+	wake_up(&toi_io_queue_flusher);
+	/* The queue holds the page now; make the caller forget it. */
+	*full_buffer = NULL;
+}
+
+/**
+ * toi_rw_cleanup: Cleanup after i/o.
+ * Returns 0, or the error from flushing the queued writes.
+ * @writing: Whether we were reading or writing.
+ */
+static int toi_rw_cleanup(int writing)
+{
+	int i;
+
+	if (writing) {
+		int result;
+
+		if (toi_writer_buffer_posn)	/* partly filled page left */
+			toi_bio_queue_write(&toi_writer_buffer);
+
+		result = toi_bio_queue_flush_pages(0);
+
+		if (result)	/* NOTE(review): skips the cleanup below */
+			return result;
+
+		if (current_stream == 2)	/* saved as position 1 */
+			toi_extent_state_save(&toi_writer_posn,
+					&toi_writer_posn_save[1]);
+		else if (current_stream == 1)	/* saved as position 3 */
+			toi_extent_state_save(&toi_writer_posn,
+					&toi_writer_posn_save[3]);
+	}
+
+	toi_finish_all_io();
+
+	while (readahead_list_head) {	/* free unused readahead pages */
+		void *next = (void *) readahead_list_head->private;
+		toi__free_page(12, readahead_list_head);
+		readahead_list_head = next;
+	}
+
+	readahead_list_tail = NULL;
+
+	if (!current_stream)	/* no wait statistics after stream 0 */
+		return 0;
+
+	for (i = 0; i < NUM_REASONS; i++) {	/* report, then reset */
+		if (!atomic_read(&reasons[i]))
+			continue;
+		printk(KERN_INFO "Waited for i/o due to %s %d times.\n",
+				reason_name[i], atomic_read(&reasons[i]));
+		atomic_set(&reasons[i], 0);
+	}
+
+	current_stream = 0;
+	return 0;
+}
+
+int toi_start_one_readahead(int dedicated_thread)
+{
+	char *buffer;
+	int failed_before = 0;
+
+	throttle_if_memory_low();
+
+	/* Get a page to read into, waiting for i/o when memory is short. */
+	for (;;) {
+		buffer = (char *) toi_get_zeroed_page(12, TOI_ATOMIC_GFP);
+		if (buffer)
+			break;
+		/* A second failure is fatal unless we are a dedicated thread */
+		if (failed_before && !dedicated_thread)
+			return -EIO;
+		failed_before = 1;
+		set_throttle();
+		do_bio_wait(9);
+	}
+
+	return toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0);
+}
+
+/*
+ * toi_start_new_readahead
+ *
+ * Start readahead of image pages. Every return path yields 0.
+ *
+ * No mutex needed because this is only ever called by one cpu.
+ */
+static int toi_start_new_readahead(int dedicated_thread)
+{
+	int last_result, num_submitted = 0;
+
+	/* Start a new readahead? */
+	if (!more_readahead)
+		return 0;
+
+	do {
+		int result = toi_start_one_readahead(dedicated_thread);
+
+		if (result == -EIO)	/* no page to read into: stop quietly */
+			return 0;
+		else
+			last_result = result;
+
+		if (last_result == -ENODATA)
+			more_readahead = 0;
+
+		if (!more_readahead && last_result) {
+			/*
+			 * Don't complain about failing to do readahead past
+			 * the end of storage.
+			 */
+			if (last_result != -ENODATA)
+				printk(KERN_INFO
+					"Begin read chunk returned %d.\n",
+					last_result);
+		} else
+			num_submitted++;
+
+	} while (more_readahead &&	/* dedicated threads have no cap */
+		 (dedicated_thread ||
+		  (num_submitted < target_outstanding_io &&
+		   atomic_read(&toi_io_in_progress) < target_outstanding_io)));
+	return 0;
+}
+
+static void bio_io_flusher(int writing)
+{
+	/* Both helpers run in dedicated-thread mode; results are unused. */
+	if (!writing)
+		toi_start_new_readahead(1);
+	else
+		toi_bio_queue_flush_pages(1);
+}
+
+/**
+ * toi_bio_get_next_page_read: Read a disk page with readahead.
+ *
+ * Copy the page at the head of the readahead list into toi_writer_buffer,
+ * waiting for its i/o if need be, then free it. Returns 0 or -EIO.
+ */
+static int toi_bio_get_next_page_read(int no_readahead)
+{
+	unsigned long *virt;
+	struct page *next;
+
+	/*
+	 * When reading the second page of the header, we have to
+	 * delay submitting the read until after we've gotten the
+	 * extents out of the first page.
+	 */
+	if (unlikely(no_readahead && toi_start_one_readahead(0))) {
+		printk(KERN_INFO "No readahead and toi_start_one_readahead "
+				"returned non-zero.\n");
+		return -EIO;
+	}
+
+	/*
+	 * On SMP, we may need to wait for the first readahead
+	 * to be submitted.
+	 */
+	if (unlikely(!readahead_list_head)) {
+		BUG_ON(!more_readahead);
+		do {	/* busy-wait until a page has been queued */
+			cpu_relax();
+		} while (!readahead_list_head);
+	}
+
+	if (PageLocked(readahead_list_head)) {	/* read still in flight */
+		waiting_on = readahead_list_head;
+		do_bio_wait(0);	/* "readahead not ready" */
+	}
+
+	virt = page_address(readahead_list_head);
+	memcpy(toi_writer_buffer, virt, PAGE_SIZE);
+
+	next = (struct page *) readahead_list_head->private;
+	toi__free_page(12, readahead_list_head);
+	readahead_list_head = next;	/* tail pointer is left untouched */
+	return 0;
+}
+
+/*
+ * toi_bio_queue_flush_pages: Write out all queued pages. A dedicated thread
+ * keeps doing so until told to finish. Returns 0 or a toi_bio_rw_page() error.
+ */
+static int toi_bio_queue_flush_pages(int dedicated_thread)
+{
+	unsigned long flags;
+	int result = 0;
+
+top:
+	spin_lock_irqsave(&bio_queue_lock, flags);
+	while (bio_queue_head) {
+		struct page *page = bio_queue_head;
+		bio_queue_head = (struct page *) page->private;
+		if (bio_queue_tail == page)
+			bio_queue_tail = NULL;
+		spin_unlock_irqrestore(&bio_queue_lock, flags); /* for the i/o */
+		result = toi_bio_rw_page(WRITE, page, 0, 11);
+		if (result)	/* returns with the lock already released */
+			return result;
+		spin_lock_irqsave(&bio_queue_lock, flags);
+	}
+	spin_unlock_irqrestore(&bio_queue_lock, flags);
+
+	if (dedicated_thread) {	/* sleep until more work or a stop request */
+		wait_event(toi_io_queue_flusher, bio_queue_head ||
+				toi_bio_queue_flusher_should_finish);
+		if (likely(!toi_bio_queue_flusher_should_finish))
+			goto top;
+		toi_bio_queue_flusher_should_finish = 0;
+	}
+	return 0;
+}
+
+/*
+ * toi_bio_get_new_page
+ */
+static void toi_bio_get_new_page(char **full_buffer)
+{
+	throttle_if_memory_low();
+	/* Retry until a page is obtained, waiting for i/o after each failure */
+	while (!*full_buffer) {
+		*full_buffer = (char *) toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
+		if (*full_buffer)
+			break;
+		set_throttle();
+		do_bio_wait(7);
+	}
+}
+
+/*
+ * toi_rw_buffer: Combine smaller buffers into PAGE_SIZE I/O.
+ * @writing: Bool - whether writing (or reading).
+ * @buffer: The start of the buffer to write or fill.
+ * @buffer_size: The size of the buffer to write or fill.
+ * @no_readahead: Passed to toi_bio_get_next_page_read() when reading.
+ */
+static int toi_rw_buffer(int writing, char *buffer, int buffer_size,
+		int no_readahead)
+{
+	int bytes_left = buffer_size;
+
+	while (bytes_left) {
+		char *source_start = buffer + buffer_size - bytes_left;
+		char *dest_start = toi_writer_buffer + toi_writer_buffer_posn;
+		int capacity = PAGE_SIZE - toi_writer_buffer_posn;
+		char *to = writing ? dest_start : source_start;
+		char *from = writing ? source_start : dest_start;
+
+		if (bytes_left <= capacity) {	/* fits in the current page */
+			memcpy(to, from, bytes_left);
+			toi_writer_buffer_posn += bytes_left;
+			return 0;
+		}
+
+		/* Complete this page and start a new one */
+		memcpy(to, from, capacity);
+		bytes_left -= capacity;
+
+		if (!writing) {	/* refill toi_writer_buffer from readahead */
+			int result = toi_bio_get_next_page_read(no_readahead);
+			if (result)
+				return result;
+		} else {	/* queue the full page, then get an empty one */
+			toi_bio_queue_write(&toi_writer_buffer);
+			toi_bio_get_new_page(&toi_writer_buffer);
+		}
+
+		toi_writer_buffer_posn = 0;
+		toi_cond_pause(0, NULL);
+	}
+
+	return 0;
+}
+
+/**
+ * toi_bio_read_page - read a page of the image.
+ *
+ * @pfn: The pfn where the data belongs.
+ * @buffer_page: The page containing the (possibly compressed) data.
+ * @buf_size: The number of bytes on @buffer_page used.
+ *
+ * Read a (possibly compressed) page from the image into buffer_page, storing
+ * its pfn and size. Returns 0, 1 if the read failed, or -EIO (readahead).
+ */
+static int toi_bio_read_page(unsigned long *pfn, struct page *buffer_page,
+		unsigned int *buf_size)
+{
+	int result = 0;
+	char *buffer_virt;
+
+	inc_pr_index;
+
+	/* Only call start_new_readahead if we don't have a dedicated thread */
+	if (current == toi_queue_flusher && toi_start_new_readahead(0)) {
+		printk(KERN_INFO "Queue flusher and toi_start_new_readahead "
+				"returned non-zero.\n");
+		return -EIO;
+	}
+
+	my_mutex_lock(0, &toi_bio_mutex);
+	buffer_virt = kmap(buffer_page); /* mapped after the early return */
+	if (toi_rw_buffer(READ, (char *) pfn, sizeof(unsigned long), 0) ||
+	    toi_rw_buffer(READ, (char *) buf_size, sizeof(int), 0) ||
+	    toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) {
+		abort_hibernate(TOI_FAILED_IO, "Read of data failed.");
+		result = 1;
+	} else
+		PR_DEBUG("%d: PFN %ld, %d bytes.\n", pr_index, *pfn, *buf_size);
+
+	kunmap(buffer_page);
+	my_mutex_unlock(0, &toi_bio_mutex);
+	return result;
+}
+
+/**
+ * toi_bio_write_page - Write a page of the image.
+ *
+ * @pfn: The pfn where the data belongs.
+ * @buffer_page: The page containing the (possibly compressed) data.
+ * @buf_size: The number of bytes on @buffer_page used.
+ *
+ * Write a (possibly compressed) page to the image from the buffer, together
+ * with its index and buffer size. Returns 0, -EIO or a flush error.
+ */
+static int toi_bio_write_page(unsigned long pfn, struct page *buffer_page,
+		unsigned int buf_size)
+{
+	char *buffer_virt;
+	int result = 0, result2 = 0;
+
+	inc_pr_index;
+
+	if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED)))
+		return 0;	/* speed test: nothing is written */
+
+	my_mutex_lock(1, &toi_bio_mutex);
+	buffer_virt = kmap(buffer_page);
+
+	if (toi_rw_buffer(WRITE, (char *) &pfn, sizeof(unsigned long), 0) ||
+	    toi_rw_buffer(WRITE, (char *) &buf_size, sizeof(int), 0) ||
+	    toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) {
+		printk(KERN_INFO "toi_rw_buffer returned non-zero to "
+				"toi_bio_write_page.\n");
+		result = -EIO;
+	}
+
+	PR_DEBUG("%d: Index %ld, %d bytes. Result %d.\n", pr_index, pfn,
+			buf_size, result);
+
+	kunmap(buffer_page);
+	my_mutex_unlock(1, &toi_bio_mutex);
+
+	if (current == toi_queue_flusher)	/* we also submit the i/o */
+		result2 = toi_bio_queue_flush_pages(0);
+
+	return result ? result : result2;
+}
+
+/**
+ * _toi_rw_header_chunk: Read or write a portion of the image header.
+ *
+ * @writing: Whether reading or writing.
+ * @owner: The module for which we're writing. Used for confirming that modules
+ * don't use more header space than they asked for.
+ * @buffer: Address of the data to write.
+ * @buffer_size: Size of the data buffer.
+ * @no_readahead: Don't try to start readahead (when still getting extents)
+ */
+static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
+		char *buffer, int buffer_size, int no_readahead)
+{
+	int result = 0;
+
+	if (owner) {
+		owner->header_used += buffer_size;
+		toi_message(TOI_HEADER, TOI_LOW, 1,
+			"Header: %s : %d bytes (%d/%d).\n",
+			owner->name, buffer_size, owner->header_used,
+			owner->header_requested);
+		if (owner->header_used > owner->header_requested) {
+			printk(KERN_EMERG "TuxOnIce module %s is using more "
+				"header space (%u) than it requested (%u).\n",
+				owner->name,
+				owner->header_used,
+				owner->header_requested);
+			return buffer_size; /* NOTE(review): not an errno */
+		}
+	} else
+		toi_message(TOI_HEADER, TOI_LOW, 1,
+			"Header: (No owner): %d bytes.\n", buffer_size);
+
+	if (!writing && !no_readahead)
+		result = toi_start_new_readahead(0);
+
+	if (!result)
+		result = toi_rw_buffer(writing, buffer, buffer_size,
+				no_readahead);
+
+	return result;
+}
+
+static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
+		char *buffer, int size)	/* no_readahead = 0 */
+{
+	return _toi_rw_header_chunk(writing, owner, buffer, size, 0);
+}
+
+static int toi_rw_header_chunk_noreadahead(int writing,	/* no_readahead = 1 */
+		struct toi_module_ops *owner, char *buffer, int size)
+{
+	return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
+}
+
+/**
+ * write_header_chunk_finish: Flush any buffered header data.
+ */
+static int write_header_chunk_finish(void)
+{
+	int result;
+
+	if (toi_writer_buffer_posn)
+		toi_bio_queue_write(&toi_writer_buffer);
+
+	/* Report a failed flush, but still wait for what was submitted. */
+	result = toi_bio_queue_flush_pages(0);
+	toi_finish_all_io();
+	return result;
+}
+
+/**
+ * toi_bio_storage_needed: Get the amount of storage needed for my fns.
+ */
+static int toi_bio_storage_needed(void)
+{
+	return 2 * sizeof(int);	/* NOTE(review): save_config_info writes one */
+}
+
+/**
+ * toi_bio_save_config_info: Save block i/o config to image header.
+ *
+ * @buf: PAGE_SIZE'd buffer into which data should be saved.
+ */
+static int toi_bio_save_config_info(char *buf)
+{
+	/* The only setting saved is target_outstanding_io, as a raw int. */
+	*(int *) buf = target_outstanding_io;
+	return sizeof(target_outstanding_io);
+}
+
+/**
+ * toi_bio_load_config_info: Restore block i/o config.
+ *
+ * @buf: Data to be reloaded.
+ * @size: Size of the buffer saved.
+ */
+static void toi_bio_load_config_info(char *buf, int size)
+{
+	/* Reads back the single int that was saved; @size is not looked at. */
+	target_outstanding_io = *(int *) buf;
+}
+
+/**
+ * toi_bio_initialise: Initialise bio code at start of some action.
+ *
+ * @starting_cycle: Whether starting a hibernation cycle, or just reading or
+ * writing a sysfs value.
+ */
+static int toi_bio_initialise(int starting_cycle)
+{
+	if (!starting_cycle)
+		return 0;
+	/* The task that starts the cycle is recorded as the queue flusher. */
+	toi_queue_flusher = current;
+	max_outstanding_reads = 0;
+	max_outstanding_writes = 0;
+#ifdef MEASURE_MUTEX_CONTENTION
+	{
+	int cpu, op, outcome;
+
+	for (op = 0; op < 2; op++)
+		for (outcome = 0; outcome < 2; outcome++)
+			for_each_online_cpu(cpu)
+				mutex_times[op][outcome][cpu] = 0;
+	}
+#endif
+	return 0;
+}
+
+/**
+ * toi_bio_cleanup: Cleanup after some action.
+ *
+ * @finishing_cycle: Whether completing a cycle.
+ */
+static void toi_bio_cleanup(int finishing_cycle)
+{
+	if (!toi_writer_buffer)
+		return;
+	toi_free_page(11, (unsigned long) toi_writer_buffer);
+	toi_writer_buffer = NULL;	/* a repeat call then does nothing */
+}
+
+struct toi_bio_ops toi_bio_ops = {	/* the two *_io_stats hooks stay NULL */
+	.bdev_page_io = toi_bdev_page_io,
+	.finish_all_io = toi_finish_all_io,
+	.forward_one_page = go_next_page,
+	.set_extra_page_forward = set_extra_page_forward,
+	.set_devinfo = toi_set_devinfo,
+	.read_page = toi_bio_read_page,
+	.write_page = toi_bio_write_page,
+	.rw_init = toi_rw_init,
+	.rw_cleanup = toi_rw_cleanup,
+	.read_header_init = toi_read_header_init,
+	.rw_header_chunk = toi_rw_header_chunk,
+	.rw_header_chunk_noreadahead = toi_rw_header_chunk_noreadahead,
+	.write_header_chunk_finish = write_header_chunk_finish,
+	.io_flusher = bio_io_flusher,
+};
+
+static struct toi_sysfs_data sysfs_params[] = {
+	{ TOI_ATTR("target_outstanding_io", SYSFS_RW),
+	  SYSFS_INT(&target_outstanding_io, 0, TARGET_OUTSTANDING_IO, 0),
+	}	/* presumably 0 and TARGET_OUTSTANDING_IO are the bounds */
+};
+
+static struct toi_module_ops toi_blockwriter_ops = {
+	.name			= "lowlevel i/o",
+	.directory		= "block_io",
+	.type			= MISC_HIDDEN_MODULE,
+	.module			= THIS_MODULE,
+	/* Lifecycle, resource estimates, saved configuration, debug output */
+	.initialise		= toi_bio_initialise,
+	.cleanup		= toi_bio_cleanup,
+	.memory_needed		= toi_bio_memory_needed,
+	.storage_needed		= toi_bio_storage_needed,
+	.save_config_info	= toi_bio_save_config_info,
+	.load_config_info	= toi_bio_load_config_info,
+	.print_debug_info	= toi_bio_print_debug_stats,
+	.sysfs_data		= sysfs_params,
+	.num_sysfs_entries	=
+		sizeof(sysfs_params) / sizeof(sysfs_params[0]),
+};
+
+/**
+ * toi_block_io_load: Load time routine for block i/o module.
+ * Register block i/o ops and sysfs entries. Returns whatever
+ * toi_register_module() returns.
+ */
+static __init int toi_block_io_load(void)
+{
+	return toi_register_module(&toi_blockwriter_ops);
+}
+
+#if defined(CONFIG_TOI_FILE_EXPORTS) || defined(CONFIG_TOI_SWAP_EXPORTS)
+EXPORT_SYMBOL_GPL(toi_writer_posn);
+EXPORT_SYMBOL_GPL(toi_writer_posn_save);
+EXPORT_SYMBOL_GPL(toi_writer_buffer);
+EXPORT_SYMBOL_GPL(toi_writer_buffer_posn);
+EXPORT_SYMBOL_GPL(toi_bio_ops);
+#endif
+#ifdef MODULE
+static __exit void toi_block_io_unload(void)	/* only built if MODULE */
+{
+	toi_unregister_module(&toi_blockwriter_ops);
+}
+
+module_init(toi_block_io_load);
+module_exit(toi_block_io_unload);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nigel Cunningham");
+MODULE_DESCRIPTION("TuxOnIce block io functions");
+#else
+late_initcall(toi_block_io_load);
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_block_io.h linux-2.6-block-custom/kernel/power/tuxonice_block_io.h
--- linux-2.6-block/kernel/power/tuxonice_block_io.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_block_io.h	2008-09-26 19:48:23.979754551 +0900
@@ -0,0 +1,57 @@
+/*
+ * kernel/power/tuxonice_block_io.h
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ * Copyright (C) 2006 Red Hat, inc.
+ *
+ * Distributed under GPLv2.
+ *
+ * This file contains declarations for functions exported from
+ * tuxonice_block_io.c, which contains low level io functions.
+ */
+
+#include <linux/buffer_head.h>
+#include "tuxonice_extent.h"
+
+struct toi_bdev_info {	/* handed to toi_bio_ops.set_devinfo() */
+	struct block_device *bdev;
+	dev_t dev_t;		/* NOTE(review): presumably bdev's device number */
+	int bmap_shift;		/* NOTE(review): units not visible here; confirm */
+	int blocks_per_page;
+};
+
+/*
+ * Table of block i/o entry points, exported so that the swapwriter and
+ * filewriter can call them instead of carrying duplicates of their own.
+ */
+struct toi_bio_ops {
+	int (*bdev_page_io) (int rw, struct block_device *bdev, long pos,
+			struct page *page);
+	void (*check_io_stats) (void);
+	void (*reset_io_stats) (void);
+	void (*finish_all_io) (void);
+	int (*forward_one_page) (int writing);
+	void (*set_extra_page_forward) (void);
+	void (*set_devinfo) (struct toi_bdev_info *info);
+	int (*read_page) (unsigned long *index, struct page *buffer_page,
+			unsigned int *buf_size);
+	int (*write_page) (unsigned long index, struct page *buffer_page,
+			unsigned int buf_size);
+	void (*read_header_init) (void);
+	int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
+			char *buffer, int buffer_size);
+	int (*rw_header_chunk_noreadahead) (int rw,
+			struct toi_module_ops *owner,
+			char *buffer, int buffer_size);
+	int (*write_header_chunk_finish) (void);
+	int (*rw_init) (int rw, int stream_number);
+	int (*rw_cleanup) (int rw);
+	void (*io_flusher) (int rw);
+};
+
+extern struct toi_bio_ops toi_bio_ops;
+
+extern char *toi_writer_buffer;
+extern int toi_writer_buffer_posn;
+extern struct hibernate_extent_iterate_saved_state toi_writer_posn_save[4];
+extern struct hibernate_extent_iterate_state toi_writer_posn;
diff -Npur linux-2.6-block/kernel/power/tuxonice_builtin.c linux-2.6-block-custom/kernel/power/tuxonice_builtin.c
--- linux-2.6-block/kernel/power/tuxonice_builtin.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_builtin.c	2008-09-26 19:48:23.979754551 +0900
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/module.h>
+#include <linux/resume-trace.h>
+#include <linux/kernel.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/bio.h>
+#include <linux/root_dev.h>
+#include <linux/freezer.h>
+#include <linux/reboot.h>
+#include <linux/writeback.h>
+#include <linux/tty.h>
+#include <linux/crypto.h>
+#include <linux/cpu.h>
+#include <linux/dyn_pageflags.h>
+#include <linux/ctype.h>
+#include "tuxonice_io.h"
+#include "tuxonice.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_block_io.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_power_off.h"
+#include "power.h"
+
+/*
+ * Highmem related functions (x86 only).
+ */
+
+#ifdef CONFIG_HIGHMEM
+
+/**
+ * copyback_high: Restore highmem pages.
+ *
+ * Highmem data and pbe lists are/can be stored in highmem.
+ * The format is slightly different to the lowmem pbe lists
+ * used for the assembly code: the last pbe in each page is
+ * a struct page * instead of struct pbe *, pointing to the
+ * next page where pbes are stored (or NULL if it happens to
+ * be the end of the list). Since we don't want to generate
+ * unnecessary deltas against swsusp code, we use a cast
+ * instead of a union.
+ **/
+
+static void copyback_high(void)
+{
+	struct page *pbe_page = (struct page *) restore_highmem_pblist;
+	struct pbe *this_pbe, *first_pbe;
+	unsigned long *origpage, *copypage;
+	int pbe_index = 1;
+
+	if (!pbe_page)
+		return;
+	/* Map the first page of pbes; first_pbe is kept for the later unmap */
+	this_pbe = (struct pbe *) kmap_atomic(pbe_page, KM_BOUNCE_READ);
+	first_pbe = this_pbe;
+
+	while (this_pbe) {
+		int loop = (PAGE_SIZE / sizeof(unsigned long)) - 1;
+
+		origpage = kmap_atomic((struct page *) this_pbe->orig_address,
+			KM_BIO_DST_IRQ);
+		copypage = kmap_atomic((struct page *) this_pbe->address,
+			KM_BIO_SRC_IRQ);
+		/* Copy the page a word at a time, from last word to first */
+		while (loop >= 0) {
+			*(origpage + loop) = *(copypage + loop);
+			loop--;
+		}
+
+		kunmap_atomic(origpage, KM_BIO_DST_IRQ);
+		kunmap_atomic(copypage, KM_BIO_SRC_IRQ);
+
+		if (!this_pbe->next)
+			break;
+		/* Step to the next pbe in this page, else move to the next page */
+		if (pbe_index < PBES_PER_PAGE) {
+			this_pbe++;
+			pbe_index++;
+		} else {
+			pbe_page = (struct page *) this_pbe->next;
+			kunmap_atomic(first_pbe, KM_BOUNCE_READ);
+			if (!pbe_page)	/* NOTE(review): ->next was non-NULL above */
+				return;
+			this_pbe = (struct pbe *) kmap_atomic(pbe_page,
+					KM_BOUNCE_READ);
+			first_pbe = this_pbe;
+			pbe_index = 1;
+		}
+	}
+	kunmap_atomic(first_pbe, KM_BOUNCE_READ);
+}
+
+#else /* CONFIG_HIGHMEM */
+static inline void copyback_high(void) { }	/* no highmem to copy back */
+#endif /* CONFIG_HIGHMEM */
+
+char toi_wait_for_keypress_dev_console(int timeout)
+{
+	int fd, this_timeout = 255;
+	char key = '\0';
+	struct termios t, t_backup;
+	/* Read one key from /dev/console; returns it lower-cased, or '\0' on
+	 * error or timeout. We should be guaranteed /dev/console exists after
+	 * populate_rootfs() in init/main.c.
+	 */
+	fd = sys_open("/dev/console", O_RDONLY, 0);
+	if (fd < 0) {
+		printk(KERN_INFO "Couldn't open /dev/console.\n");
+		return key;
+	}
+
+	if (sys_ioctl(fd, TCGETS, (long)&t) < 0)
+		goto out_close;
+
+	memcpy(&t_backup, &t, sizeof(t));
+	/* No signals, line buffering or echo; VMIN 0 lets a read return empty */
+	t.c_lflag &= ~(ISIG|ICANON|ECHO);
+	t.c_cc[VMIN] = 0;
+
+new_timeout:
+	if (timeout > 0) {
+		this_timeout = timeout < 26 ? timeout : 25;
+		timeout -= this_timeout;
+		this_timeout *= 10;
+	}
+	/* NOTE(review): VTIME is presumably in tenths of a second, max 255 */
+	t.c_cc[VTIME] = this_timeout;
+
+	if (sys_ioctl(fd, TCSETS, (long)&t) < 0)
+		goto out_restore;
+
+	while (1) {
+		if (sys_read(fd, &key, 1) <= 0) {
+			if (timeout)	/* time left, or timeout is negative */
+				goto new_timeout;
+			key = '\0';
+			break;
+		}
+		key = tolower(key);
+		if (test_toi_state(TOI_SANITY_CHECK_PROMPT)) {
+			if (key == 'c') {	/* 'c' also requests continue */
+				set_toi_state(TOI_CONTINUE_REQ);
+				break;
+			} else if (key == ' ')
+				break;
+		} else
+			break;
+	}
+
+out_restore:
+	sys_ioctl(fd, TCSETS, (long)&t_backup);	/* put the saved settings back */
+out_close:
+	sys_close(fd);
+
+	return key;
+}
+
+struct toi_boot_kernel_data toi_bkd __nosavedata
+		__attribute__((aligned(PAGE_SIZE))) = {	/* positional init */
+	MY_BOOT_KERNEL_DATA_VERSION,
+	0,
+#ifdef CONFIG_TOI_REPLACE_SWSUSP
+	(1 << TOI_REPLACE_SWSUSP) |
+#endif /* CONFIG_TOI_REPLACE_SWSUSP */
+	(1 << TOI_NO_FLUSHER_THREAD) |	/* third member: OR of flag bits */
+	(1 << TOI_PAGESET2_FULL) | (1 << TOI_LATE_CPU_HOTPLUG),
+};
+EXPORT_SYMBOL_GPL(toi_bkd);
+
+struct block_device *toi_open_by_devnum(dev_t dev, unsigned mode)
+{
+	struct block_device *bdev = bdget(dev);
+	int err = -ENOMEM;
+	int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
+	flags |= O_NONBLOCK;
+	if (bdev)
+		err = blkdev_get(bdev, mode, flags);
+	return err ? ERR_PTR(err) : bdev;
+}
+EXPORT_SYMBOL_GPL(toi_open_by_devnum);
+
+EXPORT_SYMBOL_GPL(toi_wait_for_keypress_dev_console);
+EXPORT_SYMBOL_GPL(hibernation_platform_enter);
+EXPORT_SYMBOL_GPL(platform_begin);
+EXPORT_SYMBOL_GPL(platform_pre_snapshot);
+EXPORT_SYMBOL_GPL(platform_leave);
+EXPORT_SYMBOL_GPL(platform_end);
+EXPORT_SYMBOL_GPL(platform_finish);
+EXPORT_SYMBOL_GPL(platform_pre_restore);
+EXPORT_SYMBOL_GPL(platform_restore_cleanup);
+EXPORT_SYMBOL_GPL(power_kobj);
+EXPORT_SYMBOL_GPL(pm_notifier_call_chain);
+EXPORT_SYMBOL_GPL(init_swsusp_header);
+
+#ifdef CONFIG_ARCH_HIBERNATION_HEADER
+EXPORT_SYMBOL_GPL(arch_hibernation_header_save);
+EXPORT_SYMBOL_GPL(arch_hibernation_header_restore);
+#endif
+
+#ifdef CONFIG_TOI_CORE_EXPORTS
+#ifdef CONFIG_X86_64
+EXPORT_SYMBOL_GPL(restore_processor_state);
+EXPORT_SYMBOL_GPL(save_processor_state);
+#endif /* CONFIG_X86_64 */
+
+EXPORT_SYMBOL_GPL(drop_pagecache);
+EXPORT_SYMBOL_GPL(restore_pblist);
+EXPORT_SYMBOL_GPL(pm_mutex);
+EXPORT_SYMBOL_GPL(pm_restore_console);
+EXPORT_SYMBOL_GPL(super_blocks);
+EXPORT_SYMBOL_GPL(next_zone);
+
+EXPORT_SYMBOL_GPL(freeze_processes);
+EXPORT_SYMBOL_GPL(thaw_processes);
+EXPORT_SYMBOL_GPL(thaw_kernel_threads);
+EXPORT_SYMBOL_GPL(shrink_all_memory);
+EXPORT_SYMBOL_GPL(shrink_one_zone);
+EXPORT_SYMBOL_GPL(saveable_page);
+EXPORT_SYMBOL_GPL(swsusp_arch_suspend);
+EXPORT_SYMBOL_GPL(swsusp_arch_resume);
+EXPORT_SYMBOL_GPL(pm_prepare_console);
+EXPORT_SYMBOL_GPL(follow_page);
+EXPORT_SYMBOL_GPL(machine_halt);
+EXPORT_SYMBOL_GPL(block_dump);
+EXPORT_SYMBOL_GPL(unlink_lru_lists);
+EXPORT_SYMBOL_GPL(relink_lru_lists);
+EXPORT_SYMBOL_GPL(machine_power_off);
+EXPORT_SYMBOL_GPL(suspend_devices_and_enter);
+EXPORT_SYMBOL_GPL(first_online_pgdat);
+EXPORT_SYMBOL_GPL(next_online_pgdat);
+EXPORT_SYMBOL_GPL(machine_restart);
+EXPORT_SYMBOL_GPL(saved_command_line);
+EXPORT_SYMBOL_GPL(tasklist_lock);
+#ifdef CONFIG_PM_SLEEP_SMP
+EXPORT_SYMBOL_GPL(disable_nonboot_cpus);
+EXPORT_SYMBOL_GPL(enable_nonboot_cpus);
+#endif /* CONFIG_PM_SLEEP_SMP */
+#endif /* CONFIG_TOI_CORE_EXPORTS */
+
+int toi_wait = CONFIG_TOI_DEFAULT_WAIT;
+
+#ifdef CONFIG_TOI_USERUI_EXPORTS
+EXPORT_SYMBOL_GPL(kmsg_redirect);
+#endif
+EXPORT_SYMBOL_GPL(toi_wait);
+
+#if defined(CONFIG_TOI_USERUI_EXPORTS) || defined(CONFIG_TOI_CORE_EXPORTS)
+EXPORT_SYMBOL_GPL(console_printk);
+#endif
+#ifdef CONFIG_TOI_SWAP_EXPORTS	/* TuxOnIce swap specific */
+EXPORT_SYMBOL_GPL(sys_swapon);
+EXPORT_SYMBOL_GPL(sys_swapoff);
+EXPORT_SYMBOL_GPL(si_swapinfo);
+EXPORT_SYMBOL_GPL(map_swap_page);
+EXPORT_SYMBOL_GPL(get_swap_page);
+EXPORT_SYMBOL_GPL(swap_free);
+EXPORT_SYMBOL_GPL(get_swap_info_struct);
+#endif
+
+#ifdef CONFIG_TOI_FILE_EXPORTS
+/* TuxOnice file allocator specific support */
+EXPORT_SYMBOL_GPL(sys_unlink);
+EXPORT_SYMBOL_GPL(sys_mknod);
+#endif
+
+/* Swap or file */
+#if defined(CONFIG_TOI_FILE_EXPORTS) || defined(CONFIG_TOI_SWAP_EXPORTS)
+EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
+EXPORT_SYMBOL_GPL(name_to_dev_t);
+#endif
+
+#if defined(CONFIG_TOI_FILE_EXPORTS) || defined(CONFIG_TOI_SWAP_EXPORTS) || \
+	defined(CONFIG_TOI_CORE_EXPORTS)
+EXPORT_SYMBOL_GPL(resume_file);
+#endif
+struct toi_core_fns *toi_core_fns;
+EXPORT_SYMBOL_GPL(toi_core_fns);
+
+DECLARE_DYN_PAGEFLAGS(pageset1_map);
+DECLARE_DYN_PAGEFLAGS(pageset1_copy_map);
+EXPORT_SYMBOL_GPL(pageset1_map);
+EXPORT_SYMBOL_GPL(pageset1_copy_map);
+
+unsigned long toi_result;
+struct pagedir pagedir1 = {1};
+
+EXPORT_SYMBOL_GPL(toi_result);
+EXPORT_SYMBOL_GPL(pagedir1);
+/* Forward to the core's hook. NOTE(review): toi_core_fns not NULL-checked. */
+unsigned long toi_get_nonconflicting_page(void)
+{
+	return toi_core_fns->get_nonconflicting_page();
+}
+/* Forward to the core's hook. NOTE(review): toi_core_fns not NULL-checked. */
+int toi_post_context_save(void)
+{
+	return toi_core_fns->post_context_save();
+}
+
+int toi_try_hibernate(int have_pmsem)
+{
+	/* With no core function table registered there is nothing to call */
+	if (toi_core_fns)
+		return toi_core_fns->try_hibernate(have_pmsem);
+	return -ENODEV;
+}
+
+void toi_try_resume(void)
+{
+	if (!toi_core_fns)	/* no core function table to call into */
+		printk(KERN_INFO "TuxOnIce core not loaded yet.\n");
+	else
+		toi_core_fns->try_resume();
+}
+
+int toi_lowlevel_builtin(void)
+{
+	int error = 0;
+
+	save_processor_state();
+	error = swsusp_arch_suspend();
+	if (error)
+		printk(KERN_ERR "Error %d hibernating\n", error);
+	/* Restore control flow appears here. toi_in_hibernate being clear is
+	 * taken to mean we are resuming, so highmem is copied back first. */
+	if (!toi_in_hibernate) {
+		copyback_high();
+		set_toi_state(TOI_NOW_RESUMING);
+	}
+
+	restore_processor_state();
+
+	return error;	/* whatever swsusp_arch_suspend() returned */
+}
+
+EXPORT_SYMBOL_GPL(toi_lowlevel_builtin);
+
+unsigned long toi_compress_bytes_in, toi_compress_bytes_out;
+EXPORT_SYMBOL_GPL(toi_compress_bytes_in);
+EXPORT_SYMBOL_GPL(toi_compress_bytes_out);
+
+unsigned long toi_state = ((1 << TOI_BOOT_TIME) |
+		(1 << TOI_IGNORE_LOGLEVEL) |
+		(1 << TOI_IO_STOPPED));
+EXPORT_SYMBOL_GPL(toi_state);
+
+/* The number of hibernates we have started (some may have been cancelled) */
+unsigned int nr_hibernates;
+EXPORT_SYMBOL_GPL(nr_hibernates);
+
+int toi_running;
+EXPORT_SYMBOL_GPL(toi_running);
+
+int toi_in_hibernate __nosavedata;
+EXPORT_SYMBOL_GPL(toi_in_hibernate);
+
+__nosavedata struct pbe *restore_highmem_pblist;
+
+#ifdef CONFIG_TOI_CORE_EXPORTS
+#ifdef CONFIG_HIGHMEM
+EXPORT_SYMBOL_GPL(nr_free_highpages);
+EXPORT_SYMBOL_GPL(saveable_highmem_page);
+EXPORT_SYMBOL_GPL(restore_highmem_pblist);
+#endif
+#endif
+
+#if defined(CONFIG_TOI_CORE_EXPORTS) || defined(CONFIG_TOI_PAGEFLAGS_EXPORTS)
+EXPORT_SYMBOL_GPL(max_pfn);
+#endif
+
+#if defined(CONFIG_TOI_EXPORTS) || defined(CONFIG_TOI_CORE_EXPORTS)
+EXPORT_SYMBOL_GPL(snprintf_used);
+#endif
+
+static int __init toi_wait_setup(char *str)
+{
+	int value;
+
+	/* Expect "=<number>"; unparsable input leaves toi_wait untouched */
+	if (!sscanf(str, "=%d", &value))
+		return 1;
+
+	if (value < -1 || value > 255)
+		printk(KERN_INFO "TuxOnIce_wait outside range -1 to 255.\n");
+	else
+		toi_wait = value;
+	return 1;
+}
+
+__setup("toi_wait", toi_wait_setup);
diff -Npur linux-2.6-block/kernel/power/tuxonice_builtin.h linux-2.6-block-custom/kernel/power/tuxonice_builtin.h
--- linux-2.6-block/kernel/power/tuxonice_builtin.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_builtin.h	2008-09-26 19:48:23.979754551 +0900
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/dyn_pageflags.h>
+#include <asm/setup.h>
+
+extern struct toi_core_fns *toi_core_fns;
+extern unsigned long toi_compress_bytes_in, toi_compress_bytes_out;
+extern unsigned int nr_hibernates;
+extern int toi_in_hibernate;
+
+extern __nosavedata struct pbe *restore_highmem_pblist;
+
+int toi_lowlevel_builtin(void);
+
+extern struct dyn_pageflags __nosavedata toi_nosave_origmap;
+extern struct dyn_pageflags __nosavedata toi_nosave_copymap;
+
+#ifdef CONFIG_HIGHMEM
+extern __nosavedata struct zone_data *toi_nosave_zone_list;
+extern __nosavedata unsigned long toi_nosave_max_pfn;
+#endif /* CONFIG_HIGHMEM */
+
+extern unsigned long toi_get_nonconflicting_page(void);
+extern int toi_post_context_save(void);
+extern int toi_try_hibernate(int have_pmsem);
+extern char toi_wait_for_keypress_dev_console(int timeout);
+extern struct block_device *toi_open_by_devnum(dev_t dev, unsigned mode);
+extern int toi_wait;
diff -Npur linux-2.6-block/kernel/power/tuxonice_checksum.c linux-2.6-block-custom/kernel/power/tuxonice_checksum.c
--- linux-2.6-block/kernel/power/tuxonice_checksum.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_checksum.c	2008-09-26 19:48:23.979754551 +0900
@@ -0,0 +1,389 @@
+/*
+ * kernel/power/tuxonice_checksum.c
+ *
+ * Copyright (C) 2006-2007 Nigel Cunningham (nigel at tuxonice net)
+ * Copyright (C) 2006 Red Hat, inc.
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data checksum routines for TuxOnIce,
+ * using cryptoapi. They are used to locate any modifications
+ * made to pageset 2 while we're saving it.
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_alloc.h"
+
+static struct toi_module_ops toi_checksum_ops;
+
+/* Hash algorithm name. Constant at the mo, but I might allow tuning later */
+static char toi_checksum_name[32] = "md4";
+/* Bytes per checksum */
+#define CHECKSUM_SIZE (16)
+/* The first sizeof(void *) bytes of a checksum page link to the next page */
+#define CHECKSUMS_PER_PAGE ((PAGE_SIZE - sizeof(void *)) / CHECKSUM_SIZE)
+
+struct cpu_context {	/* per-CPU hashing state, see contexts */
+	struct crypto_hash *transform;	/* from crypto_alloc_hash() */
+	struct hash_desc desc;		/* desc.tfm points at transform */
+	struct scatterlist sg[2];	/* sg[0] is set up to cover buf */
+	char *buf;			/* page-sized copy of the page hashed */
+};
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+static int pages_allocated;	/* number of pages on page_list */
+static unsigned long page_list;	/* head of the linked checksum pages */
+/* Pages marked for resave by the last check_checksums() run */
+static int toi_num_resaved;
+/* Cursor state advanced by tuxonice_get_next_checksum() */
+static unsigned long this_checksum, next_page;
+static int checksum_index;
+/* Pages needed to hold pagedir2.size checksums, rounded up */
+static inline int checksum_pages_needed(void)
+{
+	return DIV_ROUND_UP(pagedir2.size, CHECKSUMS_PER_PAGE);
+}
+
+/* ---- Local buffer management ---- */
+
+/*
+ * toi_checksum_cleanup
+ * Frees each online CPU's transform and buffer when a cycle is ending.
+ */
+static void toi_checksum_cleanup(int ending_cycle)
+{
+	int cpu;
+
+	if (!ending_cycle)
+		return;
+
+	for_each_online_cpu(cpu) {
+		struct cpu_context *ctx = &per_cpu(contexts, cpu);
+		if (ctx->transform) {
+			crypto_free_hash(ctx->transform);
+			ctx->transform = NULL;
+			ctx->desc.tfm = NULL;
+		}
+
+		if (ctx->buf) {
+			toi_free_page(27, (unsigned long) ctx->buf);
+			ctx->buf = NULL;
+		}
+	}
+}
+
+/*
+ * toi_checksum_initialise
+ * @starting_cycle: Cycle flags; only SYSFS_HIBERNATE cycles are set up.
+ *
+ * Give each online CPU a hash transform and a page-sized buffer.
+ * Returns: 0 on success or when there is nothing to do, 1 on failure.
+ */
+static int toi_checksum_initialise(int starting_cycle)
+{
+	int cpu;
+
+	if (!toi_checksum_ops.enabled || !(starting_cycle & SYSFS_HIBERNATE))
+		return 0;
+
+	if (toi_checksum_name[0] == '\0') {
+		printk(KERN_INFO "TuxOnIce: No checksum algorithm name set.\n");
+		return 1;
+	}
+
+	for_each_online_cpu(cpu) {
+		struct cpu_context *ctx = &per_cpu(contexts, cpu);
+		struct page *buf_page;
+
+		ctx->transform = crypto_alloc_hash(toi_checksum_name, 0, 0);
+		if (IS_ERR(ctx->transform)) {
+			printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+				"%s checksum algorithm: %ld.\n",
+				toi_checksum_name, PTR_ERR(ctx->transform));
+			ctx->transform = NULL;
+			return 1;
+		}
+
+		ctx->desc.tfm = ctx->transform;
+		ctx->desc.flags = 0;
+
+		buf_page = toi_alloc_page(27, GFP_KERNEL);
+		if (!buf_page)
+			return 1;
+		ctx->buf = page_address(buf_page);
+		sg_init_one(&ctx->sg[0], ctx->buf, PAGE_SIZE);
+	}
+	return 0;
+}
+
+/*
+ * toi_checksum_print_debug_stats
+ * @buffer: Buffer that receives the debug text.
+ * @size: Number of bytes available in @buffer.
+ *
+ * Report whether checksumming is on and, if so, the method and resave count.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_checksum_print_debug_stats(char *buffer, int size)
+{
+	int used;
+
+	if (toi_checksum_ops.enabled) {
+		used = snprintf_used(buffer, size,
+			"- Checksum method is '%s'.\n", toi_checksum_name);
+		used += snprintf_used(buffer + used, size - used,
+		    "  %d pages resaved in atomic copy.\n", toi_num_resaved);
+		return used;
+	}
+
+	return snprintf_used(buffer, size, "- Checksumming disabled.\n");
+}
+
+static int toi_checksum_memory_needed(void)
+{
+	if (!toi_checksum_ops.enabled)
+		return 0;
+	return checksum_pages_needed() << PAGE_SHIFT;
+}
+
+/* Header bytes: an int length word plus the NUL-terminated algorithm name */
+static int toi_checksum_storage_needed(void)
+{
+	return toi_checksum_ops.enabled ?
+		strlen(toi_checksum_name) + sizeof(int) + 1 : 0;
+}
+
+/*
+ * toi_checksum_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save information needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_checksum_save_config_info(char *buffer)
+{
+	/* The stored length counts the terminating NUL */
+	unsigned int name_bytes = strlen(toi_checksum_name) + 1;
+
+	/* Layout: length word first, then the name itself */
+	*((unsigned int *) buffer) = name_bytes;
+	memcpy(buffer + sizeof(unsigned int), toi_checksum_name, name_bytes);
+	return sizeof(unsigned int) + name_bytes;
+}
+
+/* toi_checksum_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved (currently unused).
+ *
+ * Reload the checksum algorithm name that was saved with the image.
+ */
+static void toi_checksum_load_config_info(char *buffer, int size)
+{
+	unsigned int namelen = *((unsigned int *) buffer);
+
+	/* The length comes from the image: never overrun toi_checksum_name */
+	if (namelen > sizeof(toi_checksum_name))
+		namelen = sizeof(toi_checksum_name);
+	strncpy(toi_checksum_name, buffer + sizeof(unsigned int), namelen);
+	toi_checksum_name[sizeof(toi_checksum_name) - 1] = '\0';
+}
+
+/*
+ * Free Checksum Memory: walk the page list, releasing every page on it.
+ */
+
+void free_checksum_pages(void)
+{
+	for (; pages_allocated; pages_allocated--) {
+		/* The first word of each page links to the next page */
+		unsigned long following = *((unsigned long *) page_list);
+		ClearPageNosave(virt_to_page(page_list));
+		toi_free_page(15, (unsigned long) page_list);
+		page_list = following;
+	}
+}
+
+/*
+ * Allocate Checksum Memory: grow the page list to the size needed.
+ */
+
+int allocate_checksum_pages(void)
+{
+	int target = checksum_pages_needed();
+
+	if (!toi_checksum_ops.enabled)
+		return 0;
+
+	for (; pages_allocated < target; pages_allocated++) {
+		unsigned long *fresh =
+		  (unsigned long *) toi_get_zeroed_page(15, TOI_ATOMIC_GFP);
+		if (!fresh) {
+			printk("Unable to allocate checksum pages.\n");
+			return -ENOMEM;
+		}
+		SetPageNosave(virt_to_page(fresh));
+		/* Push onto the list: first word links to the old head */
+		*fresh = page_list;
+		page_list = (unsigned long) fresh;
+	}
+
+	/* Rewind the tuxonice_get_next_checksum() cursor to the list head */
+	next_page = (unsigned long) page_list;
+	checksum_index = 0;
+	return 0;
+}
+
+#if 0	/* unused debugging helper, compiled out */
+static void print_checksum(char *buf, int size)
+{
+	int index;
+
+	for (index = 0; index < size; index++)
+		printk(KERN_INFO "%x ", buf[index]);
+
+	printk("\n");
+}
+#endif /* 0 */
+
+char *tuxonice_get_next_checksum(void)
+{
+	if (!toi_checksum_ops.enabled)
+		return NULL;
+
+	if (checksum_index % CHECKSUMS_PER_PAGE == 0) {
+		/* Page boundary: skip the link word, then follow the link */
+		this_checksum = next_page + sizeof(void *);
+		next_page = *((unsigned long *) next_page);
+	} else {
+		this_checksum += CHECKSUM_SIZE;
+	}
+	checksum_index++;
+	return (char *) this_checksum;
+}
+
+/* Digest a copy of @page into @checksum_locn; returns the digest result. */
+int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
+{
+	int cpu = smp_processor_id();
+	struct cpu_context *ctx = &per_cpu(contexts, cpu);
+	char *mapped;
+
+	if (!toi_checksum_ops.enabled)
+		return 0;
+
+	mapped = kmap(page);
+	memcpy(ctx->buf, mapped, PAGE_SIZE);
+	kunmap(page);
+	return crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
+						checksum_locn);
+}
+/*
+ * Verify checksums: re-digest each pageset2 page and flag any that changed.
+ */
+
+void check_checksums(void)
+{
+	int pfn, slot = 0, cpu = smp_processor_id();
+	unsigned long link, stored = 0;
+	char digest[CHECKSUM_SIZE];
+	struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+	if (!toi_checksum_ops.enabled)
+		return;
+
+	link = (unsigned long) page_list;
+	toi_num_resaved = 0;
+
+	BITMAP_FOR_EACH_SET(&pageset2_map, pfn) {
+		int err;
+		char *src;
+		struct page *page = pfn_to_page(pfn);
+
+		if (slot % CHECKSUMS_PER_PAGE == 0) {
+			/* New checksum page: skip its link word */
+			stored = link + sizeof(void *);
+			link = *((unsigned long *) link);
+		} else {
+			stored += CHECKSUM_SIZE;
+		}
+
+		/* Done when IRQs disabled so must be atomic */
+		src = kmap_atomic(page, KM_USER1);
+		memcpy(ctx->buf, src, PAGE_SIZE);
+		kunmap_atomic(src, KM_USER1);
+		err = crypto_hash_digest(&ctx->desc, ctx->sg, PAGE_SIZE,
+							digest);
+
+		if (err) {
+			printk(KERN_INFO "Digest failed. Returned %d.\n", err);
+			return;
+		}
+
+		if (memcmp(digest, (char *) stored,
+							CHECKSUM_SIZE)) {
+			SetPageResave(page);
+			toi_num_resaved++;
+			if (test_action_state(TOI_ABORT_ON_RESAVE_NEEDED))
+				set_abort_result(TOI_RESAVE_NEEDED);
+		}
+
+		slot++;
+	}
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+	{ TOI_ATTR("enabled", SYSFS_RW),
+	  SYSFS_INT(&toi_checksum_ops.enabled, 0, 1, 0)
+	},
+	/* Bit TOI_ABORT_ON_RESAVE_NEEDED; tested in check_checksums() */
+	{ TOI_ATTR("abort_if_resave_needed", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_ABORT_ON_RESAVE_NEEDED, 0)
+	}
+};
+
+/*
+ * Ops structure, handed to toi_register_module() by toi_checksum_init()
+ * and withdrawn again by toi_checksum_exit().
+ */
+static struct toi_module_ops toi_checksum_ops = {
+	.type			= MISC_MODULE,
+	.name			= "checksumming",
+	.directory		= "checksum",
+	.module			= THIS_MODULE,
+	.initialise		= toi_checksum_initialise,
+	.cleanup		= toi_checksum_cleanup,
+	.print_debug_info	= toi_checksum_print_debug_stats,
+	.save_config_info	= toi_checksum_save_config_info,
+	.load_config_info	= toi_checksum_load_config_info,
+	.memory_needed		= toi_checksum_memory_needed,
+	.storage_needed		= toi_checksum_storage_needed,
+
+	.sysfs_data		= sysfs_params,
+	.num_sysfs_entries	= ARRAY_SIZE(sysfs_params),
+};
+
+/* ---- Registration ---- */
+/* Register toi_checksum_ops; returns toi_register_module()'s result. */
+int toi_checksum_init(void)
+{
+	return toi_register_module(&toi_checksum_ops);
+}
+
+void toi_checksum_exit(void)
+{
+	toi_unregister_module(&toi_checksum_ops);
+}
diff -Npur linux-2.6-block/kernel/power/tuxonice_checksum.h linux-2.6-block-custom/kernel/power/tuxonice_checksum.h
--- linux-2.6-block/kernel/power/tuxonice_checksum.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_checksum.h	2008-09-26 19:48:23.983764697 +0900
@@ -0,0 +1,32 @@
+/*
+ * kernel/power/tuxonice_checksum.h
+ *
+ * Copyright (C) 2006-2007 Nigel Cunningham (nigel at tuxonice net)
+ * Copyright (C) 2006 Red Hat, inc.
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data checksum routines for TuxOnIce,
+ * using cryptoapi. They are used to locate any modifications
+ * made to pageset 2 while we're saving it.
+ */
+
+#if defined(CONFIG_TOI_CHECKSUM)
+extern int toi_checksum_init(void);
+extern void toi_checksum_exit(void);
+void check_checksums(void);
+int allocate_checksum_pages(void);
+void free_checksum_pages(void);
+char *tuxonice_get_next_checksum(void);
+int tuxonice_calc_checksum(struct page *page, char *checksum_locn);
+#else /* !CONFIG_TOI_CHECKSUM: no-op stubs so callers need no #ifdefs */
+static inline int toi_checksum_init(void) { return 0; }
+static inline void toi_checksum_exit(void) { }
+static inline void check_checksums(void) { }
+static inline int allocate_checksum_pages(void) { return 0; }
+static inline void free_checksum_pages(void) { }
+static inline char *tuxonice_get_next_checksum(void) { return NULL; }
+static inline int tuxonice_calc_checksum(struct page *page, char *checksum_locn)
+	{ return 0; }
+#endif /* CONFIG_TOI_CHECKSUM */
+
diff -Npur linux-2.6-block/kernel/power/tuxonice_cluster.c linux-2.6-block-custom/kernel/power/tuxonice_cluster.c
--- linux-2.6-block/kernel/power/tuxonice_cluster.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_cluster.c	2008-09-26 19:48:23.983764697 +0900
@@ -0,0 +1,1088 @@
+/*
+ * kernel/power/tuxonice_cluster.c
+ *
+ * Copyright (C) 2006-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains routines for cluster hibernation support.
+ *
+ * Based on ip autoconfiguration code in net/ipv4/ipconfig.c.
+ *
+ * How does it work?
+ *
+ * There is no 'master' node that tells everyone else what to do. All nodes
+ * send messages to the broadcast address/port, maintain a list of peers
+ * and figure out when to progress to the next step in hibernating or resuming.
+ * This makes us more fault tolerant when it comes to nodes coming and going
+ * (which may be more of an issue if we're hibernating when power supplies
+ * are being unreliable).
+ *
+ * At boot time, we start a ktuxonice thread that handles communication with
+ * other nodes. This node maintains a state machine that controls our progress
+ * through hibernating and resuming, keeping us in step with other nodes. Nodes
+ * are identified by their hw address.
+ *
+ * On startup, the node sends CLUSTER_PING on the configured interface's
+ * broadcast address, port $toi_cluster_port (see below) and begins to listen
+ * for other broadcast messages. CLUSTER_PING messages are repeated at
+ * intervals of 5 minutes, with a random offset to spread traffic out.
+ *
+ * A hibernation cycle is initiated from any node via
+ *
+ * echo > /sys/power/tuxonice/do_hibernate
+ *
+ * and (possibly) the hibernate script. At each step of the process, the node
+ * completes its work, and waits for all other nodes to signal completion of
+ * their work (or timeout) before progressing to the next step.
+ *
+ * Request/state  Action before reply	Possible reply	Next state
+ * HIBERNATE	  capable, pre-script	HIBERNATE|ACK	NODE_PREP
+ * 					HIBERNATE|NACK	INIT_0
+ *
+ * PREP		  prepare_image		PREP|ACK	IMAGE_WRITE
+ *		 			PREP|NACK	INIT_0
+ * 					ABORT		RUNNING
+ *
+ * IO		  write image		IO|ACK		power off
+ * 					ABORT		POST_RESUME
+ *
+ * (Boot time)	  check for image	IMAGE|ACK	RESUME_PREP
+ * 					(Note 1)
+ * 					IMAGE|NACK	(Note 2)
+ *
+ * PREP		  prepare read image	PREP|ACK	IMAGE_READ
+ * 					PREP|NACK	(As NACK_IMAGE)
+ *
+ * IO		  read image		IO|ACK		POST_RESUME
+ *
+ * POST_RESUME	  thaw, post-script			RUNNING
+ *
+ * INIT_0	  init 0
+ *
+ * Other messages:
+ *
+ * - PING: Request for all other live nodes to send a PONG. Used at startup to
+ *   announce presence, when a node is suspected dead and periodically, in case
+ *   segments of the network are [un]plugged.
+ *
+ * - PONG: Response to a PING.
+ *
+ * - ABORT: Request to cancel writing an image.
+ *
+ * - BYE: Notification that this node is shutting down.
+ *
+ * Note 1: Repeated at 3s intervals until we continue to boot/resume, so that
+ * nodes which are slower to start up can get state synchronised. If a node
+ * starting up sees other nodes sending RESUME_PREP or IMAGE_READ, it may send
+ * ACK_IMAGE and they will wait for it to catch up. If it sees ACK_READ, it
+ * must invalidate its image (if any) and boot normally.
+ *
+ * Note 2: May occur when one node lost power or powered off while others
+ * hibernated. This node waits for others to complete resuming (ACK_READ)
+ * before completing its boot, so that it appears as a failed node restarting.
+ *
+ * If any node has an image, then it also has a list of nodes that hibernated
+ * in synchronisation with it. The node will wait for other nodes to appear
+ * or timeout before beginning its restoration.
+ *
+ * If a node has no image, it needs to wait, in case other nodes which do have
+ * an image are going to resume, but are taking longer to announce their
+ * presence. For this reason, the user can specify a timeout value and a number
+ * of nodes detected before we just continue. (We might want to assume in a
+ * cluster of, say, 15 nodes, if 8 others have booted without finding an image,
+ * the remaining nodes will too. This might help in situations where some nodes
+ * are much slower to boot, or more subject to hardware failures or such like).
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/if_arp.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_io.h"
+
+#if 1	/* debug tracing on; change to 0 to compile PRINTK() away */
+#define PRINTK(a, b...) do { printk(a, ##b); } while (0)
+#else
+#define PRINTK(a, b...) do { } while (0)
+#endif
+
+static int loopback_mode;	/* set when the cluster iface is the loopback */
+static int num_local_nodes = 1;	/* 8 in loopback mode, otherwise 1 */
+#define MAX_LOCAL_NODES 8
+#define SADDR (loopback_mode ? b->sid : h->saddr) /* needs locals b and h */
+
+#define MYNAME "TuxOnIce Clustering"
+
+enum cluster_message {
+	MSG_ACK		= 1 << 0,	/* reply bit, see MSG_ACK_MASK */
+	MSG_NACK	= 1 << 1,	/* reply bit, see MSG_ACK_MASK */
+	MSG_PING	= 1 << 2,
+	MSG_ABORT	= 1 << 3,
+	MSG_BYE		= 1 << 4,
+	MSG_HIBERNATE	= 1 << 5,
+	MSG_IMAGE	= 1 << 6,
+	MSG_IO		= 1 << 7,
+	MSG_RUNNING	= 1 << 8	/* beyond 8 bits: needs a 16-bit field */
+};
+
+static char *str_message(int message)	/* message value -> text for logs */
+{
+	switch (message) {
+	case MSG_PING:
+		return "Ping";
+	case MSG_ABORT:
+		return "Abort";
+	case MSG_ABORT | MSG_ACK:
+		return "Abort acked";
+	case MSG_ABORT | MSG_NACK:
+		return "Abort nacked";
+	case MSG_BYE:
+		return "Bye";
+	case MSG_BYE | MSG_ACK:
+		return "Bye acked";
+	case MSG_BYE | MSG_NACK:
+		return "Bye nacked";
+	case MSG_HIBERNATE:
+		return "Hibernate request";
+	case MSG_HIBERNATE | MSG_ACK:
+		return "Hibernate ack";
+	case MSG_HIBERNATE | MSG_NACK:
+		return "Hibernate nack";
+	case MSG_IMAGE:
+		return "Image exists?";
+	case MSG_IMAGE | MSG_ACK:
+		return "Image does exist";
+	case MSG_IMAGE | MSG_NACK:
+		return "No image here";
+	case MSG_IO:
+		return "I/O";
+	case MSG_IO | MSG_ACK:
+		return "I/O okay";
+	case MSG_IO | MSG_NACK:
+		return "I/O failed";
+	case MSG_RUNNING:
+		return "Running";
+	default:	/* any other bit combination has no name */
+		printk(KERN_WARNING "Unrecognised message %d.\n", message);
+		return "Unrecognised message (see dmesg)";
+	}
+}
+
+#define MSG_ACK_MASK (MSG_ACK | MSG_NACK)
+#define MSG_STATE_MASK (~MSG_ACK_MASK)
+
+struct node_info {
+	struct list_head member_list;	/* cluster_member entries seen */
+	wait_queue_head_t member_events; /* woken when a peer is added/changes */
+	spinlock_t member_list_lock;	/* guards member_list */
+	spinlock_t receive_lock;	/* one received packet at a time */
+	int peer_count, ignored_peer_count;
+	struct toi_sysfs_data sysfs_data; /* backs the node_<n> sysfs file */
+	enum cluster_message current_message; /* being sent; 0 = offline */
+};
+
+struct node_info node_array[MAX_LOCAL_NODES];	/* one slot per local node */
+
+struct cluster_member {
+	__be32 addr;			/* sender id the peer is keyed by */
+	enum cluster_message message;	/* last message received from it */
+	struct list_head list;		/* link in node_info.member_list */
+	int ignore;			/* set when it NACKs hibernate/image/IO */
+};
+
+#define toi_cluster_port_send 3501	/* UDP source port of our packets */
+#define toi_cluster_port_recv 3502	/* UDP destination port we accept */
+
+static struct net_device *net_dev;	/* interface in use, once opened */
+static struct toi_module_ops toi_cluster_ops;
+
+static int toi_recv(struct sk_buff *skb, struct net_device *dev,
+		struct packet_type *pt, struct net_device *orig_dev);
+
+static struct packet_type toi_cluster_packet_type = {
+	.type =	__constant_htons(ETH_P_IP),	/* match IP frames */
+	.func =	toi_recv,			/* handler for each match */
+};
+
+struct toi_pkt {		/* Cluster packet (fields modelled on BOOTP) */
+	struct iphdr iph;	/* IP header */
+	struct udphdr udph;	/* UDP header */
+	u8 htype;		/* HW address type */
+	u8 hlen;		/* HW address length */
+	__be32 xid;		/* Transaction ID */
+	__be16 secs;		/* Seconds since we started */
+	__be16 flags;		/* Just what it says */
+	u8 hw_addr[16];		/* Sender's HW address */
+	u16 message;		/* cluster_message bits, host byte order */
+	unsigned long sid;	/* Source ID for loopback testing */
+};
+
+static char toi_cluster_iface[IFNAMSIZ] = CONFIG_TOI_DEFAULT_CLUSTER_INTERFACE;
+
+static int added_pack;		/* nonzero once dev_add_pack() has been done */
+
+static int others_have_image;	/* peers that ACKed MSG_IMAGE at startup */
+
+/* Key used to allow multiple clusters on the same lan */
+static char toi_cluster_key[32] = CONFIG_TOI_DEFAULT_CLUSTER_KEY;
+static char pre_hibernate_script[255] =
+	CONFIG_TOI_DEFAULT_CLUSTER_PRE_HIBERNATE;
+static char post_hibernate_script[255] =
+	CONFIG_TOI_DEFAULT_CLUSTER_POST_HIBERNATE;
+
+/*			Timing (in jiffies)			*/
+static unsigned long continue_delay = 5 * HZ;	/* max wait for peers */
+static unsigned long cluster_message_timeout = 3 * HZ; /* resend interval */
+
+/* 		=== Membership list === 	*/
+
+static void print_member_info(int index)	/* debug dump of one peer list */
+{
+	struct list_head *peers = &node_array[index].member_list;
+	struct cluster_member *peer;
+
+	printk(KERN_INFO "==> Dumping node %d.\n", index);
+	list_for_each_entry(peer, peers, list) {
+		const char *tag = peer->ignore ? "(Ignored)" : "";
+		printk(KERN_INFO "%d.%d.%d.%d last message %s. %s\n",
+			NIPQUAD(peer->addr), str_message(peer->message), tag);
+	}
+	printk(KERN_INFO "== Done ==\n");
+}
+
+static struct cluster_member *__find_member(int index, __be32 addr)
+{
+	struct cluster_member *entry, *found = NULL;
+
+	/* Linear scan of this node's peers; NULL when addr is unknown. */
+	list_for_each_entry(entry, &node_array[index].member_list, list) {
+		if (entry->addr == addr) {
+			found = entry;
+			break;
+		}
+	}
+	return found;
+}
+
+static void set_ignore(int index, __be32 addr, struct cluster_member *this)
+{
+	/* Flag the peer as ignored once, keeping the per-node count in step. */
+	if (!this->ignore) {
+		PRINTK("Node %d sees node %d.%d.%d.%d now being ignored.\n",
+					index, NIPQUAD(addr));
+		this->ignore = 1;
+		node_array[index].ignored_peer_count++;
+		return;
+	}
+	PRINTK("Node %d already ignoring %d.%d.%d.%d.\n",
+			index, NIPQUAD(addr));
+}
+
+static int __add_update_member(int index, __be32 addr, int message)
+{
+	struct cluster_member *this;
+
+	this = __find_member(index, addr);
+	if (this) {
+		if (this->message != message) {
+			this->message = message;
+			if ((message & MSG_NACK) &&
+			    (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
+				set_ignore(index, addr, this);
+			PRINTK("Node %d sees node %d.%d.%d.%d now sending "
+					"%s.\n", index, NIPQUAD(addr),
+					str_message(message));
+			wake_up(&node_array[index].member_events);
+		}
+		return 0;	/* known peer, message refreshed if it changed */
+	}
+
+	/* The caller holds member_list_lock with interrupts off (and may be
+	 * in the packet receive path), so this allocation must not sleep. */
+	this = toi_kzalloc(36, sizeof(*this), GFP_ATOMIC);
+	if (!this)
+		return -1;	/* out of memory: peer not recorded */
+
+	this->addr = addr;
+	this->message = message;
+	this->ignore = 0;
+	INIT_LIST_HEAD(&this->list);
+
+	node_array[index].peer_count++;
+
+	PRINTK("Node %d sees node %d.%d.%d.%d sending %s.\n", index,
+			NIPQUAD(addr), str_message(message));
+
+	if ((message & MSG_NACK) &&
+	    (message & (MSG_HIBERNATE | MSG_IMAGE | MSG_IO)))
+		set_ignore(index, addr, this);
+	list_add_tail(&this->list, &node_array[index].member_list);
+	return 1;	/* new peer added */
+}
+
+static int add_update_member(int index, __be32 addr, int message)
+{
+	struct node_info *node = &node_array[index];
+	unsigned long irq_state;
+	int rc;
+
+	/* Locked add/update, then dump the list and wake any waiters. */
+	spin_lock_irqsave(&node->member_list_lock, irq_state);
+	rc = __add_update_member(index, addr, message);
+	spin_unlock_irqrestore(&node->member_list_lock, irq_state);
+	print_member_info(index);
+	wake_up(&node->member_events);
+	return rc;
+}
+
+static void del_member(int index, __be32 addr)
+{
+	struct node_info *node = &node_array[index];
+	struct cluster_member *victim;
+	unsigned long irq_state;
+
+	/* Unlink and free the peer with this address, if we know it. */
+	spin_lock_irqsave(&node->member_list_lock, irq_state);
+	victim = __find_member(index, addr);
+	if (victim) {
+		list_del_init(&victim->list);
+		toi_kfree(36, victim);
+		node->peer_count--;
+	}
+	spin_unlock_irqrestore(&node->member_list_lock, irq_state);
+}
+
+/* 		=== Message transmission ===	*/
+
+static void toi_send_if(int message, unsigned long my_id);
+
+/* Validate a received cluster packet and answer it for each local node. */
+static int toi_recv(struct sk_buff *skb, struct net_device *dev,
+		struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct toi_pkt *b;
+	struct iphdr *h;
+	int len, result, index;
+	unsigned long message, ack;
+	__be32 addr;	/* 32 bits wide so NIPQUAD() sees the right bytes */
+
+	/* Perform verifications before taking the lock.  */
+	if (skb->pkt_type == PACKET_OTHERHOST)
+		goto drop;
+	if (dev != net_dev)
+		goto drop;
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		return NET_RX_DROP;
+
+	if (!pskb_may_pull(skb,
+			   sizeof(struct iphdr) +
+			   sizeof(struct udphdr)))
+		goto drop;
+
+	b = (struct toi_pkt *)skb_network_header(skb);
+	h = &b->iph;
+
+	if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
+		goto drop;
+
+	/* Fragments are not supported */
+	if (h->frag_off & htons(IP_OFFSET | IP_MF)) {
+		if (net_ratelimit())
+			printk(KERN_ERR "TuxOnIce: Ignoring fragmented "
+			       "cluster message.\n");
+		goto drop;
+	}
+
+	if (skb->len < ntohs(h->tot_len))
+		goto drop;
+
+	if (ip_fast_csum((char *) h, h->ihl))
+		goto drop;
+
+	if (b->udph.source != htons(toi_cluster_port_send) ||
+	    b->udph.dest != htons(toi_cluster_port_recv))
+		goto drop;
+
+	if (ntohs(h->tot_len) < ntohs(b->udph.len) + sizeof(struct iphdr))
+		goto drop;
+
+	/* The UDP payload must hold a whole toi_pkt before we read from it. */
+	len = ntohs(b->udph.len) - sizeof(struct udphdr);
+	if (len < (int) (sizeof(struct toi_pkt) - sizeof(struct iphdr) -
+				sizeof(struct udphdr)))
+		goto drop;
+
+	/* Ok the front looks good, make sure we can get at the rest.  */
+	if (!pskb_may_pull(skb, skb->len))
+		goto drop;
+
+	b = (struct toi_pkt *)skb_network_header(skb);
+	h = &b->iph;
+
+	addr = SADDR;
+	PRINTK(">>> Message %s received from " NIPQUAD_FMT ".\n",
+			str_message(b->message), NIPQUAD(addr));
+
+	message = b->message & MSG_STATE_MASK;
+	ack = b->message & MSG_ACK_MASK;
+
+	for (index = 0; index < num_local_nodes; index++) {
+		int new_message = node_array[index].current_message,
+		    old_message = new_message;
+
+		if (index == SADDR || !old_message) {
+			PRINTK("Ignoring node %d (offline or self).\n", index);
+			continue;
+		}
+
+		/* One message at a time, please. */
+		spin_lock(&node_array[index].receive_lock);
+
+		result = add_update_member(index, SADDR, b->message);
+		if (result == -1) {
+			printk(KERN_INFO "Failed to add new cluster member "
+					NIPQUAD_FMT ".\n",
+					NIPQUAD(addr));
+			goto drop_unlock;
+		}
+
+		switch (message) {
+		case MSG_PING:
+		case MSG_ABORT:
+		case MSG_BYE:
+		case MSG_RUNNING:
+			/* These leave the message we are sending unchanged. */
+			break;
+		case MSG_HIBERNATE:
+			/* Can I hibernate? */
+			new_message = MSG_HIBERNATE |
+				((index & 1) ? MSG_NACK : MSG_ACK);
+			break;
+		case MSG_IMAGE:
+			/* Can I resume? */
+			new_message = MSG_IMAGE |
+				((index & 1) ? MSG_NACK : MSG_ACK);
+			if (new_message != old_message)
+				printk("Setting whether I can resume to %d.\n",
+						new_message);
+			break;
+		case MSG_IO:
+			new_message = MSG_IO | MSG_ACK;
+			break;
+		default:
+			if (net_ratelimit())
+				printk(KERN_ERR "Unrecognised TuxOnIce cluster"
+					" message %d from " NIPQUAD_FMT ".\n",
+					b->message, NIPQUAD(addr));
+		}
+
+		if (old_message != new_message) {
+			node_array[index].current_message = new_message;
+			printk(KERN_INFO ">>> Sending new message for node "
+					"%d.\n", index);
+			toi_send_if(new_message, index);
+		} else if (!ack) {
+			printk(KERN_INFO ">>> Resending message for node %d.\n",
+					index);
+			toi_send_if(new_message, index);
+		}
+drop_unlock:
+		spin_unlock(&node_array[index].receive_lock);
+	}
+
+drop:
+	/* Throw the packet out. */
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/* Broadcast a cluster message on net_dev; callable from atomic context. */
+static void toi_send_if(int message, unsigned long my_id)
+{
+	struct sk_buff *skb;
+	struct toi_pkt *b;
+	int hh_len = LL_RESERVED_SPACE(net_dev);
+	struct iphdr *h;
+
+	/* Allocate packet; atomic, as toi_recv() calls us under a spinlock */
+	skb = alloc_skb(sizeof(struct toi_pkt) + hh_len + 15, GFP_ATOMIC);
+	if (!skb)
+		return;
+	skb_reserve(skb, hh_len);
+	b = (struct toi_pkt *) skb_put(skb, sizeof(struct toi_pkt));
+	memset(b, 0, sizeof(struct toi_pkt));
+
+	/* Construct IP header */
+	skb_reset_network_header(skb);
+	h = ip_hdr(skb);
+	h->version = 4;
+	h->ihl = 5;
+	h->tot_len = htons(sizeof(struct toi_pkt));
+	h->frag_off = htons(IP_DF);
+	h->ttl = 64;
+	h->protocol = IPPROTO_UDP;
+	h->daddr = htonl(INADDR_BROADCAST);	/* saddr stays 0.0.0.0 */
+	h->check = ip_fast_csum((unsigned char *) h, h->ihl);
+
+	/* Construct UDP header */
+	b->udph.source = htons(toi_cluster_port_send);
+	b->udph.dest = htons(toi_cluster_port_recv);
+	b->udph.len = htons(sizeof(struct toi_pkt) - sizeof(struct iphdr));
+	/* UDP checksum not calculated -- explicitly allowed in BOOTP RFC */
+
+	/* Construct message */
+	b->message = message;
+	b->sid = my_id;
+	b->htype = net_dev->type; /* truncated to 8 bits for types >= 256 */
+	b->hlen = min_t(unsigned int, net_dev->addr_len, sizeof(b->hw_addr));
+	memcpy(b->hw_addr, net_dev->dev_addr, b->hlen);	/* never past hw_addr */
+	b->secs = htons(3); /* 3 seconds */
+
+	/* Chain packet down the line... */
+	skb->dev = net_dev;
+	skb->protocol = htons(ETH_P_IP);
+	if (dev_hard_header(skb, net_dev, ntohs(skb->protocol),
+		     net_dev->broadcast, net_dev->dev_addr, skb->len) < 0) {
+		kfree_skb(skb);	/* never queued, so still ours to free */
+		printk(KERN_INFO "E");
+	} else if (dev_queue_xmit(skb) < 0)
+		printk(KERN_INFO "E");
+}
+
+/*	=========================================		*/
+
+/*			kTOICluster			*/
+
+static atomic_t num_cluster_threads;	/* live kTOICluster threads */
+static DECLARE_WAIT_QUEUE_HEAD(clusterd_events); /* daemon sleep/exit queue */
+
+static int kTOICluster(void *data)
+{
+	unsigned long my_id = atomic_add_return(1, &num_cluster_threads) - 1;
+	struct node_info *me = &node_array[my_id];
+
+	/* The initial message arrives through the thread argument. */
+	me->current_message = (unsigned long) data;
+	PRINTK("kTOICluster daemon %lu starting.\n", my_id);
+	current->flags |= PF_NOFREEZE;
+
+	/* Rebroadcast our message until it is cleared (see kill_clusterd). */
+	for (;;) {
+		if (!me->current_message)
+			break;
+		toi_send_if(me->current_message, my_id);
+		sleep_on_timeout(&clusterd_events, cluster_message_timeout);
+		PRINTK("Link state %lu is %d.\n", my_id, me->current_message);
+	}
+
+	/* Say goodbye, drop our thread count and wake anyone waiting on it. */
+	toi_send_if(MSG_BYE, my_id);
+	atomic_dec(&num_cluster_threads);
+	wake_up(&clusterd_events);
+	PRINTK("kTOICluster daemon %lu exiting.\n", my_id);
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+static void kill_clusterd(void)
+{
+	int node;
+
+	/* Clearing current_message makes each daemon leave its send loop. */
+	for (node = 0; node < num_local_nodes; node++) {
+		if (!node_array[node].current_message)
+			continue;
+		PRINTK("Seeking to kill clusterd %d.\n", node);
+		node_array[node].current_message = 0;
+	}
+	wait_event(clusterd_events, !atomic_read(&num_cluster_threads));
+	PRINTK("All cluster daemons have exited.\n");
+}
+
+static int peers_not_in_message(int index, int message, int precise)
+{
+	struct cluster_member *this;
+	unsigned long flags;
+	int result = 0;
+
+	/* Count non-ignored peers whose last message differs from @message;
+	 * unless @precise, the ACK/NACK bits are masked off before comparing. */
+	spin_lock_irqsave(&node_array[index].member_list_lock, flags);
+	list_for_each_entry(this, &node_array[index].member_list, list) {
+		int seen = precise ? this->message :
+				this->message & MSG_STATE_MASK;
+		if (this->ignore)
+			continue;
+		PRINTK("Peer %d.%d.%d.%d sending %s. Seeking %s.\n",
+			NIPQUAD(this->addr),
+			str_message(this->message), str_message(message));
+		if (seen != message)
+			result++;
+	}
+	spin_unlock_irqrestore(&node_array[index].member_list_lock, flags);
+	PRINTK("%d peers not in sought message.\n", result);
+	return result;
+}
+
+static void reset_ignored(int index)
+{
+	unsigned long irq_state;
+	struct cluster_member *peer;
+
+	spin_lock_irqsave(&node_array[index].member_list_lock, irq_state);
+	node_array[index].ignored_peer_count = 0; /* forget every ignore */
+	list_for_each_entry(peer, &node_array[index].member_list, list)
+		peer->ignore = 0;
+	spin_unlock_irqrestore(&node_array[index].member_list_lock, irq_state);
+}
+
+static int peers_in_message(int index, int message, int precise)
+{
+	int active = node_array[index].peer_count -	/* non-ignored peers */
+		node_array[index].ignored_peer_count;
+	return active - peers_not_in_message(index, message, precise);
+}
+
+static int time_to_continue(int index, unsigned long start, int message)
+{
+	int behind = peers_not_in_message(index, message, 0);
+	int unacked = peers_in_message(index, message, 1);
+
+	PRINTK("First part returns %d, second returns %d.\n", behind,
+			unacked);
+	/* Done once no peer is in another state or still sending it bare. */
+	if (behind == 0 && unacked == 0) {
+		PRINTK("All peers answered message %d.\n", message);
+		return 1;
+	}
+
+	if (!time_after(jiffies, start + continue_delay)) {
+		PRINTK("Not time to continue yet (%lu < %lu).\n", jiffies,
+				start + continue_delay);
+		return 0;
+	}
+
+	PRINTK("Timeout reached.\n");
+	return 1;
+}
+
+void toi_initiate_cluster_hibernate(void)
+{
+	int result;
+	unsigned long start;
+
+	result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
+	if (result)
+		return;
+
+	/* Announce the request, then give peers continue_delay to answer.
+	 * The wait is bounded because a wake-up only comes with a packet. */
+	toi_send_if(MSG_HIBERNATE, 0);
+	start = jiffies;
+	wait_event_timeout(node_array[0].member_events,
+		time_to_continue(0, start, MSG_HIBERNATE), continue_delay + 1);
+
+	if (test_action_state(TOI_FREEZER_TEST)) {
+		toi_send_if(MSG_ABORT, 0);
+		start = jiffies;
+		wait_event_timeout(node_array[0].member_events,
+			time_to_continue(0, start, MSG_RUNNING),
+			continue_delay + 1);
+		do_toi_step(STEP_QUIET_CLEANUP);
+		return;
+	}
+
+	toi_send_if(MSG_IO, 0);
+
+	result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
+	if (result)
+		return;
+
+	/* This code runs at resume time too! */
+	if (toi_in_hibernate)
+		result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
+}
+EXPORT_SYMBOL_GPL(toi_initiate_cluster_hibernate);
+
+/* toi_cluster_print_debug_stats
+ *
+ * Description:	Describe the cluster configuration in one line of text for
+ * 		the debug info: the interface in use, or a note that
+ * 		cluster support is disabled.
+ * Arguments:	buffer: Pointer to a buffer into which the debug info will be
+ * 			printed.
+ * 		size:	Size of the buffer.
+ * Returns:	The value returned by snprintf_used() for the line written.
+ */
+static int toi_cluster_print_debug_stats(char *buffer, int size)
+{
+	/* An empty interface name means clustering is switched off. */
+	if (!strlen(toi_cluster_iface))
+		return snprintf_used(buffer, size,
+				"- Cluster support is disabled.\n");
+
+	/* Otherwise report which interface we are using. */
+	return snprintf_used(buffer, size,
+			"- Cluster interface is '%s'.\n",
+			toi_cluster_iface);
+}
+
+/* toi_cluster_memory_needed
+ *
+ * Description:	Tell the caller how much memory we need to operate during
+ * 		hibernate/resume.
+ * Returns:	int. Always 0: this module reports no memory requirement
+ * 		of its own.
+ */
+static int toi_cluster_memory_needed(void)
+{
+	return 0;
+}
+
+static int toi_cluster_storage_needed(void)
+{
+	return 1 + strlen(toi_cluster_iface);	/* the name plus its NUL */
+}
+
+/* toi_cluster_save_config_info
+ *
+ * Description:	Save information needed when reloading the image at resume time.
+ * Arguments:	Buffer:		Pointer to a buffer of size PAGE_SIZE.
+ * Returns:	Number of bytes used for saving our data (name plus its NUL).
+ */
+static int toi_cluster_save_config_info(char *buffer)
+{
+	strcpy(buffer, toi_cluster_iface);
+	return strlen(toi_cluster_iface) + 1;
+}
+
+/* toi_cluster_load_config_info
+ *
+ * Description:	Restore the name saved by toi_cluster_save_config_info().
+ * Arguments:	Buffer: start of the saved data. Size: bytes that were saved.
+ */
+static void toi_cluster_load_config_info(char *buffer, int size)
+{
+	size_t len = (size > 0) ? min(size, IFNAMSIZ - 1) : 0;
+
+	strncpy(toi_cluster_iface, buffer, len);	/* never past the array */
+	toi_cluster_iface[len] = '\0';		/* strncpy may not terminate */
+}
+
+static void cluster_startup(void)
+{
+	int have_image = do_check_can_resume(), i;
+	unsigned long start = jiffies, initial_message = MSG_IMAGE;
+	long limit = continue_delay + 1;	/* bound for every wait below */
+	struct task_struct *p;
+
+	/* NOTE(review): this discards do_check_can_resume()'s answer. */
+	have_image = 1;
+
+	for (i = 0; i < num_local_nodes; i++) {
+		PRINTK("Starting ktoiclusterd %d.\n", i);
+		p = kthread_create(kTOICluster, (void *) initial_message,
+				"ktoiclusterd/%d", i);
+		if (IS_ERR(p)) {
+			printk("Failed to start ktoiclusterd.\n");
+			return;
+		}
+
+		wake_up_process(p);
+	}
+
+	/* Wait (bounded) for delay or someone else sending first message */
+	wait_event_timeout(node_array[0].member_events,
+			time_to_continue(0, start, MSG_IMAGE), limit);
+
+	others_have_image = peers_in_message(0, MSG_IMAGE | MSG_ACK, 1);
+
+	printk(KERN_INFO "Continuing. I %shave an image. Peers with image:"
+		" %d.\n", have_image ? "" : "don't ", others_have_image);
+
+	if (have_image) {
+		int result;
+
+		/* Start to resume */
+		printk(KERN_INFO "  === Starting to resume ===  \n");
+		node_array[0].current_message = MSG_IO;
+		toi_send_if(MSG_IO, 0);
+
+		/* result = do_toi_step(STEP_RESUME_LOAD_PS1); */
+		result = 0;
+
+		if (!result) {
+			/*
+			 * Atomic restore - we'll come back in the hibernation
+			 * path.
+			 */
+
+			/* result = do_toi_step(STEP_RESUME_DO_RESTORE); */
+			result = 0;
+
+			/* do_toi_step(STEP_QUIET_CLEANUP); */
+		}
+
+		node_array[0].current_message |= MSG_NACK;
+
+		/* For debugging - disable for real life? */
+		wait_event_timeout(node_array[0].member_events,
+				time_to_continue(0, start, MSG_IO), limit);
+	}
+
+	if (others_have_image) {
+		/* Wait for them to resume */
+		printk(KERN_INFO "Waiting for other nodes to resume.\n");
+		start = jiffies;
+		wait_event_timeout(node_array[0].member_events,
+				time_to_continue(0, start, MSG_RUNNING), limit);
+		if (peers_not_in_message(0, MSG_RUNNING, 0))
+			printk(KERN_INFO "Timed out while waiting for other "
+					"nodes to resume.\n");
+	}
+
+	/* Find out whether an image exists here. Send ACK_IMAGE or NACK_IMAGE
+	 * as appropriate.
+	 *
+	 * If we don't have an image:
+	 * - Wait until someone else says they have one, or conditions are met
+	 *   for continuing to boot (n machines or t seconds).
+	 * - If anyone has an image, wait for them to resume before continuing
+	 *   to boot.
+	 *
+	 * If we have an image:
+	 * - Wait until conditions are met before continuing to resume (n
+	 *   machines or t seconds). Send RESUME_PREP and freeze processes.
+	 *   NACK_PREP if freezing fails (shouldn't) and follow logic for
+	 *   us having no image above. On success, wait for [N]ACK_PREP from
+	 *   other machines. Read image (including atomic restore) until done.
+	 *   Wait for ACK_READ from others (should never fail). Thaw processes
+	 *   and do post-resume. (The section after the atomic restore is done
+	 *   via the code for hibernating).
+	 */
+
+	node_array[0].current_message = MSG_RUNNING;
+}
+
+/* toi_cluster_open_iface
+ * Description:	Find toi_cluster_iface, register our packet handler and
+ *		start the cluster daemons. Returns 0, or -ENODEV if not found.
+ */
+
+static int toi_cluster_open_iface(void)
+{
+	struct net_device *dev, *found = NULL;
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		if (!strcmp(dev->name, toi_cluster_iface)) {
+			found = dev;
+			break;
+		}
+	}
+
+	rtnl_unlock();
+
+	/* Judge by this lookup only, never by a net_dev left from earlier. */
+	if (!found) {
+		printk(KERN_ERR MYNAME ": Device %s not found.\n",
+				toi_cluster_iface);
+		return -ENODEV;
+	}
+	net_dev = found;
+
+	if (!added_pack)	/* never register the same handler twice */
+		dev_add_pack(&toi_cluster_packet_type);
+	added_pack = 1;
+
+	loopback_mode = (net_dev == init_net.loopback_dev);
+	num_local_nodes = loopback_mode ? 8 : 1;
+
+	PRINTK("Loopback mode is %s. Number of local nodes is %d.\n",
+			loopback_mode ? "on" : "off", num_local_nodes);
+
+	cluster_startup();
+	return 0;
+}
+
+/* toi_cluster_close_iface
+ *
+ * Description: Stop the cluster daemons and unhook our packet handler.
+ */
+
+static int toi_cluster_close_iface(void)
+{
+	kill_clusterd();
+	if (!added_pack)
+		return 0;
+	dev_remove_pack(&toi_cluster_packet_type);
+	added_pack = 0;
+	return 0;
+}
+
+static void write_side_effect(void)
+{
+	/* Cluster mode is only entered if the interface really came up. */
+	if (toi_cluster_ops.enabled && !toi_cluster_open_iface()) {
+		set_toi_state(TOI_CLUSTER_MODE);
+		return;
+	}
+	toi_cluster_close_iface();
+	clear_toi_state(TOI_CLUSTER_MODE);
+}
+
+static void node_write_side_effect(void)	/* no-op for node_<n> writes */
+{
+}
+
+/*
+ * data for our sysfs entries. String sizes must match the arrays they name.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+	{
+		TOI_ATTR("interface", SYSFS_RW),
+		SYSFS_STRING(toi_cluster_iface, IFNAMSIZ, 0)
+	},
+
+	{
+		TOI_ATTR("enabled", SYSFS_RW),
+		SYSFS_INT(&toi_cluster_ops.enabled, 0, 1, 0),
+		.write_side_effect = write_side_effect,
+	},
+
+	{
+		TOI_ATTR("cluster_name", SYSFS_RW),
+		SYSFS_STRING(toi_cluster_key, 32, 0)
+	},
+
+	{
+		TOI_ATTR("pre-hibernate-script", SYSFS_RW),
+		SYSFS_STRING(pre_hibernate_script, sizeof(pre_hibernate_script), 0)
+	},
+
+	{
+		TOI_ATTR("post-hibernate-script", SYSFS_RW),
+		SYSFS_STRING(post_hibernate_script, sizeof(post_hibernate_script), 0)
+	},
+
+	{
+		TOI_ATTR("continue_delay", SYSFS_RW),
+		SYSFS_UL(&continue_delay, HZ / 2, 60 * HZ, 0)
+	}
+};
+
+/*
+ * Ops structure: the callbacks and sysfs table registered for this module.
+ */
+
+static struct toi_module_ops toi_cluster_ops = {
+	.type			= FILTER_MODULE,
+	.name			= "Cluster",
+	.directory		= "cluster",
+	.module			= THIS_MODULE,
+	.memory_needed 		= toi_cluster_memory_needed,
+	.print_debug_info	= toi_cluster_print_debug_stats,
+	.save_config_info	= toi_cluster_save_config_info,
+	.load_config_info	= toi_cluster_load_config_info,
+	.storage_needed		= toi_cluster_storage_needed,
+
+	.sysfs_data		= sysfs_params,
+	.num_sysfs_entries	= sizeof(sysfs_params) /
+		sizeof(struct toi_sysfs_data),	/* element count */
+};
+
+/* ---- Registration ---- */
+
+#ifdef MODULE	/* modular build: init/exit are static, in init/exit sections */
+#define INIT static __init
+#define EXIT static __exit
+#else		/* otherwise: plain functions with external linkage */
+#define INIT
+#define EXIT
+#endif
+
+INIT int toi_cluster_init(void)
+{
+	static char node_names[MAX_LOCAL_NODES][sizeof("node_NN")];
+	int temp = toi_register_module(&toi_cluster_ops), i;
+	struct kobject *kobj = toi_cluster_ops.dir_kobj;
+
+	for (i = 0; i < MAX_LOCAL_NODES; i++) {
+		struct node_info *node = &node_array[i];
+
+		node->current_message = 0;
+		INIT_LIST_HEAD(&node->member_list);
+		init_waitqueue_head(&node->member_events);
+		spin_lock_init(&node->member_list_lock);
+		spin_lock_init(&node->receive_lock);
+
+		/* Set up sysfs entry. The name lives in static storage sized
+		 * for "node_N" plus NUL: nothing to allocate, check or free. */
+		snprintf(node_names[i], sizeof(node_names[i]), "node_%d", i);
+		node->sysfs_data.attr.name = node_names[i];
+		node->sysfs_data.attr.mode = SYSFS_RW;
+		node->sysfs_data.type = TOI_SYSFS_DATA_INTEGER;
+		node->sysfs_data.flags = 0;
+		node->sysfs_data.data.integer.variable =
+			(int *) &node->current_message;
+		node->sysfs_data.data.integer.minimum = 0;
+		node->sysfs_data.data.integer.maximum = INT_MAX;
+		node->sysfs_data.write_side_effect = node_write_side_effect;
+		toi_register_sysfs_file(kobj, &node->sysfs_data);
+	}
+
+	toi_cluster_ops.enabled = (strlen(toi_cluster_iface) > 0);
+
+	if (toi_cluster_ops.enabled)
+		toi_cluster_open_iface();
+
+	return temp;
+}
+
+EXIT void toi_cluster_exit(void)
+{
+	struct kobject *dir = toi_cluster_ops.dir_kobj;
+	int node;
+
+	toi_cluster_close_iface();	/* stop daemons before removing files */
+	for (node = 0; node < MAX_LOCAL_NODES; node++)
+		toi_unregister_sysfs_file(dir, &node_array[node].sysfs_data);
+	toi_unregister_module(&toi_cluster_ops);
+}
+
+static int __init toi_cluster_iface_setup(char *iface)
+{
+	toi_cluster_ops.enabled = (*iface && strcmp(iface, "off"));
+	/* Bounded, always-terminated copy: the parameter may be too long. */
+	if (toi_cluster_ops.enabled)
+		strlcpy(toi_cluster_iface, iface, IFNAMSIZ);
+	return 1;	/* parameter handled */
+}
+
+__setup("toi_cluster=", toi_cluster_iface_setup);
+
+#ifdef MODULE
+MODULE_LICENSE("GPL");
+module_init(toi_cluster_init);
+module_exit(toi_cluster_exit);
+MODULE_AUTHOR("Nigel Cunningham");
+MODULE_DESCRIPTION("Cluster Support for TuxOnIce");
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_cluster.h linux-2.6-block-custom/kernel/power/tuxonice_cluster.h
--- linux-2.6-block/kernel/power/tuxonice_cluster.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_cluster.h	2008-09-26 19:48:23.983764697 +0900
@@ -0,0 +1,19 @@
+/*
+ * kernel/power/tuxonice_cluster.h
+ *
+ * Copyright (C) 2006-2007 Nigel Cunningham (nigel at tuxonice net)
+ * Copyright (C) 2006 Red Hat, inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifdef CONFIG_TOI_CLUSTER
+extern int toi_cluster_init(void);
+extern void toi_cluster_exit(void);
+extern void toi_initiate_cluster_hibernate(void);
+#else /* !CONFIG_TOI_CLUSTER: no-op stubs so callers need no #ifdefs */
+static inline int toi_cluster_init(void) { return 0; }
+static inline void toi_cluster_exit(void) { }
+static inline void toi_initiate_cluster_hibernate(void) { }
+#endif /* CONFIG_TOI_CLUSTER */
+
diff -Npur linux-2.6-block/kernel/power/tuxonice_compress.c linux-2.6-block-custom/kernel/power/tuxonice_compress.c
--- linux-2.6-block/kernel/power/tuxonice_compress.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_compress.c	2008-09-26 19:48:23.983764697 +0900
@@ -0,0 +1,441 @@
+/*
+ * kernel/power/tuxonice_compress.c
+ *
+ * Copyright (C) 2003-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains data compression routines for TuxOnIce,
+ * using cryptoapi.
+ */
+
+#include <linux/module.h>
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/crypto.h>
+
+#include "tuxonice_builtin.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_alloc.h"
+
+static int toi_expected_compression;	/* sysfs "expected_compression" */
+
+static struct toi_module_ops toi_compression_ops; /* defined further down */
+static struct toi_module_ops *next_driver; /* set by toi_compress_init() */
+
+static char toi_compressor_name[32] = "lzf"; /* name given to cryptoapi */
+
+static DEFINE_MUTEX(stats_lock); /* held while updating the byte counters */
+
+struct cpu_context {	/* working state; one instance per CPU (see below) */
+	u8 *page_buffer;	/* page-sized buffer for (de)compression */
+	struct crypto_comp *transform;	/* NULL when no compressor is set up */
+	unsigned int len;	/* write path: compressed length of the page */
+	char *buffer_start;	/* write path: kmap() of the page being written */
+};
+
+static DEFINE_PER_CPU(struct cpu_context, contexts);
+
+static int toi_compress_prepare_result; /* set in toi_compress_init() */
+
+/*
+ * toi_compress_cleanup
+ * @toi_or_resume: Nothing is done when this is zero.
+ *
+ * Release every online CPU's compression transform and page buffer.
+ */
+static void toi_compress_cleanup(int toi_or_resume)
+{
+	int cpu;
+
+	if (toi_or_resume) {
+		for_each_online_cpu(cpu) {
+			struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+			if (ctx->transform)
+				crypto_free_comp(ctx->transform);
+			ctx->transform = NULL;
+
+			if (ctx->page_buffer)
+				toi_free_page(16,
+					(unsigned long) ctx->page_buffer);
+			ctx->page_buffer = NULL;
+		}
+	}
+}
+
+/*
+ * toi_compress_crypto_prepare
+ * Allocate a compression transform and a one-page buffer for each online CPU.
+ * Returns 0 on success, 1 if no usable compressor, -ENOMEM without a buffer.
+ */
+static int toi_compress_crypto_prepare(void)
+{
+	int cpu;
+
+	if (!*toi_compressor_name) {
+		printk(KERN_INFO "TuxOnIce: Compression enabled but no "
+				"compressor name set.\n");
+		return 1;
+	}
+
+	for_each_online_cpu(cpu) {
+		struct cpu_context *this = &per_cpu(contexts, cpu);
+		this->transform = crypto_alloc_comp(toi_compressor_name, 0, 0);
+		if (IS_ERR(this->transform)) {
+			printk(KERN_INFO "TuxOnIce: Failed to initialise the "
+					"%s compression transform.\n",
+					toi_compressor_name);
+			this->transform = NULL;
+			return 1;
+		}
+
+		/* page_buffer is a u8 *, so cast to that rather than char *. */
+		this->page_buffer =
+			(u8 *) toi_get_zeroed_page(16, TOI_ATOMIC_GFP);
+		if (!this->page_buffer) {
+			printk(KERN_ERR
+			  "Failed to allocate a page buffer for TuxOnIce "
+			  "compression driver.\n");
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * toi_compress_init
+ * @toi_or_resume: Zero means there is nothing to set up.
+ *
+ * Reset the byte counters, find the next filter and prepare the compressor.
+ * Returns 0, or -ECHILD when no filter follows this one.
+ */
+static int toi_compress_init(int toi_or_resume)
+{
+	if (toi_or_resume) {
+		toi_compress_bytes_out = 0;
+		toi_compress_bytes_in = 0;
+		next_driver = toi_get_next_filter(&toi_compression_ops);
+		if (next_driver == NULL)
+			return -ECHILD;
+		/* A failure here is only reported later, from rw_init. */
+		toi_compress_prepare_result = toi_compress_crypto_prepare();
+	}
+	return 0;
+}
+
+/*
+ * toi_compress_rw_init()
+ *
+ * If preparation failed, READ gets -ENODEV; otherwise we disable ourselves.
+ */
+int toi_compress_rw_init(int rw, int stream_number)
+{
+	if (!toi_compress_prepare_result)
+		return 0;
+
+	printk("Failed to initialise compression algorithm.\n");
+	if (rw == READ)
+		return -ENODEV;
+	toi_compression_ops.enabled = 0;
+	return 0;
+}
+
+/*
+ * toi_compress_write_page()
+ * @index:	Passed through unchanged to the next module.
+ * @buffer_page: Page holding the data to be compressed.
+ * @buf_size:	Number of bytes of data in @buffer_page.
+ *
+ * Compress a page with this CPU's transform and pass the result on to the
+ * next module in the pipeline. The original page is passed on instead when
+ * this CPU has no transform or when compression did not shrink the data.
+ *
+ * toi_compress_bytes_in and toi_compress_bytes_out are updated under
+ * stats_lock for every page that goes through the compressor.
+ *
+ * Returns:	The compressor's error if compression fails, otherwise
+ * 		whatever the next module's write_page returns.
+ */
+static int toi_compress_write_page(unsigned long index,
+		struct page *buffer_page, unsigned int buf_size)
+{
+	int result, cpu = smp_processor_id();
+	struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+	/* No transform on this CPU: hand the page on untouched. */
+	if (!ctx->transform)
+		return next_driver->write_page(index, buffer_page, buf_size);
+
+	ctx->buffer_start = kmap(buffer_page);
+	ctx->len = buf_size;
+	result = crypto_comp_compress(ctx->transform,
+			ctx->buffer_start, buf_size,
+			ctx->page_buffer, &ctx->len);
+	kunmap(buffer_page);
+
+	if (result) {
+		printk(KERN_INFO "Compression failed.\n");
+		return result;
+	}
+
+	/* Account for this page in the global statistics. */
+	mutex_lock(&stats_lock);
+	toi_compress_bytes_in += buf_size;
+	toi_compress_bytes_out += ctx->len;
+	mutex_unlock(&stats_lock);
+
+	/* No gain from compression: write the original page, not our buffer. */
+	if (ctx->len >= buf_size)
+		return next_driver->write_page(index, buffer_page, buf_size);
+
+	return next_driver->write_page(index,
+			virt_to_page(ctx->page_buffer),
+			ctx->len);
+}
+
+/*
+ * toi_compress_read_page()
+ * @index:	Passed to the next module's read_page.
+ * @buffer_page: struct page *. Pointer to a buffer of size PAGE_SIZE.
+ * @buf_size:	Receives the size of the data returned in @buffer_page.
+ *
+ * Read a page from the next module; decompress it unless len == PAGE_SIZE.
+ * Zero if successful. Error condition from me or from downstream on failure.
+ */
+static int toi_compress_read_page(unsigned long *index,
+		struct page *buffer_page, unsigned int *buf_size)
+{
+	int ret, cpu = smp_processor_id();
+	unsigned int len, outlen = PAGE_SIZE;
+	char *buffer_start;
+	struct cpu_context *ctx = &per_cpu(contexts, cpu);
+
+	if (!ctx->transform)
+		return next_driver->read_page(index, buffer_page, buf_size);
+
+	/* Reads must be synchronous: we can't decompress unread data. */
+	*buf_size = PAGE_SIZE;
+	ret = next_driver->read_page(index, buffer_page, &len);
+
+	/* Error or uncompressed data */
+	if (ret || len == PAGE_SIZE)
+		return ret;
+
+	/* page_buffer is one page; refuse lengths that would overrun it. */
+	if (len > PAGE_SIZE) {
+		abort_hibernate(TOI_FAILED_IO,
+			"Compressed page length %u is too large.\n", len);
+		return -EIO;
+	}
+
+	buffer_start = kmap(buffer_page);
+	memcpy(ctx->page_buffer, buffer_start, len);
+	ret = crypto_comp_decompress(ctx->transform, ctx->page_buffer,
+			len, buffer_start, &outlen);
+	if (ret)
+		abort_hibernate(TOI_FAILED_IO,
+			"Compress_read returned %d.\n", ret);
+	else if (outlen != PAGE_SIZE) {
+		abort_hibernate(TOI_FAILED_IO,
+			"Decompression yielded %d bytes instead of %ld.\n",
+			outlen, PAGE_SIZE);
+		printk("Decompression yielded %d bytes instead of %ld.\n",
+			outlen, PAGE_SIZE);
+		ret = -EIO;
+		*buf_size = outlen;
+	}
+	kunmap(buffer_page);
+	return ret;
+}
+
+/*
+ * toi_compress_print_debug_stats
+ * @buffer: Pointer to a buffer into which the debug info will be printed.
+ * @size: Size of the buffer.
+ *
+ * Print the compressor name and, once data has been seen, the byte counts.
+ * Returns: Number of characters written to the buffer.
+ */
+
+static int toi_compress_print_debug_stats(char *buffer, int size)
+{
+	unsigned long pages_in = toi_compress_bytes_in >> PAGE_SHIFT,
+		      pages_out = toi_compress_bytes_out >> PAGE_SHIFT;
+	int len;
+
+	/* Name the compressor, then report the ratio achieved (if any). */
+	if (*toi_compressor_name)
+		len = snprintf_used(buffer, size, "- Compressor is '%s'.\n",
+				toi_compressor_name);
+	else
+		len = snprintf_used(buffer, size, "- Compressor is not set.\n");
+
+	if (pages_in)	/* signed maths: output can exceed input; %d wants int */
+		len += snprintf_used(buffer+len, size - len,
+		  "  Compressed %lu bytes into %lu (%d percent compression).\n",
+		  toi_compress_bytes_in, toi_compress_bytes_out,
+		  (int) (((long) pages_in - (long) pages_out) * 100 /
+			  (long) pages_in));
+	return len;
+}
+
+/*
+ * toi_compress_memory_needed
+ *
+ * Tell the caller how much memory we need to operate during hibernate/resume.
+ * Returns: int. Number of bytes of memory required for operation, which is
+ * always two pages.
+ */
+static int toi_compress_memory_needed(void)
+{
+	return PAGE_SIZE << 1;
+}
+
+static int toi_compress_storage_needed(void)
+{	/* four unsigned longs, then the NUL-terminated compressor name */
+	return strlen(toi_compressor_name) + 1 + 4 * sizeof(unsigned long);
+}
+
+/*
+ * toi_compress_save_config_info
+ * @buffer: Pointer to a buffer of size PAGE_SIZE.
+ *
+ * Save information needed when reloading the image at resume time.
+ * Returns: Number of bytes used for saving our data.
+ */
+static int toi_compress_save_config_info(char *buffer)
+{
+	unsigned long *fields = (unsigned long *) buffer;
+	char *name = buffer + 4 * sizeof(unsigned long);
+	int namelen = strlen(toi_compressor_name) + 1;
+
+	/* Layout: bytes in, bytes out, expected compression, name length. */
+	fields[0] = toi_compress_bytes_in;
+	fields[1] = toi_compress_bytes_out;
+	fields[2] = toi_expected_compression;
+	fields[3] = namelen;
+
+	/* namelen counts the terminator, so the saved name is NUL-ended. */
+	strncpy(name, toi_compressor_name, namelen);
+	return 4 * sizeof(unsigned long) + namelen;
+}
+
+/* toi_compress_load_config_info
+ * @buffer: Pointer to the start of the data.
+ * @size: Number of bytes that were saved (currently unused).
+ *
+ * Reload what toi_compress_save_config_info() stored. The name length is
+ * clamped to our buffer, and a changed compressor is prepared afresh.
+ */
+static void toi_compress_load_config_info(char *buffer, int size)
+{
+	unsigned long *fields = (unsigned long *) buffer;
+	char *name = buffer + 4 * sizeof(unsigned long);
+	int namelen = fields[3];
+
+	toi_compress_bytes_in = fields[0];
+	toi_compress_bytes_out = fields[1];
+	toi_expected_compression = fields[2];
+	if (namelen < 0 || namelen > (int) sizeof(toi_compressor_name))
+		namelen = sizeof(toi_compressor_name);	/* untrusted length */
+	if (strncmp(toi_compressor_name, name, namelen)) {
+		toi_compress_cleanup(1);
+		strncpy(toi_compressor_name, name, namelen);
+		toi_compressor_name[sizeof(toi_compressor_name) - 1] = '\0';
+		/* Let toi_compress_rw_init() see whether this worked. */
+		toi_compress_prepare_result = toi_compress_crypto_prepare();
+	}
+}
+
+/*
+ * toi_compress_expected_ratio
+ *
+ * Description:	Returns the expected ratio between data passed into this module
+ * 		and the amount of data output when writing.
+ * Returns:	100 if the module is disabled. Otherwise 100 minus the value
+ * 		set by the user via our sysfs entry.
+ */
+
+static int toi_compress_expected_ratio(void)
+{
+	/* A disabled module is reported as making no saving at all. */
+	int expected_saving = toi_compression_ops.enabled ?
+		toi_expected_compression : 0;
+	return 100 - expected_saving;
+}
+
+/*
+ * Sysfs entries for this module (referenced by toi_compression_ops below).
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+	{	/* feeds toi_compress_expected_ratio() */
+		TOI_ATTR("expected_compression", SYSFS_RW),
+		SYSFS_INT(&toi_expected_compression, 0, 99, 0)
+	},
+
+	{	/* the module's on/off flag */
+		TOI_ATTR("enabled", SYSFS_RW),
+		SYSFS_INT(&toi_compression_ops.enabled, 0, 1, 0)
+	},
+
+	{	/* name passed to crypto_alloc_comp() */
+		TOI_ATTR("algorithm", SYSFS_RW),
+		SYSFS_STRING(toi_compressor_name, 31, 0)
+	}
+};
+
+/*
+ * Ops structure, registered with toi_register_module() below.
+ */
+static struct toi_module_ops toi_compression_ops = {
+	/* Identity */
+	.name			= "compression",
+	.directory		= "compression",
+	.type			= FILTER_MODULE,
+	.module			= THIS_MODULE,
+	/* Setup, teardown and accounting */
+	.initialise		= toi_compress_init,
+	.rw_init		= toi_compress_rw_init,
+	.cleanup		= toi_compress_cleanup,
+	.memory_needed		= toi_compress_memory_needed,
+	.storage_needed		= toi_compress_storage_needed,
+	.expected_compression	= toi_compress_expected_ratio,
+	.save_config_info	= toi_compress_save_config_info,
+	.load_config_info	= toi_compress_load_config_info,
+	.print_debug_info	= toi_compress_print_debug_stats,
+	/* Data path */
+	.write_page		= toi_compress_write_page,
+	.read_page		= toi_compress_read_page,
+	.sysfs_data		= sysfs_params,
+	.num_sysfs_entries	= sizeof(sysfs_params) /
+		sizeof(sysfs_params[0]),
+};
+
+/* ---- Registration ---- */
+/* Register the compression ops; run via module_init or late_initcall. */
+static __init int toi_compress_load(void)
+{
+	return toi_register_module(&toi_compression_ops);
+}
+
+#ifdef MODULE
+static __exit void toi_compress_unload(void)	/* undo toi_compress_load() */
+{
+	toi_unregister_module(&toi_compression_ops);
+}
+
+module_init(toi_compress_load);
+module_exit(toi_compress_unload);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nigel Cunningham");
+MODULE_DESCRIPTION("Compression Support for TuxOnIce");
+#else	/* built in: register from a late initcall instead */
+late_initcall(toi_compress_load);
+#endif /* MODULE */
diff -Npur linux-2.6-block/kernel/power/tuxonice_extent.c linux-2.6-block-custom/kernel/power/tuxonice_extent.c
--- linux-2.6-block/kernel/power/tuxonice_extent.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_extent.c	2008-09-26 19:48:23.987790069 +0900
@@ -0,0 +1,303 @@
+/*
+ * kernel/power/tuxonice_extent.c
+ *
+ * Copyright (C) 2003-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * Distributed under GPLv2.
+ *
+ * These functions encapsulate the manipulation of storage metadata. For
+ * pageflags, we use dynamically allocated bitmaps.
+ */
+
+#include <linux/module.h>
+#include <linux/suspend.h>
+#include "tuxonice_modules.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_alloc.h"
+#include "tuxonice_ui.h"
+#include "tuxonice.h"
+
+/* toi_get_extent
+ * Returns a free extent from toi_kzalloc(). May fail, returning NULL instead.
+ */
+static struct hibernate_extent *toi_get_extent(void)
+{
+	struct hibernate_extent *ext;
+	ext = toi_kzalloc(2, sizeof(*ext), TOI_ATOMIC_GFP);
+	return ext;
+}
+
+/* toi_put_extent_chain.
+ *
+ * Free every extent in a chain, decrementing num_extents for each, and
+ * reset first, last_touched and size.
+ */
+void toi_put_extent_chain(struct hibernate_extent_chain *chain)
+{
+	struct hibernate_extent *ext, *following;
+
+	for (ext = chain->first; ext; ext = following) {
+		/* Grab the link before the extent is freed. */
+		following = ext->next;
+		toi_kfree(2, ext);
+		chain->num_extents--;
+	}
+
+	/* Leave the chain with no extents linked and a size of zero. */
+	chain->size = 0;
+	chain->last_touched = NULL;
+	chain->first = NULL;
+}
+
+/*
+ * toi_add_to_extent_chain
+ * Add [start, end] to a chain, in ascending order of start. A range that
+ * begins right after an existing extent extends it. Returns 0 or -ENOMEM.
+ */
+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
+		unsigned long start, unsigned long end)
+{
+	struct hibernate_extent *new_ext = NULL, *cur_ext = NULL;
+
+	/* Find the right place in the chain */
+	if (chain->last_touched && chain->last_touched->start < start)
+		cur_ext = chain->last_touched;
+	else if (chain->first && chain->first->start < start)
+		cur_ext = chain->first;
+	/* cur_ext stays NULL if the new range belongs at the head. */
+	if (cur_ext) {
+		while (cur_ext->next && cur_ext->next->start < start)
+			cur_ext = cur_ext->next;
+		/* Contiguous with cur_ext: grow it instead of allocating. */
+		if (cur_ext->end == (start - 1)) {
+			struct hibernate_extent *next_ext = cur_ext->next;
+			cur_ext->end = end;
+
+			/* Merge with the following one? */
+			if (next_ext && cur_ext->end + 1 == next_ext->start) {
+				cur_ext->end = next_ext->end;
+				cur_ext->next = next_ext->next;
+				toi_kfree(2, next_ext);
+				chain->num_extents--;
+			}
+
+			chain->last_touched = cur_ext;
+			chain->size += (end - start + 1);
+
+			return 0;
+		}
+	}
+	/* No extent to extend, so allocate a new one. */
+	new_ext = toi_get_extent();
+	if (!new_ext) {
+		printk(KERN_INFO "Error unable to append a new extent to the "
+				"chain.\n");
+		return -ENOMEM;
+	}
+
+	chain->num_extents++;
+	chain->size += (end - start + 1);
+	new_ext->start = start;
+	new_ext->end = end;
+
+	chain->last_touched = new_ext;
+	/* Link after cur_ext, or at the head when there is none. */
+	if (cur_ext) {
+		new_ext->next = cur_ext->next;
+		cur_ext->next = new_ext;
+	} else {
+		if (chain->first)
+			new_ext->next = chain->first;
+		chain->first = new_ext;
+	}
+
+	return 0;
+}
+
+/* toi_serialise_extent_chain
+ *
+ * Write a chain in the image: the chain's size and extent count, followed
+ * by the start and end of each extent. Returns 0 on success, a write error,
+ * or 1 if the number of extents written disagrees with num_extents.
+ */
+int toi_serialise_extent_chain(struct toi_module_ops *owner,
+		struct hibernate_extent_chain *chain)
+{
+	struct hibernate_extent *ext;
+	int err, saved = 0;
+
+	/* The chain's first two ints (size, num_extents) form the header. */
+	err = toiActiveAllocator->rw_header_chunk(WRITE, owner, (char *) chain,
+			2 * sizeof(int));
+	if (err)
+		return err;
+
+	/* Then each extent's start and end, in chain order. */
+	for (ext = chain->first; ext; ext = ext->next, saved++) {
+		err = toiActiveAllocator->rw_header_chunk(WRITE, owner,
+				(char *) ext, 2 * sizeof(unsigned long));
+		if (err)
+			return err;
+	}
+
+	if (saved == chain->num_extents)
+		return err;
+
+	printk(KERN_EMERG "Saved %d extents but chain metadata says "
+		"there should be %d.\n", saved, chain->num_extents);
+	return 1;
+}
+
+/* toi_load_extent_chain
+ * Read back a chain saved by toi_serialise_extent_chain(). Returns 0, 1 on
+ * a read failure or -ENOMEM; extents already loaded stay on chain->first.
+ */
+int toi_load_extent_chain(struct hibernate_extent_chain *chain)
+{
+	struct hibernate_extent *this, *last = NULL;
+	int i, ret;
+
+	ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ, NULL,
+			(char *) chain, 2 * sizeof(int));
+	if (ret) {
+		printk("Failed to read size of extent chain.\n");
+		return 1;
+	}
+
+	for (i = 0; i < chain->num_extents; i++) {
+		this = toi_kzalloc(3, sizeof(*this), TOI_ATOMIC_GFP);
+		if (!this) {
+			printk(KERN_INFO "Failed to allocate a new extent.\n");
+			return -ENOMEM;
+		}
+		this->next = NULL;
+		ret = toiActiveAllocator->rw_header_chunk_noreadahead(READ,
+				NULL, (char *) this, 2 * sizeof(unsigned long));
+		if (ret) {
+			printk(KERN_INFO "Failed to read an extent.\n");
+			toi_kfree(3, this);	/* not linked in yet: don't leak */
+			return 1;
+		}
+		if (last)
+			last->next = this;
+		else
+			chain->first = this;
+		last = this;
+	}
+	return 0;
+}
+
+/* toi_extent_state_next
+ *
+ * Given a state, progress to the next valid entry. We may begin in an
+ * invalid state, as we do when invoked after extent_state_goto_start below.
+ * Returns the new current_offset, or 0 once every chain is used up.
+ * When using compression and expected_compression > 0, we let the image size
+ * be larger than storage, so we can validly run out of data to return.
+ */
+unsigned long toi_extent_state_next(struct hibernate_extent_iterate_state *state)
+{
+	if (state->current_chain == state->num_chains)
+		return 0;
+	/* Step within the current extent, or on to the one that follows it. */
+	if (state->current_extent) {
+		if (state->current_offset == state->current_extent->end) {
+			if (state->current_extent->next) {
+				state->current_extent =
+					state->current_extent->next;
+				state->current_offset =
+					state->current_extent->start;
+			} else {
+				state->current_extent = NULL;
+				state->current_offset = 0;
+			}
+		} else
+			state->current_offset++;
+	}
+	/* Out of extents: move on to the next chain that has any. */
+	while (!state->current_extent) {
+		int chain_num = ++(state->current_chain);
+
+		if (chain_num == state->num_chains)
+			return 0;
+
+		state->current_extent = (state->chains + chain_num)->first;
+
+		if (!state->current_extent)
+			continue;
+
+		state->current_offset = state->current_extent->start;
+	}
+
+	return state->current_offset;
+}
+
+/* toi_extent_state_goto_start
+ * Rewind the iterator to before the first chain (current_chain == -1);
+ * toi_extent_state_next() then moves it onto the first valid value.
+ */
+void toi_extent_state_goto_start(struct hibernate_extent_iterate_state *state)
+{
+	state->current_offset = 0;
+	state->current_extent = NULL;
+	state->current_chain = -1;
+}
+
+/* toi_extent_state_save
+ *
+ * Given a state and a struct hibernate_extent_iterate_saved_state, save the
+ * current position as a chain number, extent index and offset rather than
+ * as pointers, so that it can be used with relocated chains (at resume
+ * time).
+ */
+void toi_extent_state_save(struct hibernate_extent_iterate_state *state,
+		struct hibernate_extent_iterate_saved_state *saved_state)
+{
+	struct hibernate_extent *walk;
+
+	saved_state->offset = state->current_offset;
+	saved_state->extent_num = 0;
+	saved_state->chain_num = state->current_chain;
+
+	/* Before the first chain there is no extent to count up to. */
+	if (saved_state->chain_num == -1)
+		return;
+
+	/* Record the current extent as its index within its chain. */
+	for (walk = (state->chains + state->current_chain)->first;
+	     walk != state->current_extent; walk = walk->next)
+		saved_state->extent_num++;
+}
+
+/* toi_extent_state_restore
+ *
+ * Restore the position saved by toi_extent_state_save.
+ */
+void toi_extent_state_restore(struct hibernate_extent_iterate_state *state,
+		struct hibernate_extent_iterate_saved_state *saved_state)
+{
+	struct hibernate_extent *ext;
+	int remaining = saved_state->extent_num;
+
+	if (saved_state->chain_num == -1) {
+		toi_extent_state_goto_start(state);
+		return;
+	}
+	state->current_chain = saved_state->chain_num;
+	state->current_offset = saved_state->offset;
+	ext = (state->chains + state->current_chain)->first;
+	while (remaining--)	/* step extent_num links along the chain */
+		ext = ext->next;
+	state->current_extent = ext;
+}
+
+#ifdef CONFIG_TOI_EXPORTS	/* exports are optional, per this config symbol */
+EXPORT_SYMBOL_GPL(toi_add_to_extent_chain);
+EXPORT_SYMBOL_GPL(toi_put_extent_chain);
+EXPORT_SYMBOL_GPL(toi_load_extent_chain);
+EXPORT_SYMBOL_GPL(toi_serialise_extent_chain);
+EXPORT_SYMBOL_GPL(toi_extent_state_save);
+EXPORT_SYMBOL_GPL(toi_extent_state_restore);
+EXPORT_SYMBOL_GPL(toi_extent_state_goto_start);
+EXPORT_SYMBOL_GPL(toi_extent_state_next);
+#endif /* CONFIG_TOI_EXPORTS */
diff -Npur linux-2.6-block/kernel/power/tuxonice_extent.h linux-2.6-block-custom/kernel/power/tuxonice_extent.h
--- linux-2.6-block/kernel/power/tuxonice_extent.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_extent.h	2008-09-26 19:48:23.987790069 +0900
@@ -0,0 +1,72 @@
+/*
+ * kernel/power/tuxonice_extent.h
+ *
+ * Copyright (C) 2003-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations related to extents. Extents are
+ * TuxOnIce's method of storing some of the metadata for the image.
+ * See tuxonice_extent.c for more info.
+ *
+ */
+
+#include "tuxonice_modules.h"
+
+#ifndef EXTENT_H
+#define EXTENT_H
+
+struct hibernate_extent {
+	unsigned long start, end;	/* inclusive range of values */
+	struct hibernate_extent *next;	/* next extent in the chain, or NULL */
+};
+
+struct hibernate_extent_chain {	/* the two leading ints are what gets saved */
+	int size; /* size of the chain ie sum (max-min+1) */
+	int num_extents;	/* number of extents linked from first */
+	struct hibernate_extent *first, *last_touched; /* head; search hint */
+};
+
+struct hibernate_extent_iterate_state {
+	struct hibernate_extent_chain *chains;	/* array of num_chains chains */
+	int num_chains;
+	int current_chain;	/* -1 before the start, num_chains at the end */
+	struct hibernate_extent *current_extent; /* NULL when between chains */
+	unsigned long current_offset;	/* value within current_extent */
+};
+
+struct hibernate_extent_iterate_saved_state {
+	int chain_num;	/* copy of current_chain */
+	int extent_num;	/* index of current_extent within its chain */
+	unsigned long offset;	/* copy of current_offset */
+};
+
+#define toi_extent_state_eof(state) /* true once every chain is used up */ \
+	((state)->num_chains == (state)->current_chain)
+
+/* Simplify iterating through all the values in an extent chain */
+#define toi_extent_for_each(extent_chain, extentpointer, value) \
+if ((extent_chain)->first) \
+	for ((extentpointer) = (extent_chain)->first, (value) = \
+			(extentpointer)->start; \
+	     ((extentpointer) && ((extentpointer)->next || (value) <= \
+				 (extentpointer)->end)); \
+	     (((value) == (extentpointer)->end) ? \
+		((extentpointer) = (extentpointer)->next, (value) = \
+		 ((extentpointer) ? (extentpointer)->start : 0)) : \
+			(value)++))
+
+void toi_put_extent_chain(struct hibernate_extent_chain *chain);
+int toi_add_to_extent_chain(struct hibernate_extent_chain *chain,
+		unsigned long start, unsigned long end);
+int toi_serialise_extent_chain(struct toi_module_ops *owner,
+		struct hibernate_extent_chain *chain);
+int toi_load_extent_chain(struct hibernate_extent_chain *chain);
+/* Iteration over the values held in an array of chains */
+void toi_extent_state_save(struct hibernate_extent_iterate_state *state,
+		struct hibernate_extent_iterate_saved_state *saved_state);
+void toi_extent_state_restore(struct hibernate_extent_iterate_state *state,
+		struct hibernate_extent_iterate_saved_state *saved_state);
+void toi_extent_state_goto_start(struct hibernate_extent_iterate_state *state);
+unsigned long toi_extent_state_next(struct hibernate_extent_iterate_state *state);
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_file.c linux-2.6-block-custom/kernel/power/tuxonice_file.c
--- linux-2.6-block/kernel/power/tuxonice_file.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_file.c	2008-09-26 21:00:37.121417596 +0900
@@ -0,0 +1,1126 @@
+/*
+ * kernel/power/tuxonice_file.c
+ *
+ * Copyright (C) 2005-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file encapsulates functions for usage of a simple file as a
+ * backing store. It is based upon the swapallocator, and shares the
+ * same basic working. Here, though, we have nothing to do with
+ * swapspace, and only one device to worry about.
+ *
+ * The user can just
+ *
+ * echo TuxOnIce > /path/to/my_file
+ *
+ * dd if=/dev/zero bs=1M count=<file_size_desired> >> /path/to/my_file
+ *
+ * and
+ *
+ * echo /path/to/my_file > /sys/power/tuxonice/file/target
+ *
+ * then put what they find in /sys/power/tuxonice/resume
+ * as their resume= parameter in lilo.conf (and rerun lilo if using it).
+ *
+ * Having done this, they're ready to hibernate and resume.
+ *
+ * TODO:
+ * - File resizing.
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/mount.h>
+#include <linux/statfs.h>
+#include <linux/syscalls.h>
+#include <linux/namei.h>
+#include <linux/fs.h>
+#include <linux/root_dev.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_io.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_block_io.h"
+#include "tuxonice_alloc.h"
+
+static struct toi_module_ops toi_fileops;	/* also our bd_claim() holder */
+
+/* Details of our target.  */
+
+char toi_file_target[256];
+static struct inode *target_inode;	/* inode of the target file or bdev */
+static struct file *target_file;	/* open handle, NULL when none */
+static struct block_device *toi_file_target_bdev;
+static dev_t resume_file_dev_t;	/* device number kept for reopening */
+static int used_devt;	/* set when the bdev was opened by device number */
+static int setting_toi_file_target;
+static sector_t target_firstblock, target_header_start;
+static int target_storage_available;	/* from size_ignoring_ignored_pages() */
+static int target_claim;	/* set when bd_claim() succeeded */
+
+/* Old signatures */
+static char HaveImage[] = "HaveImage\n";
+static char NoImage[] =   "TuxOnIce\n";
+#define sig_size (sizeof(HaveImage) + 1)
+
+struct toi_file_header {
+	char sig[sig_size];	/* text or binary signature, see parse_signature() */
+	int resumed_before;	/* nonzero sets TOI_RESUMED_BEFORE when parsed */
+	unsigned long first_header_block; /* becomes target_header_start */
+	int have_image;	/* with the binary signature: is an image stored? */
+};
+
+/* Header pages: how many apply_header_reservation() steps past */
+static int header_pages_reserved;
+
+/* Main Storage Pages */
+static int main_pages_allocated, main_pages_requested;
+/* Dereferences target_inode, so only use it when that is non-NULL. */
+#define target_is_normal_file() (S_ISREG(target_inode->i_mode))
+/* Target bdev plus block-size derived values, filled in by set_devinfo() */
+static struct toi_bdev_info devinfo;
+
+/* Extent chain for blocks */
+static struct hibernate_extent_chain block_chain;
+
+/* Signature operations (presumably selectors for one signature helper) */
+enum {
+	GET_IMAGE_EXISTS,
+	INVALIDATE,
+	MARK_RESUME_ATTEMPTED,
+	UNMARK_RESUME_ATTEMPTED,
+};
+
+static void set_devinfo(struct block_device *bdev, int target_blkbits)
+{
+	/* Zero block bits means "no target": both derived fields become 0. */
+	int shift = target_blkbits ? target_blkbits - 9 : 0;
+	int per_page = target_blkbits ? 1 << (PAGE_SHIFT - target_blkbits) : 0;
+
+	devinfo.bdev = bdev;
+	devinfo.bmap_shift = shift;
+	devinfo.blocks_per_page = per_page;
+}
+
+/* Convert a raw page count to a usable one; the result is never negative. */
+static long raw_to_real(long raw)
+{
+	/* NOTE(review): presumably one long + one int of metadata per page. */
+	const unsigned long overhead = sizeof(unsigned long) + sizeof(int);
+	const unsigned long unit = PAGE_SIZE + overhead;
+	long real = raw - (raw * overhead + (unit + 1)) / unit;
+
+	return real < 0 ? 0 : real;
+}
+
+static int toi_file_storage_available(void)
+{
+	struct block_device *bdev = toi_file_target_bdev;
+	int raw_pages = 0;
+
+	if (!target_inode)
+		return 0;
+
+	switch (target_inode->i_mode & S_IFMT) {
+	case S_IFSOCK:
+	case S_IFCHR:
+	case S_IFIFO: /* Socket, Char, Fifo */
+		return -1;
+	case S_IFREG: /* Regular file: the precomputed mappable page count */
+		raw_pages = target_storage_available;
+		break;
+	case S_IFBLK: /* Block device */
+		if (!bdev->bd_disk) {
+			printk(KERN_INFO "bdev->bd_disk null.\n");
+			return 0;
+		}
+		/* Sectors in the partition if there is one, else the disk. */
+		if (bdev->bd_part)
+			raw_pages = bdev->bd_part->nr_sects >> (PAGE_SHIFT - 9);
+		else
+			raw_pages = get_capacity(bdev->bd_disk) >>
+				(PAGE_SHIFT - 9);
+	}
+
+	return raw_to_real(raw_pages);
+}
+
+/* Does every block of file page page_num map, each right after the last? */
+static int has_contiguous_blocks(int page_num)
+{
+	sector_t prev = 0, cur;
+	int n;
+
+	for (n = 0; n < devinfo.blocks_per_page; n++, prev = cur) {
+		cur = bmap(target_inode,
+			   page_num * devinfo.blocks_per_page + n);
+
+		/* A zero mapping, or a gap after prev, ends the run early. */
+		if (cur == 0 || (prev != 0 && prev + 1 != cur))
+			break;
+	}
+
+	return n == devinfo.blocks_per_page;
+}
+
+/* Count the target's usable pages; non-files defer to storage_available. */
+static int size_ignoring_ignored_pages(void)
+{
+	int page, usable = 0;
+
+	if (!target_is_normal_file())
+		return toi_file_storage_available();
+
+	/* Only pages whose blocks are all mapped and contiguous count. */
+	for (page = 0; page < (target_inode->i_size >> PAGE_SHIFT); page++)
+		usable += has_contiguous_blocks(page) ? 1 : 0;
+	return usable;
+}
+
+static int __populate_block_list(int min, int max)
+{
+	if (test_action_state(TOI_TEST_BIO))	/* debug output only */
+		printk(KERN_INFO "Adding extent %d-%d.\n",
+			min << devinfo.bmap_shift,
+			((max + 1) << devinfo.bmap_shift) - 1);
+	/* Append the inclusive range min..max to block_chain. */
+	return toi_add_to_extent_chain(&block_chain, min, max);
+}
+
+static int apply_header_reservation(void)
+{
+	int skipped = 0;
+
+	/* Rewind the writer, then step onto the first page. */
+	toi_extent_state_goto_start(&toi_writer_posn);
+	toi_bio_ops.forward_one_page(1);
+
+	/* Step past each reserved header page. */
+	while (skipped++ < header_pages_reserved)
+		if (toi_bio_ops.forward_one_page(1))
+			return -ENOSPC;
+
+	/* The end of header pages will be the start of pageset 2 */
+	toi_extent_state_save(&toi_writer_posn, &toi_writer_posn_save[2]);
+	return 0;
+}
+
+static int populate_block_list(void)
+{	/* Rebuild block_chain from the target, then reserve header space. */
+	int i, extent_min = -1, extent_max = -1, got_header = 0, result = 0;
+
+	if (block_chain.first)
+		toi_put_extent_chain(&block_chain);
+	/* Not a regular file: one extent that starts after the first page. */
+	if (!target_is_normal_file()) {
+		return (target_storage_available > 0) ?
+			__populate_block_list(devinfo.blocks_per_page,
+				(target_storage_available + 1) *
+				devinfo.blocks_per_page - 1) : 0;
+	}
+
+	for (i = 0; i < (target_inode->i_size >> PAGE_SHIFT); i++) {
+		sector_t new_sector;
+		/* Pages that fail has_contiguous_blocks() are left out. */
+		if (!has_contiguous_blocks(i))
+			continue;
+
+		new_sector = bmap(target_inode,
+		(i * devinfo.blocks_per_page));
+
+		/*
+		 * Ignore the first block in the file.
+		 * It gets the header.
+		 */
+		if (new_sector == target_firstblock >> devinfo.bmap_shift) {
+			got_header = 1;	/* NOTE(review): never read afterwards */
+			continue;
+		}
+
+		/*
+		 * I'd love to be able to fill in holes and resize
+		 * files, but not yet...
+		 */
+		/* Grow the current run, or flush it and start a new one. */
+		if (new_sector == extent_max + 1)
+			extent_max += devinfo.blocks_per_page;
+		else {
+			if (extent_min > -1) {
+				result = __populate_block_list(extent_min,
+						extent_max);
+				if (result)
+					return result;
+			}
+
+			extent_min = new_sector;
+			extent_max = extent_min +
+				devinfo.blocks_per_page - 1;
+		}
+	}
+	/* Flush the final run, if there was one. */
+	if (extent_min > -1) {
+		result = __populate_block_list(extent_min, extent_max);
+		if (result)
+			return result;
+	}
+
+	return apply_header_reservation();
+}
+
+/* Release the target bdev (claim and/or reference) and close the file. */
+static void toi_file_cleanup(int finishing_cycle)
+{
+	if (toi_file_target_bdev) {
+		if (target_claim) {
+			bd_release(toi_file_target_bdev);
+			target_claim = 0;
+		}
+		if (used_devt) {	/* we opened it by device number */
+			blkdev_put(toi_file_target_bdev);
+			used_devt = 0;
+		}
+		toi_file_target_bdev = NULL;
+		target_inode = NULL;
+		set_devinfo(NULL, 0);
+		target_storage_available = 0;
+	}
+	/* target_file is a pointer: test it as one, not with "> 0". */
+	if (target_file) {
+		filp_close(target_file, NULL);
+		target_file = NULL;
+	}
+}
+
+/*
+ * reopen_resume_devt
+ * Reopen the bdev using the device number remembered from the first open of
+ * resume=, e.g. to check whether an image exists when starting a resume.
+ */
+static void reopen_resume_devt(void)
+{
+	struct block_device *bdev;
+
+	bdev = toi_open_by_devnum(resume_file_dev_t, FMODE_READ);
+	toi_file_target_bdev = bdev;	/* stored even if it is an ERR_PTR */
+	if (IS_ERR(bdev)) {
+		printk(KERN_INFO "Got a dev_num (%lx) but failed to open it.\n",
+				(unsigned long) resume_file_dev_t);
+		return;
+	}
+	target_inode = bdev->bd_inode;
+	set_devinfo(bdev, target_inode->i_blkbits);
+}
+
+static void toi_file_get_target_info(char *target, int get_size,
+		int resume_param)
+{	/* Open the target and record its inode, bdev and devinfo. */
+	if (target_file)
+		toi_file_cleanup(0);
+	/* Nothing to open without a target name. */
+	if (!target || !strlen(target))
+		return;
+
+	target_file = filp_open(target, O_RDWR|O_LARGEFILE, 0);
+	/* Open by path failed: give up, or for resume= try it as a device. */
+	if (IS_ERR(target_file) || !target_file) {
+
+		if (!resume_param) {
+			printk(KERN_INFO "Open file %s returned %p.\n",
+					target, target_file);
+			target_file = NULL;
+			return;
+		}
+
+		target_file = NULL;
+		resume_file_dev_t = name_to_dev_t(target);
+		if (!resume_file_dev_t) {
+			struct kstat stat;
+			int error = vfs_stat(target, &stat);
+			printk(KERN_INFO "Open file %s returned %p and "
+					"name_to_devt failed.\n", target,
+					target_file);
+			if (error)
+				printk(KERN_INFO "Stating the file also failed."
+					" Nothing more we can do.\n");
+			else
+				resume_file_dev_t = stat.rdev;
+			return;	/* NOTE(review): returns even if stat worked */
+		}
+
+		toi_file_target_bdev = toi_open_by_devnum(resume_file_dev_t,
+				FMODE_READ);
+		if (IS_ERR(toi_file_target_bdev)) {
+			printk(KERN_INFO "Got a dev_num (%lx) but failed to "
+					"open it.\n",
+					(unsigned long) resume_file_dev_t);
+			return;	/* NOTE(review): the ERR_PTR stays in the global */
+		}
+		used_devt = 1;
+		target_inode = toi_file_target_bdev->bd_inode;
+	} else
+		target_inode = target_file->f_mapping->host;
+	/* Reject inode types that are not accepted as a target. */
+	if (S_ISLNK(target_inode->i_mode) || S_ISDIR(target_inode->i_mode) ||
+	    S_ISSOCK(target_inode->i_mode) || S_ISFIFO(target_inode->i_mode)) {
+		printk(KERN_INFO "File support works with regular files,"
+				" character files and block devices.\n");
+		goto cleanup;
+	}
+	/* Opened by path: find the bdev behind the device node or file. */
+	if (!used_devt) {
+		if (S_ISBLK(target_inode->i_mode)) {
+			toi_file_target_bdev = I_BDEV(target_inode);
+			if (!bd_claim(toi_file_target_bdev, &toi_fileops))
+				target_claim = 1;
+		} else
+			toi_file_target_bdev = target_inode->i_sb->s_bdev;
+		resume_file_dev_t = toi_file_target_bdev->bd_dev;
+	}
+
+	set_devinfo(toi_file_target_bdev, target_inode->i_blkbits);
+
+	if (get_size)
+		target_storage_available = size_ignoring_ignored_pages();
+	/* Record where the file's first block is (skipped for resume=). */
+	if (!resume_param)
+		target_firstblock = bmap(target_inode, 0) << devinfo.bmap_shift;
+
+	return;
+cleanup:
+	target_inode = NULL;
+	if (target_file) {
+		filp_close(target_file, NULL);
+		target_file = NULL;
+	}
+	set_devinfo(NULL, 0);
+	target_storage_available = 0;
+}
+
+/*
+ * toi_file_noresume_reset
+ *
+ * Installed as the .noresume_reset operation of toi_fileops; it only
+ * asks the block I/O layer to clean up its read-side state.
+ */
+static void toi_file_noresume_reset(void)
+{
+	toi_bio_ops.rw_cleanup(READ);
+}
+
+/*
+ * parse_signature
+ *
+ * Classify the signature stored in @header.
+ *
+ * Returns 0 if the header says there is no image, -1 if the signature is
+ * not one we recognise, and 1 if an image is present. In the last case the
+ * TOI_RESUMED_BEFORE state is brought in line with the header and
+ * target_header_start is loaded from it.
+ *
+ * NOTE(review): sizeof(tuxonice_signature) is the size of a pointer if the
+ * symbol is declared as "char *" - confirm that matches the intended
+ * signature length on every architecture.
+ */
+static int parse_signature(struct toi_file_header *header)
+{
+	int matches_no_image = !memcmp(NoImage, header->sig,
+			sizeof(NoImage) - 1);
+	int matches_have_image = !memcmp(HaveImage, header->sig,
+			sizeof(HaveImage) - 1);
+	int matches_binary = !memcmp(tuxonice_signature, header->sig,
+			sizeof(tuxonice_signature));
+
+	if (matches_no_image)
+		return 0;
+
+	if (matches_binary) {
+		/* Our binary signature, but flagged as holding no image. */
+		if (!header->have_image)
+			return 0;
+	} else if (!matches_have_image) {
+		/* None of the known signatures. */
+		return -1;
+	}
+
+	if (header->resumed_before)
+		set_toi_state(TOI_RESUMED_BEFORE);
+	else
+		clear_toi_state(TOI_RESUMED_BEFORE);
+
+	target_header_start = header->first_header_block;
+	return 1;
+}
+
+/*
+ * prepare_signature
+ *
+ * Fill in @current_header so that it describes a fresh, not yet resumed
+ * image whose header data starts at @first_header_block.
+ *
+ * Always returns 0.
+ */
+
+static int prepare_signature(struct toi_file_header *current_header,
+		unsigned long first_header_block)
+{
+	/*
+	 * The signature is binary data rather than a C string, so copy it
+	 * with memcpy() as toi_file_signature_op() does: strncpy() would
+	 * stop at, and zero-pad after, any NUL byte in the signature.
+	 */
+	memcpy(current_header->sig, tuxonice_signature,
+			sizeof(tuxonice_signature));
+	current_header->resumed_before = 0;
+	current_header->first_header_block = first_header_block;
+	current_header->have_image = 1;
+	return 0;
+}
+
+/*
+ * toi_file_storage_allocated
+ *
+ * Returns 0 when no target inode is set. Otherwise returns
+ * raw_to_real() of target_storage_available for a normal file, or of
+ * main_pages_requested for any other kind of target.
+ */
+static int toi_file_storage_allocated(void)
+{
+	if (target_inode)
+		return target_is_normal_file() ?
+			(int) raw_to_real(target_storage_available) :
+			(int) raw_to_real(main_pages_requested);
+
+	return 0;
+}
+
+/*
+ * toi_file_release_storage
+ *
+ * Drop the block extent chain and zero the page accounting, unless we are
+ * resuming with "keep image" set, in which case everything is left alone.
+ *
+ * Always returns 0.
+ */
+static int toi_file_release_storage(void)
+{
+	int keeping_image = test_action_state(TOI_KEEP_IMAGE) &&
+		test_toi_state(TOI_NOW_RESUMING);
+
+	if (!keeping_image) {
+		toi_put_extent_chain(&block_chain);
+
+		main_pages_requested = 0;
+		main_pages_allocated = 0;
+		header_pages_reserved = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * toi_file_reserve_header_space
+ *
+ * Record @request as the header reservation (header_pages_reserved) and
+ * have apply_header_reservation() act on the new value.
+ */
+static void toi_file_reserve_header_space(int request)
+{
+	header_pages_reserved = request;
+	apply_header_reservation();
+}
+
+/*
+ * toi_file_allocate_storage
+ *
+ * Make sure the block chain is big enough for @main_space_requested pages
+ * of image data, the per-page bookkeeping that goes with them and the
+ * reserved header pages.
+ *
+ * Returns 0 if the chain is already large enough or was populated
+ * successfully, the error from populate_block_list(), or -ENOSPC if the
+ * chain is still too small afterwards. The page accounting is updated
+ * even in the -ENOSPC case.
+ */
+static int toi_file_allocate_storage(int main_space_requested)
+{
+	int metadata_pages = DIV_ROUND_UP(main_space_requested *
+			(sizeof(unsigned long) + sizeof(int)), PAGE_SIZE);
+	int total_wanted = main_space_requested + metadata_pages +
+		header_pages_reserved;
+	int shortfall = total_wanted - block_chain.size;
+	int err;
+
+	/* Only release_storage reduces the size */
+	if (shortfall < 1)
+		return 0;
+
+	err = populate_block_list();
+	if (err)
+		return err;
+
+	toi_message(TOI_WRITER, TOI_MEDIUM, 0,
+		"Finished with block_chain.size == %d.\n",
+		block_chain.size);
+
+	if (block_chain.size < total_wanted) {
+		printk("Block chain size (%d) < header pages (%d) + extra "
+			"pages (%d) + main pages (%d) (=%d pages).\n",
+			block_chain.size, header_pages_reserved,
+			metadata_pages, main_space_requested, total_wanted);
+		err = -ENOSPC;
+	}
+
+	main_pages_requested = main_space_requested;
+	main_pages_allocated = main_space_requested + metadata_pages;
+	return err;
+}
+
+/*
+ * toi_file_write_header_init
+ *
+ * Start header writing and store, in this order, toi_writer_posn_save,
+ * devinfo and the serialised block extent chain.
+ *
+ * Returns 0, or the error from the first rw_header_chunk() call that
+ * fails.
+ */
+static int toi_file_write_header_init(void)
+{
+	int err;
+
+	toi_bio_ops.rw_init(WRITE, 0);
+	toi_writer_buffer_posn = 0;
+
+	/*
+	 * Info needed to bootstrap goes at the start of the header.
+	 * First we save the basic info needed for reading, including the
+	 * number of header pages. Then we save the structs containing data
+	 * needed for reading the header pages back.
+	 * Note that even if header pages take more than one page, when we
+	 * read back the info, we will have restored the location of the
+	 * next header page by the time we go to use it.
+	 */
+	err = toi_bio_ops.rw_header_chunk(WRITE, &toi_fileops,
+			(char *) &toi_writer_posn_save,
+			sizeof(toi_writer_posn_save));
+	if (!err)
+		err = toi_bio_ops.rw_header_chunk(WRITE, &toi_fileops,
+				(char *) &devinfo, sizeof(devinfo));
+	if (err)
+		return err;
+
+	/*
+	 * NOTE(review): any result from toi_serialise_extent_chain() is
+	 * ignored here - confirm whether it can fail.
+	 */
+	toi_serialise_extent_chain(&toi_fileops, &block_chain);
+
+	return 0;
+}
+
+/*
+ * toi_file_write_header_cleanup
+ *
+ * Flush whatever header data is still buffered, then rewrite the page at
+ * target_firstblock so that it carries our signature and the location of
+ * the first header block.
+ *
+ * Returns 0 on success, -ENOMEM if no page could be allocated for the
+ * signature, or the error from bdev_page_io().
+ */
+static int toi_file_write_header_cleanup(void)
+{
+	struct toi_file_header *header;
+	int result;
+	unsigned long sig_page = toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
+
+	/* Write any unsaved data */
+	if (toi_writer_buffer_posn)
+		toi_bio_ops.write_header_chunk_finish();
+
+	toi_bio_ops.finish_all_io();
+
+	/*
+	 * Without a page we can neither read nor rewrite the signature.
+	 * Bail out rather than handing virt_to_page() a NULL address.
+	 */
+	if (!sig_page) {
+		printk(KERN_ERR "TuxOnIce: Unable to allocate a page for "
+				"writing the image signature.\n");
+		return -ENOMEM;
+	}
+
+	toi_extent_state_goto_start(&toi_writer_posn);
+	toi_bio_ops.forward_one_page(1);
+
+	/* Adjust image header */
+	result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev,
+			target_firstblock,
+			virt_to_page(sig_page));
+	if (result)
+		goto out;
+
+	header = (struct toi_file_header *) sig_page;
+
+	prepare_signature(header,
+			toi_writer_posn.current_offset <<
+			devinfo.bmap_shift);
+
+	result = toi_bio_ops.bdev_page_io(WRITE, toi_file_target_bdev,
+			target_firstblock,
+			virt_to_page(sig_page));
+
+out:
+	toi_bio_ops.finish_all_io();
+	toi_free_page(38, sig_page);
+
+	return result;
+}
+
+/* HEADER READING */
+
+/*
+ * read_header_init()
+ *
+ * Description:
+ * 1. Attempt to read the device specified with resume=.
+ * 2. Check the contents of the header for our signature.
+ * 3. Warn, ignore, reset and/or continue as appropriate.
+ * 4. If continuing, read the toi_file configuration section
+ *    of the header and set up block device info so we can read
+ *    the rest of the header & image.
+ *
+ * What this function itself does: it reads the page at
+ * target_header_start into toi_writer_buffer and unpacks, in order,
+ * toi_writer_posn_save and devinfo from it, keeping the current value of
+ * devinfo.bdev instead of the one stored in the image. It then loads the
+ * block extent chain.
+ *
+ * Returns:
+ * May not return if the user chooses to reboot at a warning.
+ * -EINVAL if cannot resume at this time. Booting should continue
+ * normally.
+ * NOTE(review): the code below returns the bdev_page_io() error or the
+ * toi_load_extent_chain() result; -EINVAL is not produced here directly.
+ */
+
+static int toi_file_read_header_init(void)
+{
+	int result;
+	struct block_device *tmp;
+
+	toi_bio_ops.read_header_init();
+
+	/* Read toi_file configuration */
+	result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev,
+			target_header_start,
+			virt_to_page((unsigned long) toi_writer_buffer));
+
+	if (result) {
+		printk("FileAllocator read header init: Failed to initialise "
+				"reading the first page of data.\n");
+		toi_bio_ops.rw_cleanup(READ);
+		return result;
+	}
+
+	/* The saved writer position comes first in the page... */
+	memcpy(&toi_writer_posn_save, toi_writer_buffer,
+	       sizeof(toi_writer_posn_save));
+
+	toi_writer_buffer_posn = sizeof(toi_writer_posn_save);
+
+	/* ...followed by devinfo; the bdev pointer in the image is stale. */
+	tmp = devinfo.bdev;
+
+	memcpy(&devinfo,
+	       toi_writer_buffer + toi_writer_buffer_posn,
+	       sizeof(devinfo));
+
+	devinfo.bdev = tmp;
+	toi_writer_buffer_posn += sizeof(devinfo);
+
+	toi_extent_state_goto_start(&toi_writer_posn);
+	toi_bio_ops.set_extra_page_forward();
+
+	return toi_load_extent_chain(&block_chain);
+}
+
+/*
+ * toi_file_read_header_cleanup
+ *
+ * Ask the block I/O layer to clean up its read-side state once the header
+ * has been read. Always returns 0.
+ */
+static int toi_file_read_header_cleanup(void)
+{
+	toi_bio_ops.rw_cleanup(READ);
+	return 0;
+}
+
+/*
+ * toi_file_signature_op
+ *
+ * Read the signature page at target_firstblock, classify it with
+ * parse_signature() and optionally modify it.
+ *
+ * @op: INVALIDATE rewrites the signature as "no image",
+ *      MARK_RESUME_ATTEMPTED / UNMARK_RESUME_ATTEMPTED set or clear the
+ *      resumed_before flag of an existing image; any other value (such as
+ *      GET_IMAGE_EXISTS) only inspects the page.
+ *
+ * Returns -1 if there is no usable target device, -ENOMEM if no page
+ * could be allocated, or an I/O error from bdev_page_io(). Otherwise
+ * returns the parse_signature() result, except that a successful
+ * INVALIDATE returns 1.
+ */
+static int toi_file_signature_op(int op)
+{
+	char *cur;
+	int result = 0, changed = 0;
+	struct toi_file_header *header;
+
+	/*
+	 * toi_file_get_target_info() can leave an ERR_PTR() value in
+	 * toi_file_target_bdev, so a plain NULL test is not sufficient.
+	 */
+	if (!toi_file_target_bdev || IS_ERR(toi_file_target_bdev))
+		return -1;
+
+	cur = (char *) toi_get_zeroed_page(17, TOI_ATOMIC_GFP);
+	if (!cur) {
+		printk("Unable to allocate a page for reading the image "
+				"signature.\n");
+		return -ENOMEM;
+	}
+
+	result = toi_bio_ops.bdev_page_io(READ, toi_file_target_bdev,
+			target_firstblock,
+			virt_to_page(cur));
+
+	if (result)
+		goto out;
+
+	header = (struct toi_file_header *) cur;
+	result = parse_signature(header);
+
+	switch (op) {
+	case INVALIDATE:
+		/* Never overwrite a page that does not carry our signature */
+		if (result == -1)
+			goto out;
+
+		memcpy(header->sig, tuxonice_signature,
+				sizeof(tuxonice_signature));
+		header->resumed_before = 0;
+		header->have_image = 0;
+		result = changed = 1;
+		break;
+	case MARK_RESUME_ATTEMPTED:
+		if (result == 1) {
+			header->resumed_before = 1;
+			changed = 1;
+		}
+		break;
+	case UNMARK_RESUME_ATTEMPTED:
+		if (result == 1) {
+			header->resumed_before = 0;
+			changed = 1;
+		}
+		break;
+	default:
+		/* Query only: report what parse_signature() found. */
+		break;
+	}
+
+	if (changed) {
+		int io_result = toi_bio_ops.bdev_page_io(WRITE,
+				toi_file_target_bdev, target_firstblock,
+				virt_to_page(cur));
+		if (io_result)
+			result = io_result;
+	}
+
+out:
+	toi_bio_ops.finish_all_io();
+	toi_free_page(17, (unsigned long) cur);
+	return result;
+}
+
+/* Print debug info
+ *
+ * Description:	Write a short status report for the file allocator into
+ *		@buffer.
+ * Arguments:	buffer:	Where to write the text.
+ *		size:	Number of bytes available at @buffer.
+ * Returns:	Number of bytes used, as accumulated from snprintf_used().
+ */
+
+static int toi_file_print_debug_stats(char *buffer, int size)
+{
+	int len = 0;
+
+	if (toiActiveAllocator != &toi_fileops) {
+		len = snprintf_used(buffer, size,
+				"- FileAllocator inactive.\n");
+		return len;
+	}
+
+	len = snprintf_used(buffer, size, "- FileAllocator active.\n");
+
+	/* toi_file_storage_allocated() returns int, so print it with %d */
+	len += snprintf_used(buffer+len, size-len, "  Storage available for "
+			"image: %d pages.\n",
+			toi_file_storage_allocated());
+
+	return len;
+}
+
+/*
+ * Storage needed
+ *
+ * Returns the number of bytes of image header space that the toi_file
+ * data will occupy: the signature, the target name with its terminator,
+ * the saved writer position, devinfo and the serialised extent chain.
+ *
+ * We ensure the space is allocated, but actually save the
+ * data from write_header_init and therefore don't also define a
+ * save_config_info routine.
+ */
+static int toi_file_storage_needed(void)
+{
+	size_t name_bytes = strlen(toi_file_target) + 1;
+	size_t chain_bytes = sizeof(struct hibernate_extent_chain) -
+		2 * sizeof(void *) +
+		(2 * sizeof(unsigned long) * block_chain.num_extents);
+
+	return sig_size + name_bytes +
+		sizeof(toi_writer_posn_save) +
+		sizeof(devinfo) +
+		chain_bytes;
+}
+
+/*
+ * toi_file_remove_image
+ *
+ * Release the storage accounting and then invalidate the on-disk
+ * signature. Returns the result of the INVALIDATE signature operation.
+ */
+static int toi_file_remove_image(void)
+{
+	toi_file_release_storage();
+	return toi_file_signature_op(INVALIDATE);
+}
+
+/*
+ * Image_exists
+ *
+ * Report what the signature page says about an image, reopening the
+ * resume device first if no target block device is currently set.
+ *
+ * @quiet is accepted but not used by this implementation.
+ *
+ * Returns the result of toi_file_signature_op(GET_IMAGE_EXISTS).
+ */
+
+static int toi_file_image_exists(int quiet)
+{
+	if (!toi_file_target_bdev)
+		reopen_resume_devt();
+
+	return toi_file_signature_op(GET_IMAGE_EXISTS);
+}
+
+/*
+ * Mark resume attempted.
+ *
+ * Record in the image header whether a resume from this image has been
+ * tried: a non-zero @mark sets the flag, zero clears it.
+ *
+ * Returns the result of the signature operation.
+ */
+
+static int toi_file_mark_resume_attempted(int mark)
+{
+	int op = UNMARK_RESUME_ATTEMPTED;
+
+	if (mark)
+		op = MARK_RESUME_ATTEMPTED;
+
+	return toi_file_signature_op(op);
+}
+
+/*
+ * toi_file_set_resume_param
+ *
+ * Build the "file:/dev/<device>[:0x<first block>]" string that describes
+ * the current target, store it in resume_file and have it parsed again.
+ * If there is no target block device, the string says so instead.
+ *
+ * NOTE(review): resume_file is filled with an unbounded sprintf() -
+ * confirm it is large enough for the longest string built here.
+ */
+static void toi_file_set_resume_param(void)
+{
+	char *param = (char *) toi_get_zeroed_page(18, TOI_ATOMIC_GFP);
+	char *devname = (char *) toi_get_zeroed_page(19, TOI_ATOMIC_GFP);
+	unsigned long first_sector = bmap(target_inode, 0);
+
+	if (!param || !devname) {
+		if (param)
+			toi_free_page(18, (unsigned long) param);
+		if (devname)
+			toi_free_page(19, (unsigned long) devname);
+		printk("TuxOnIce: Failed to allocate memory while setting "
+				"resume= parameter.\n");
+		return;
+	}
+
+	if (!toi_file_target_bdev) {
+		snprintf(param, PAGE_SIZE,
+				"%s is not a valid target.", toi_file_target);
+	} else {
+		int used;
+
+		set_devinfo(toi_file_target_bdev, target_inode->i_blkbits);
+
+		bdevname(toi_file_target_bdev, devname);
+		used = snprintf(param, PAGE_SIZE, "/dev/%s", devname);
+
+		/* Only append the block number when it is non-zero */
+		if (first_sector)
+			snprintf(param + used, PAGE_SIZE - used,
+				":0x%lx", first_sector << devinfo.bmap_shift);
+	}
+
+	sprintf(resume_file, "file:%s", param);
+
+	toi_free_page(18, (unsigned long) param);
+	toi_free_page(19, (unsigned long) devname);
+
+	toi_attempt_to_parse_resume_device(1);
+}
+
+/*
+ * __test_toi_file_target
+ *
+ * Open @target and check whether it carries a usable signature.
+ *
+ * @target:      Name of the file or device to test.
+ * @resume_time: Non-zero when called while parsing resume=; in that case
+ *               the resume parameter and TOI_CAN_HIBERNATE are not set.
+ * @quiet:       Non-zero to suppress the informational messages.
+ *
+ * Returns 0 if a signature was found, 1 otherwise (and then
+ * TOI_CAN_HIBERNATE is cleared).
+ */
+static int __test_toi_file_target(char *target, int resume_time, int quiet)
+{
+	toi_file_get_target_info(target, 0, resume_time);
+
+	if (toi_file_signature_op(GET_IMAGE_EXISTS) <= -1) {
+		clear_toi_state(TOI_CAN_HIBERNATE);
+
+		if (!quiet) {
+			if (*target)
+				printk(KERN_INFO "TuxOnIce: FileAllocator: "
+					"Sorry. No signature "
+					"found at  %s.\n", target);
+			else if (!resume_time)
+				printk(KERN_INFO "TuxOnIce: FileAllocator: "
+					"Sorry. "
+					"Target is not set for hibernating.\n");
+		}
+
+		return 1;
+	}
+
+	if (!quiet)
+		printk(KERN_INFO "TuxOnIce: FileAllocator: File "
+				"signature found.\n");
+	if (!resume_time)
+		toi_file_set_resume_param();
+
+	toi_bio_ops.set_devinfo(&devinfo);
+	toi_writer_posn.chains = &block_chain;
+	toi_writer_posn.num_chains = 1;
+
+	if (!resume_time)
+		set_toi_state(TOI_CAN_HIBERNATE);
+
+	return 0;
+}
+
+/*
+ * test_toi_file_target
+ *
+ * Side effect of writing the "target" sysfs entry: re-test the configured
+ * target and report whether hibernating is possible.
+ * setting_toi_file_target is raised for the duration of the test.
+ */
+static void test_toi_file_target(void)
+{
+	int unusable;
+
+	setting_toi_file_target = 1;
+
+	unusable = __test_toi_file_target(toi_file_target, 0, 1);
+	printk(KERN_INFO "TuxOnIce: Hibernating %sabled.\n",
+			unusable ? "dis" : "en");
+
+	setting_toi_file_target = 0;
+}
+
+/*
+ * Parse Image Location
+ *
+ * Attempt to parse a resume= parameter.
+ * File Allocator accepts:
+ * resume=file:DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
+ *
+ * Where:
+ * DEVNAME is convertible to a dev_t by name_to_dev_t
+ * FIRSTBLOCK is the location of the first block in the file.
+ * BLOCKSIZE is the logical blocksize >= SECTOR_SIZE & <= PAGE_SIZE,
+ * mod SECTOR_SIZE == 0 of the device. (Only the "multiple of
+ * SECTOR_SIZE" part is checked below.)
+ * Data is validated by attempting to read a header from the
+ * location given. Failure will result in toi_file refusing to
+ * save an image, and a reboot with correct parameters will be
+ * necessary.
+ *
+ * @commandline is modified temporarily while parsing and restored before
+ * returning from the parsing path.
+ *
+ * Returns 1 if the string is not for this allocator (no "file:" prefix
+ * and @only_writer is zero), 0 if a target is already open or the
+ * location was validated, and a non-zero error value otherwise.
+ */
+
+static int toi_file_parse_sig_location(char *commandline,
+		int only_writer, int quiet)
+{
+	char *thischar, *devstart = NULL, *colon = NULL, *at_symbol = NULL;
+	int result = -EINVAL, target_blocksize = 0;
+
+	if (strncmp(commandline, "file:", 5)) {
+		if (!only_writer)
+			return 1;
+	} else
+		commandline += 5;
+
+	/*
+	 * Don't check signature again if we're beginning a cycle. If we already
+	 * did the initialisation successfully, assume we'll be okay when it
+	 * comes to resuming.
+	 */
+	if (toi_file_target_bdev)
+		return 0;
+
+	devstart = thischar = commandline;
+	while ((*thischar != ':') && (*thischar != '@') &&
+		((thischar - commandline) < 250) && (*thischar))
+		thischar++;
+
+	if (*thischar == ':') {
+		colon = thischar;
+		*colon = 0;
+		thischar++;
+	}
+
+	while ((*thischar != '@') && ((thischar - commandline) < 250)
+			&& (*thischar))
+		thischar++;
+
+	if (*thischar == '@') {
+		at_symbol = thischar;
+		*at_symbol = 0;
+	}
+
+	/*
+	 * For the toi_file, you can be able to resume, but not hibernate,
+	 * because the resume= is set correctly, but the toi_file_target
+	 * isn't.
+	 *
+	 * We may have come here as a result of setting resume or
+	 * toi_file_target. We only test the toi_file target in the
+	 * former case (it's already done in the latter), and we do it before
+	 * setting the block number ourselves. It will overwrite the values
+	 * given on the command line if we don't.
+	 */
+
+	if (!setting_toi_file_target)
+		__test_toi_file_target(toi_file_target, 1, 0);
+
+	/*
+	 * Assign the parsed value directly: casting it to int first would
+	 * truncate (and possibly sign-extend) block numbers of 2^31 and
+	 * above.
+	 */
+	if (colon)
+		target_firstblock = simple_strtoul(colon + 1, NULL, 0);
+	else
+		target_firstblock = 0;
+
+	if (at_symbol) {
+		target_blocksize = (int) simple_strtoul(at_symbol + 1, NULL, 0);
+		if (target_blocksize & (SECTOR_SIZE - 1)) {
+			printk(KERN_INFO "FileAllocator: Blocksizes are "
+					"multiples of %d.\n", SECTOR_SIZE);
+			result = -EINVAL;
+			goto out;
+		}
+	}
+
+	if (!quiet)
+		printk(KERN_INFO "TuxOnIce FileAllocator: Testing whether you"
+				" can resume:\n");
+
+	toi_file_get_target_info(commandline, 0, 1);
+
+	if (!toi_file_target_bdev || IS_ERR(toi_file_target_bdev)) {
+		toi_file_target_bdev = NULL;
+		result = -1;
+		goto out;
+	}
+
+	/*
+	 * NOTE(review): ffs() is 1-based (ffs(4096) == 13) while the other
+	 * set_devinfo() callers pass i_blkbits - confirm this is the value
+	 * set_devinfo() expects.
+	 */
+	if (target_blocksize)
+		set_devinfo(toi_file_target_bdev, ffs(target_blocksize));
+
+	result = __test_toi_file_target(commandline, 1, 0);
+
+out:
+	if (result)
+		clear_toi_state(TOI_CAN_HIBERNATE);
+
+	if (!quiet)
+		printk(KERN_INFO "Resuming %sabled.\n",  result ? "dis" : "en");
+
+	/* Put back the separators that were overwritten while parsing */
+	if (colon)
+		*colon = ':';
+	if (at_symbol)
+		*at_symbol = '@';
+
+	return result;
+}
+
+/* toi_file_save_config_info
+ *
+ * Description:	Copy the target's name, including its terminating NUL,
+ *		into the configuration buffer. It is saved for
+ *		all_settings rather than for use at resume time.
+ * Arguments:	Buffer:		Pointer to a buffer of size PAGE_SIZE.
+ * Returns:	Number of bytes used for saving our data.
+ */
+
+static int toi_file_save_config_info(char *buffer)
+{
+	int bytes_used = strlen(toi_file_target) + 1;
+
+	strcpy(buffer, toi_file_target);
+	return bytes_used;
+}
+
+/* toi_file_load_config_info
+ *
+ * Description:	Reload target's name.
+ * Arguments:	Buffer:		Pointer to the start of the data.
+ *		Size:		Number of bytes that were saved.
+ */
+
+static void toi_file_load_config_info(char *buffer, int size)
+{
+	/*
+	 * The data comes from a stored image, so bound the copy instead of
+	 * trusting it to fit. 256 is the length the "target" sysfs entry
+	 * declares for toi_file_target.
+	 */
+	strlcpy(toi_file_target, buffer, 256);
+}
+
+/*
+ * toi_file_initialise
+ *
+ * @starting_cycle: Zero for a plain initialisation; non-zero when a
+ *                  cycle is being started.
+ *
+ * When starting a cycle, nothing is done unless the file allocator is the
+ * active allocator, and a cycle that has SYSFS_HIBERNATE set is refused if
+ * no target has been set. The target is then (re)opened and, when
+ * starting a cycle, its signature is checked.
+ *
+ * Returns 0 on success and 1 if the target is missing or has no valid
+ * signature.
+ */
+static int toi_file_initialise(int starting_cycle)
+{
+	if (starting_cycle) {
+		if (toiActiveAllocator != &toi_fileops)
+			return 0;
+
+		if ((starting_cycle & SYSFS_HIBERNATE) && !*toi_file_target) {
+			printk(KERN_INFO "FileAllocator is the active writer,  "
+					"but no filename has been set.\n");
+			return 1;
+		}
+	}
+
+	if (*toi_file_target)
+		toi_file_get_target_info(toi_file_target, starting_cycle, 0);
+
+	if (starting_cycle && (toi_file_image_exists(1) == -1)) {
+		printk("%s does not have a valid signature for "
+				"hibernating.\n", toi_file_target);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Sysfs entries exported by the file allocator. */
+static struct toi_sysfs_data sysfs_params[] = {
+
+	/* "target": the file or device to use; re-tested after each write */
+	{
+	 TOI_ATTR("target", SYSFS_RW),
+	 SYSFS_STRING(toi_file_target, 256, SYSFS_NEEDS_SM_FOR_WRITE),
+	 .write_side_effect		= test_toi_file_target,
+	},
+
+	/* "enabled": 0/1 switch backed by toi_fileops.enabled */
+	{
+	  TOI_ATTR("enabled", SYSFS_RW),
+	  SYSFS_INT(&toi_fileops.enabled, 0, 1, 0),
+	  .write_side_effect		= attempt_to_parse_resume_device2,
+	}
+};
+
+/*
+ * Module operations for the file allocator. The generic block I/O
+ * callbacks (rw_init, read_page, ...) are not set here; toi_file_load()
+ * copies them from toi_bio_ops before registering the module.
+ */
+static struct toi_module_ops toi_fileops = {
+	.type					= WRITER_MODULE,
+	.name					= "file storage",
+	.directory				= "file",
+	.module					= THIS_MODULE,
+	.print_debug_info			= toi_file_print_debug_stats,
+	.save_config_info			= toi_file_save_config_info,
+	.load_config_info			= toi_file_load_config_info,
+	.storage_needed				= toi_file_storage_needed,
+	.initialise				= toi_file_initialise,
+	.cleanup				= toi_file_cleanup,
+
+	.noresume_reset		= toi_file_noresume_reset,
+	.storage_available 	= toi_file_storage_available,
+	.storage_allocated	= toi_file_storage_allocated,
+	.release_storage	= toi_file_release_storage,
+	.reserve_header_space	= toi_file_reserve_header_space,
+	.allocate_storage	= toi_file_allocate_storage,
+	.image_exists		= toi_file_image_exists,
+	.mark_resume_attempted	= toi_file_mark_resume_attempted,
+	.write_header_init	= toi_file_write_header_init,
+	.write_header_cleanup	= toi_file_write_header_cleanup,
+	.read_header_init	= toi_file_read_header_init,
+	.read_header_cleanup	= toi_file_read_header_cleanup,
+	.remove_image		= toi_file_remove_image,
+	.parse_sig_location	= toi_file_parse_sig_location,
+
+	.sysfs_data		= sysfs_params,
+	.num_sysfs_entries	= sizeof(sysfs_params) /
+		sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+/*
+ * toi_file_load
+ *
+ * Fill in the I/O callbacks of toi_fileops from toi_bio_ops and register
+ * the file allocator. Returns the result of toi_register_module().
+ */
+static __init int toi_file_load(void)
+{
+	toi_fileops.rw_init = toi_bio_ops.rw_init;
+	toi_fileops.rw_cleanup = toi_bio_ops.rw_cleanup;
+	toi_fileops.read_page = toi_bio_ops.read_page;
+	toi_fileops.write_page = toi_bio_ops.write_page;
+	toi_fileops.rw_header_chunk = toi_bio_ops.rw_header_chunk;
+	toi_fileops.rw_header_chunk_noreadahead =
+		toi_bio_ops.rw_header_chunk_noreadahead;
+	toi_fileops.io_flusher = toi_bio_ops.io_flusher;
+
+	return toi_register_module(&toi_fileops);
+}
+
+/*
+ * When built as a module, register on load and unregister on unload.
+ * When built in, register from a late initcall instead.
+ */
+#ifdef MODULE
+/* Unregister the file allocator when the module is removed. */
+static __exit void toi_file_unload(void)
+{
+	toi_unregister_module(&toi_fileops);
+}
+
+module_init(toi_file_load);
+module_exit(toi_file_unload);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nigel Cunningham");
+MODULE_DESCRIPTION("TuxOnIce FileAllocator");
+#else
+late_initcall(toi_file_load);
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_highlevel.c linux-2.6-block-custom/kernel/power/tuxonice_highlevel.c
--- linux-2.6-block/kernel/power/tuxonice_highlevel.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_highlevel.c	2008-09-26 19:48:23.991754749 +0900
@@ -0,0 +1,1327 @@
+/*
+ * kernel/power/tuxonice_highlevel.c
+ */
+/** \mainpage TuxOnIce.
+ *
+ * TuxOnIce provides support for saving and restoring an image of
+ * system memory to an arbitrary storage device, either on the local computer,
+ * or across some network. The support is entirely OS based, so TuxOnIce
+ * works without requiring BIOS, APM or ACPI support. The vast majority of the
+ * code is also architecture independent, so it should be very easy to port
+ * the code to new architectures. TuxOnIce includes support for SMP, 4G HighMem
+ * and preemption. Initramfses and initrds are also supported.
+ *
+ * TuxOnIce uses a modular design, in which the method of storing the image is
+ * completely abstracted from the core code, as are transformations on the data
+ * such as compression and/or encryption (multiple 'modules' can be used to
+ * provide arbitrary combinations of functionality). The user interface is also
+ * modular, so that arbitrarily simple or complex interfaces can be used to
+ * provide anything from debugging information through to eye candy.
+ *
+ * \section Copyright
+ *
+ * TuxOnIce is released under the GPLv2.
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu><BR>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz><BR>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr><BR>
+ * Copyright (C) 2002-2007 Nigel Cunningham (nigel at tuxonice net)<BR>
+ *
+ * \section Credits
+ *
+ * Nigel would like to thank the following people for their work:
+ *
+ * Bernard Blackham <bernard@blackham.com.au><BR>
+ * Web page & Wiki administration, some coding. A person without whom
+ * TuxOnIce would not be where it is.
+ *
+ * Michael Frank <mhf@linuxmail.org><BR>
+ * Extensive testing and help with improving stability. I was constantly
+ * amazed by the quality and quantity of Michael's help.
+ *
+ * Pavel Machek <pavel@ucw.cz><BR>
+ * Modifications, defectiveness pointing, being with Gabor at the very
+ * beginning, suspend to swap space, stop all tasks. Port to 2.4.18-ac and
+ * 2.5.17. Even though Pavel and I disagree on the direction suspend to
+ * disk should take, I appreciate the valuable work he did in helping Gabor
+ * get the concept working.
+ *
+ * ..and of course the myriads of TuxOnIce users who have helped diagnose
+ * and fix bugs, made suggestions on how to improve the code, proofread
+ * documentation, and donated time and money.
+ *
+ * Thanks also to corporate sponsors:
+ *
+ * <B>Redhat.</B> Sometime employer from May 2006 (my fault, not Redhat's!).
+ *
+ * <B>Cyclades.com.</B> Nigel's employers from Dec 2004 until May 2006, who
+ * allowed him to work on TuxOnIce and PM related issues on company time.
+ *
+ * <B>LinuxFund.org.</B> Sponsored Nigel's work on TuxOnIce for four months Oct
+ * 2003 to Jan 2004.
+ *
+ * <B>LAC Linux.</B> Donated P4 hardware that enabled development and ongoing
+ * maintenance of SMP and Highmem support.
+ *
+ * <B>OSDL.</B> Provided access to various hardware configurations, and made
+ * occasional small donations to the project.
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/freezer.h>
+#include <linux/utsrelease.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/writeback.h>
+
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_cluster.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_atomic_copy.h"
+#include "tuxonice_alloc.h"
+
+/*! Pageset metadata. */
+struct pagedir pagedir2 = {2};
+
+/* got_pmsem: set while we hold pm_mutex; do_cleanup() releases it. */
+static int get_pmsem = 0, got_pmsem;
+/* Address limit saved by toi_start_anything(), restored on finish. */
+static mm_segment_t oldfs;
+/* Held from toi_start_anything() until toi_finish_anything(). */
+static DEFINE_MUTEX(tuxonice_in_use);
+/* Value of block_dump saved while a cycle overrides it with 0. */
+static int block_dump_save;
+/* Userspace programs launched before and after a hibernation cycle. */
+static char pre_hibernate_command[256];
+static char post_hibernate_command[256];
+
+/* Binary signature, defined here for use outside this file. */
+char *tuxonice_signature = "\xed\xc3\x02\xe9\x98\x56\xe5\x0c";
+
+int toi_fail_num;
+
+/* Forward declaration; used before its definition in this file. */
+int do_toi_step(int step);
+
+/* Page allocated in toi_init() and freed in do_cleanup(); 0 if unset. */
+unsigned long boot_kernel_data_buffer;
+
+/**
+ * toi_finish_anything - Cleanup after doing anything.
+ *
+ * @hibernate_or_resume: Whether finishing a cycle or attempt at resuming.
+ *
+ * The counterpart of toi_start_anything below. Module cleanup routines are
+ * called and module references dropped. If a cycle or resume attempt is
+ * being finished, block_dump and the allowed cpus mask are restored too,
+ * and the post-hibernate command is run after a hibernation cycle. Last of
+ * all the process fs is restored and the in-use mutex released.
+ */
+void toi_finish_anything(int hibernate_or_resume)
+{
+	toi_cleanup_modules(hibernate_or_resume);
+	toi_put_modules();
+
+	if (hibernate_or_resume) {
+		block_dump = block_dump_save;
+		set_cpus_allowed(current, CPU_MASK_ALL);
+		toi_alloc_print_debug_stats();
+
+		if (hibernate_or_resume == SYSFS_HIBERNATE) {
+			if (strlen(post_hibernate_command))
+				toi_launch_userspace_program(
+						post_hibernate_command,
+						0, UMH_WAIT_PROC);
+		}
+	}
+
+	set_fs(oldfs);
+	mutex_unlock(&tuxonice_in_use);
+}
+
+/**
+ * toi_start_anything - Basic initialisation for TuxOnIce.
+ *
+ * @hibernate_or_resume: Whether starting a cycle or attempt at resuming.
+ *
+ * Our basic initialisation routine. Take references on modules, use the
+ * kernel segment, recheck resume= if no active allocator is set, initialise
+ * modules, save and reset block_dump and ensure we're running on CPU0.
+ *
+ * Returns 0 on success. On failure everything done so far is undone, the
+ * in-use mutex is released and -EBUSY is returned.
+ */
+int toi_start_anything(int hibernate_or_resume)
+{
+	mutex_lock(&tuxonice_in_use);
+
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+
+	if (hibernate_or_resume == SYSFS_HIBERNATE &&
+			strlen(pre_hibernate_command)) {
+		int result = toi_launch_userspace_program(pre_hibernate_command,
+				0, UMH_WAIT_PROC);
+		if (result) {
+			printk(KERN_INFO "Pre-hibernate command '%s' returned "
+					"%d. Aborting.\n",
+					pre_hibernate_command, result);
+			goto prehibernate_err;
+		}
+	}
+
+	if (hibernate_or_resume == SYSFS_HIBERNATE)
+		toi_print_modules();
+
+	if (toi_get_modules()) {
+		printk("TuxOnIce: Get modules failed!\n");
+		goto getmodules_err;
+	}
+
+	if (hibernate_or_resume) {
+		block_dump_save = block_dump;
+		block_dump = 0;
+		set_cpus_allowed(current,
+				cpumask_of_cpu(first_cpu(cpu_online_map)));
+	}
+
+	if (toi_initialise_modules_early(hibernate_or_resume))
+		goto early_init_err;
+
+	if (!toiActiveAllocator)
+		toi_attempt_to_parse_resume_device(!hibernate_or_resume);
+
+	if (toi_initialise_modules_late(hibernate_or_resume))
+		goto late_init_err;
+
+	return 0;
+
+late_init_err:
+	toi_cleanup_modules(hibernate_or_resume);
+early_init_err:
+	if (hibernate_or_resume) {
+		/* Restore the value saved above, as toi_finish_anything does */
+		block_dump = block_dump_save;
+		set_cpus_allowed(current, CPU_MASK_ALL);
+	}
+	/* Drop the references taken by toi_get_modules() */
+	toi_put_modules();
+getmodules_err:
+prehibernate_err:
+	set_fs(oldfs);
+	mutex_unlock(&tuxonice_in_use);
+	return -EBUSY;
+}
+
+/*
+ * Nosave page tracking.
+ *
+ * Here rather than in prepare_image because we want to do it once only at the
+ * start of a cycle.
+ */
+
+/**
+ * mark_nosave_pages - Set up our Nosave bitmap.
+ *
+ * Walk the list of nosave regions and flag every page in each of them as
+ * Nosave. The bitmap allows faster use when preparing the image.
+ */
+static void mark_nosave_pages(void)
+{
+	struct nosave_region *region;
+
+	list_for_each_entry(region, &nosave_regions, list) {
+		unsigned long pfn = region->start_pfn;
+
+		while (pfn < region->end_pfn) {
+			SetPageNosave(pfn_to_page(pfn));
+			pfn++;
+		}
+	}
+}
+
+/**
+ * allocate_bitmaps: Allocate bitmaps used to record page states.
+ *
+ * Allocate, one after another, the bitmaps we use to record the various
+ * TuxOnIce related page states. Stops at the first allocation that fails.
+ *
+ * Returns 0 if every bitmap was allocated and 1 otherwise.
+ */
+static int allocate_bitmaps(void)
+{
+	if (allocate_dyn_pageflags(&pageset1_map, 0))
+		return 1;
+	if (allocate_dyn_pageflags(&pageset1_copy_map, 0))
+		return 1;
+	if (allocate_dyn_pageflags(&pageset2_map, 0))
+		return 1;
+	if (allocate_dyn_pageflags(&io_map, 0))
+		return 1;
+	if (allocate_dyn_pageflags(&nosave_map, 0))
+		return 1;
+	if (allocate_dyn_pageflags(&free_map, 0))
+		return 1;
+	if (allocate_dyn_pageflags(&page_resave_map, 0))
+		return 1;
+
+	return 0;
+}
+
+/**
+ * free_bitmaps: Free the bitmaps used to record page states.
+ *
+ * Free the bitmaps allocated above. It is not an error to call
+ * free_dyn_pageflags on a bitmap that isn't currently allocated.
+ */
+static void free_bitmaps(void)
+{
+	free_dyn_pageflags(&pageset1_map);
+	free_dyn_pageflags(&pageset1_copy_map);
+	free_dyn_pageflags(&pageset2_map);
+	free_dyn_pageflags(&io_map);
+	free_dyn_pageflags(&nosave_map);
+	free_dyn_pageflags(&free_map);
+	free_dyn_pageflags(&page_resave_map);
+}
+
+/**
+ * io_MB_per_second: Return the number of MB/s read or written.
+ *
+ * @write: Index into the I/O statistics selecting which direction to
+ * report on.
+ *
+ * Calculate the number of megabytes per second that were read or written.
+ * Returns 0 when no time has been recorded for that direction.
+ */
+static int io_MB_per_second(int write)
+{
+	if (!toi_bkd.toi_io_time[write][1])
+		return 0;
+
+	return MB((unsigned long) toi_bkd.toi_io_time[write][0]) * HZ /
+		toi_bkd.toi_io_time[write][1];
+}
+
+/**
+ * get_toi_debug_info: Fill a buffer with debugging information.
+ *
+ * @buffer: The buffer to be filled. It is declared const but is written
+ * to through a cast.
+ * @count: The size of the buffer, in bytes.
+ *
+ * Fill a (usually PAGE_SIZEd) buffer with the debugging info that we will
+ * either printk or return via sysfs.
+ *
+ * Returns the number of bytes used, as accumulated in len.
+ */
+/* Append formatted text at buffer + len and advance len; uses the
+ * caller's local variables buffer, count and len. */
+#define SNPRINTF(a...) 	do { len += snprintf_used(((char *) buffer) + len, \
+		count - len - 1, ## a); } while (0)
+
+static int get_toi_debug_info(const char *buffer, int count)
+{
+	int len = 0;
+
+	SNPRINTF("TuxOnIce debugging info:\n");
+	SNPRINTF("- TuxOnIce core  : " TOI_CORE_VERSION "\n");
+	SNPRINTF("- Kernel Version : " UTS_RELEASE "\n");
+	SNPRINTF("- Compiler vers. : %d.%d\n", __GNUC__, __GNUC_MINOR__);
+	SNPRINTF("- Attempt number : %d\n", nr_hibernates);
+	SNPRINTF("- Parameters     : %ld %ld %ld %d %d %ld\n",
+			toi_result,
+			toi_bkd.toi_action,
+			toi_bkd.toi_debug_state,
+			toi_bkd.toi_default_console_level,
+			image_size_limit,
+			toi_poweroff_method);
+	SNPRINTF("- Overall expected compression percentage: %d.\n",
+			100 - toi_expected_compression_ratio());
+	/* Let every module append its own section */
+	len += toi_print_module_debug_info(((char *) buffer) + len,
+			count - len - 1);
+	if (toi_bkd.toi_io_time[0][1]) {
+		/* Report in KB/s when either direction is below 5 MB/s */
+		if ((io_MB_per_second(0) < 5) || (io_MB_per_second(1) < 5)) {
+			SNPRINTF("- I/O speed: Write %d KB/s",
+			  (KB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
+			  toi_bkd.toi_io_time[0][1]));
+			if (toi_bkd.toi_io_time[1][1])
+				SNPRINTF(", Read %d KB/s",
+				  (KB((unsigned long)
+				      toi_bkd.toi_io_time[1][0]) * HZ /
+				  toi_bkd.toi_io_time[1][1]));
+		} else {
+			SNPRINTF("- I/O speed: Write %d MB/s",
+			 (MB((unsigned long) toi_bkd.toi_io_time[0][0]) * HZ /
+			  toi_bkd.toi_io_time[0][1]));
+			if (toi_bkd.toi_io_time[1][1])
+				SNPRINTF(", Read %d MB/s",
+				 (MB((unsigned long)
+				     toi_bkd.toi_io_time[1][0]) * HZ /
+				  toi_bkd.toi_io_time[1][1]));
+		}
+		SNPRINTF(".\n");
+	} else
+		SNPRINTF("- No I/O speed stats available.\n");
+	SNPRINTF("- Extra pages    : %ld used/%ld.\n",
+			extra_pd1_pages_used, extra_pd1_pages_allowance);
+
+	return len;
+}
+
+/**
+ * do_cleanup: Cleanup after attempting to hibernate or resume.
+ *
+ * @get_debug_info: Whether to allocate and return debugging info.
+ *
+ * Cleanup after attempting to hibernate or resume, possibly getting
+ * debugging info as we do so. The debugging info is collected early,
+ * before the state it describes is torn down, and printed near the end.
+ */
+static void do_cleanup(int get_debug_info)
+{
+	int i = 0;
+	char *buffer = NULL;
+
+	if (get_debug_info)
+		toi_prepare_status(DONT_CLEAR_BAR, "Cleaning up...");
+	relink_lru_lists();
+
+	free_checksum_pages();
+
+	if (get_debug_info)
+		buffer = (char *) toi_get_zeroed_page(20, TOI_ATOMIC_GFP);
+
+	if (buffer)
+		i = get_toi_debug_info(buffer, PAGE_SIZE);
+
+	toi_free_extra_pagedir_memory();
+
+	pagedir1.size = pagedir2.size = 0;
+	set_highmem_size(pagedir1, 0);
+	set_highmem_size(pagedir2, 0);
+
+	if (boot_kernel_data_buffer) {
+		toi_free_page(37, boot_kernel_data_buffer);
+		boot_kernel_data_buffer = 0;
+	}
+
+	/* Only send the "post" notification if "prepare" was sent */
+	if (test_toi_state(TOI_NOTIFIERS_PREPARE)) {
+		pm_notifier_call_chain(PM_POST_HIBERNATION);
+		clear_toi_state(TOI_NOTIFIERS_PREPARE);
+	}
+
+	thaw_processes();
+
+#ifdef CONFIG_TOI_KEEP_IMAGE
+	if (test_action_state(TOI_KEEP_IMAGE) &&
+	    !test_result_state(TOI_ABORTED)) {
+		toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
+			"TuxOnIce: Not invalidating the image due "
+			"to Keep Image being enabled.\n");
+		set_result_state(TOI_KEPT_IMAGE);
+	} else
+#endif
+		if (toiActiveAllocator)
+			toiActiveAllocator->remove_image();
+
+	free_bitmaps();
+
+	if (buffer) {
+		if (i) {
+			/* Printk can only handle 1023 bytes, including
+			 * its level mangling. */
+			for (i = 0; i < 3; i++)
+				printk("%s", buffer + (1023 * i));
+		}
+		/* Free the page even if no debug text was produced */
+		toi_free_page(20, (unsigned long) buffer);
+	}
+
+	if (!test_action_state(TOI_LATE_CPU_HOTPLUG))
+		enable_nonboot_cpus();
+	toi_cleanup_console();
+
+	free_attention_list();
+
+	toi_deactivate_storage(0);
+
+	clear_toi_state(TOI_IGNORE_LOGLEVEL);
+	clear_toi_state(TOI_TRYING_TO_RESUME);
+	clear_toi_state(TOI_NOW_RESUMING);
+
+	if (got_pmsem) {
+		mutex_unlock(&pm_mutex);
+		got_pmsem = 0;
+	}
+}
+
+/**
+ * check_still_keeping_image: We kept an image; check whether to reuse it.
+ *
+ * We enter this routine when we have kept an image. If the user has said they
+ * want to still keep it, all we need to do is powerdown. If powering down
+ * means hibernating to ram and the power doesn't run out, we'll return 1.
+ * If we do power off properly or the battery runs out, we'll resume via the
+ * normal paths.
+ *
+ * If the user has said they want to remove the previously kept image, we
+ * remove it, and return 0. We'll then store a new image.
+ */
+static int check_still_keeping_image(void)
+{
+	if (test_action_state(TOI_KEEP_IMAGE)) {
+		/* Terminate the line so later messages are not appended */
+		printk("Image already stored: powering down immediately.\n");
+		do_toi_step(STEP_HIBERNATE_POWERDOWN);
+		return 1;	/* Just in case we're using S3 */
+	}
+
+	printk("Invalidating previous image.\n");
+	toiActiveAllocator->remove_image();
+
+	return 0;
+}
+
+/**
+ * toi_init: Prepare to hibernate to disk.
+ *
+ * Initialise variables & data structures, in preparation for hibernating
+ * to disk. Returns 1 if we may go on to prepare an image, 0 on failure.
+ */
+static int toi_init(void)
+{
+	int result, i, j;
+
+	toi_result = 0;
+
+	printk(KERN_INFO "Initiating a hibernation cycle.\n");
+
+	nr_hibernates++;
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < 2; j++)
+			toi_bkd.toi_io_time[i][j] = 0;
+
+	if (!test_toi_state(TOI_CAN_HIBERNATE) ||
+	    allocate_bitmaps())
+		return 0;	/* do_prepare_image() treats non-zero as success */
+
+	mark_nosave_pages();
+
+	toi_prepare_console();
+
+	result = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+	if (result) {
+		set_result_state(TOI_NOTIFIERS_PREPARE_FAILED);
+		return 0;
+	}
+	set_toi_state(TOI_NOTIFIERS_PREPARE);
+
+	boot_kernel_data_buffer = toi_get_zeroed_page(37, TOI_ATOMIC_GFP);
+	if (!boot_kernel_data_buffer) {
+		printk(KERN_ERR "TuxOnIce: Failed to allocate "
+				"boot_kernel_data_buffer.\n");
+		set_result_state(TOI_OUT_OF_MEMORY);
+		return 0;
+	}
+
+	if (test_action_state(TOI_LATE_CPU_HOTPLUG) ||
+			!disable_nonboot_cpus())
+		return 1;	/* cpus unplugged now, or deliberately left for later */
+
+	set_abort_result(TOI_CPU_HOTPLUG_FAILED);
+	return 0;
+}
+
+/**
+ * can_hibernate: Perform basic 'Can we hibernate?' tests.
+ *
+ * Perform basic tests that must pass if we're going to be able to hibernate:
+ * Can we get the pm_mutex? Is resume= valid (we need to know where to write
+ * the image header). Returns 1 if hibernation may proceed, 0 otherwise.
+ */
+static int can_hibernate(void)
+{
+	if (get_pmsem) {
+		if (!mutex_trylock(&pm_mutex)) {
+			printk(KERN_INFO "TuxOnIce: Failed to obtain "
+					"pm_mutex.\n");
+			dump_stack();
+			set_abort_result(TOI_PM_SEM);
+			return 0;
+		}
+		got_pmsem = 1;
+	}
+
+	if (!test_toi_state(TOI_CAN_HIBERNATE))
+		toi_attempt_to_parse_resume_device(0);
+
+	if (!test_toi_state(TOI_CAN_HIBERNATE)) {
+		printk(KERN_INFO "TuxOnIce: Hibernation is disabled.\n"
+			"This may be because you haven't put something along "
+			"the lines of\n\nresume=swap:/dev/hda1\n\n"
+			"in lilo.conf or equivalent. (Where /dev/hda1 is your "
+			"swap partition).\n");
+		set_abort_result(TOI_CANT_SUSPEND);
+		if (got_pmsem) {	/* only drop a lock we took above */
+			mutex_unlock(&pm_mutex);
+			got_pmsem = 0;
+		}
+		return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * do_post_image_write: Having written an image, figure out what to do next.
+ *
+ * After writing an image, we might load an alternate image or power down.
+ * Powering down might involve hibernating to ram, in which case we also
+ * need to handle reloading pageset2. Always returns 0.
+ */
+static int do_post_image_write(void)
+{
+	/* If switching images fails, do normal powerdown */
+	if (alt_resume_param[0])
+		do_toi_step(STEP_RESUME_ALT_IMAGE);
+
+	toi_power_down();
+
+	/* If we return, it's because we hibernated to ram */
+	if (read_pageset2(1))
+		panic("Attempt to reload pagedir 2 failed. Try rebooting.");
+
+	barrier();
+	mb();
+	do_cleanup(1);	/* 1: also collect and print debug info */
+	return 0;
+}
+
+/**
+ * __save_image: Do the hard work of saving the image.
+ *
+ * High level routine for getting the image saved. The key assumptions made
+ * are that processes have been frozen and sufficient memory is available.
+ *
+ * We also exit through here at resume time, coming back from toi_hibernate
+ * after the atomic restore. This is the reason for the toi_in_hibernate
+ * test. Returns 0 if the image was saved or we have resumed, 1 otherwise.
+ */
+static int __save_image(void)
+{
+	int temp_result, did_copy = 0;	/* did_copy: toi_hibernate() returned 0 */
+
+	toi_prepare_status(DONT_CLEAR_BAR, "Starting to save the image..");
+
+	toi_message(TOI_ANY_SECTION, TOI_LOW, 1,
+		" - Final values: %d and %d.\n",
+		pagedir1.size, pagedir2.size);
+
+	toi_cond_pause(1, "About to write pagedir2.");
+
+	temp_result = write_pageset(&pagedir2);
+
+	/* NOTE(review): -1 is tested here but 1 for pageset1 below - confirm */
+	if (temp_result == -1 || test_result_state(TOI_ABORTED))
+		return 1;
+
+	toi_cond_pause(1, "About to copy pageset 1.");
+
+	if (test_result_state(TOI_ABORTED))
+		return 1;
+
+	toi_deactivate_storage(1);
+
+	toi_prepare_status(DONT_CLEAR_BAR, "Doing atomic copy.");
+
+	toi_in_hibernate = 1;
+
+	/* NOTE(review): temp_result is still write_pageset()'s value at Failed */
+	if (toi_go_atomic(PMSG_FREEZE, 1))
+		goto Failed;
+
+	temp_result = toi_hibernate();
+	if (!temp_result)
+		did_copy = 1;
+
+	/* We return here at resume time too! */
+	toi_end_atomic(ATOMIC_ALL_STEPS, toi_in_hibernate, temp_result);
+
+Failed:
+	if (toi_activate_storage(1))
+		panic("Failed to reactivate our storage.");
+
+	/* Resume time? */
+	if (!toi_in_hibernate) {
+		copyback_post();
+		return 0;
+	}
+
+	/* Nope. Hibernating. So, see if we can save the image... */
+
+	if (temp_result || test_result_state(TOI_ABORTED)) {
+		if (did_copy)
+			goto abort_reloading_pagedir_two;
+		else
+			return 1;
+	}
+
+	toi_update_status(pagedir2.size, pagedir1.size + pagedir2.size,
+			NULL);
+
+	if (test_result_state(TOI_ABORTED))
+		goto abort_reloading_pagedir_two;
+
+	toi_cond_pause(1, "About to write pageset1.");
+
+	toi_message(TOI_ANY_SECTION, TOI_LOW, 1, "-- Writing pageset1\n");
+
+	temp_result = write_pageset(&pagedir1);
+
+	/* We didn't overwrite any memory, so no reread needs to be done. */
+	if (test_action_state(TOI_TEST_FILTER_SPEED))
+		return 1;
+
+	if (temp_result == 1 || test_result_state(TOI_ABORTED))
+		goto abort_reloading_pagedir_two;
+
+	toi_cond_pause(1, "About to write header.");
+
+	if (test_result_state(TOI_ABORTED))
+		goto abort_reloading_pagedir_two;
+
+	temp_result = write_image_header();
+
+	/* The bio test also ends here, without rereading pageset2 */
+	if (test_action_state(TOI_TEST_BIO))
+		return 1;
+
+	if (!temp_result && !test_result_state(TOI_ABORTED))
+		return 0;
+
+abort_reloading_pagedir_two:
+	temp_result = read_pageset2(1);
+
+	/* If that failed, we're sunk. Panic! */
+	if (temp_result)
+		panic("Attempt to reload pagedir 2 while aborting "
+				"a hibernate failed.");
+
+	return 1;
+}
+
+/**
+ * do_save_image: Run __save_image() and clean up when that is called for.
+ *
+ * Cleanup (with debug info) happens if saving failed or if we are on the
+ * path returning from the atomic restore. Returns __save_image()'s result.
+ */
+
+static int do_save_image(void)
+{
+	int outcome = __save_image();
+	if (outcome || !toi_in_hibernate)
+		do_cleanup(1);
+	return outcome;
+}
+
+
+/**
+ * do_prepare_image: Get everything ready for an image to be saved.
+ *
+ * Returns 0 once the image is prepared. Otherwise cleans up (except when
+ * storage could not even be activated) and returns 1.
+ */
+
+static int do_prepare_image(void)
+{
+	if (toi_activate_storage(0) != 0)
+		return 1;
+
+	if (!can_hibernate())
+		goto cleanup;
+
+	/*
+	 * A kept image that is still wanted: check_still_keeping_image()
+	 * powers down and returns 1 if we come back (hibernated to RAM and
+	 * the power didn't run out). Then we just clean up and exit.
+	 */
+	if (test_result_state(TOI_KEPT_IMAGE) && check_still_keeping_image())
+		goto cleanup;
+
+	if (!toi_init())
+		goto cleanup;
+	if (!toi_prepare_image() && !test_result_state(TOI_ABORTED))
+		return 0;
+cleanup:
+	do_cleanup(0);
+	return 1;
+}
+
+/**
+ * do_check_can_resume: Report whether a stored image is available.
+ *
+ * Asks image_exists_read(), the routine behind the image_exists sysfs
+ * entry, to fill a scratch page and returns 1 if the text starts with
+ * '1'. Returns 0 otherwise, including when no page could be allocated.
+ */
+int do_check_can_resume(void)
+{
+	int found = 0;
+	char *page = (char *) toi_get_zeroed_page(21, TOI_ATOMIC_GFP);
+
+	if (page == NULL)
+		return found;
+
+	/* The return code is not needed; the first byte says it all. */
+	image_exists_read(page, PAGE_SIZE);
+
+	if (*page == '1')
+		found = 1;
+
+	toi_free_page(21, (unsigned long) page);
+	return found;
+}
+
+/**
+ * do_load_atomic_copy: Load the first part of an image, if it exists.
+ *
+ * Check whether we have an image. If one exists, do sanity checking
+ * (possibly invalidating the image or even rebooting if the user
+ * requests that) before loading it into memory in preparation for the
+ * atomic restore.
+ *
+ * Returns 0 only if read_pageset1() succeeded, i.e. we can restore; else 1.
+ */
+static int do_load_atomic_copy(void)
+{
+	int read_image_result = 0;
+
+	if (sizeof(swp_entry_t) != sizeof(long)) {
+		printk(KERN_WARNING "TuxOnIce: The size of swp_entry_t != size"
+			" of long. Please report this!\n");
+		return 1;
+	}
+
+	/* Only a warning: we still go on and try to parse the location */
+	if (!resume_file[0])
+		printk(KERN_WARNING "TuxOnIce: "
+			"You need to use a resume= command line parameter to "
+			"tell TuxOnIce where to look for an image.\n");
+
+	toi_activate_storage(0);	/* result is not checked here */
+
+	if (!(test_toi_state(TOI_RESUME_DEVICE_OK)) &&
+		!toi_attempt_to_parse_resume_device(0)) {
+		/*
+		 * Without a usable storage device we can do nothing -
+		 * even if noresume is given
+		 */
+
+		if (!toiNumAllocators)
+			printk(KERN_ALERT "TuxOnIce: "
+			  "No storage allocators have been registered.\n");
+		else
+			printk(KERN_ALERT "TuxOnIce: "
+				"Missing or invalid storage location "
+				"(resume= parameter). Please correct and "
+				"rerun lilo (or equivalent) before "
+				"hibernating.\n");
+		toi_deactivate_storage(0);
+		return 1;
+	}
+
+	read_image_result = read_pageset1(); /* non fatal error ignored */
+
+	if (test_toi_state(TOI_NORESUME_SPECIFIED))
+		clear_toi_state(TOI_NORESUME_SPECIFIED);
+
+	toi_deactivate_storage(0);
+
+	if (read_image_result)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * prepare_restore_load_alt_image: Save & restore alt image variables.
+ * @prepare: Non-zero saves and resets the pageset1 maps; zero restores them.
+ * Save and restore the pageset1 maps, when loading an alternate image.
+ */
+static void prepare_restore_load_alt_image(int prepare)
+{
+	/* Static, so the saved maps survive between the two calls */
+	static struct dyn_pageflags pageset1_map_save, pageset1_copy_map_save;
+
+	if (prepare) {
+		memcpy(&pageset1_map_save, &pageset1_map,
+				sizeof(struct dyn_pageflags));
+		pageset1_map.bitmap = NULL;
+		pageset1_map.sparse = 0;
+		pageset1_map.initialised = 0;
+		memcpy(&pageset1_copy_map_save, &pageset1_copy_map,
+			sizeof(struct dyn_pageflags));
+		pageset1_copy_map.bitmap = NULL;
+		pageset1_copy_map.sparse = 0;
+		pageset1_copy_map.initialised = 0;
+		set_toi_state(TOI_LOADING_ALT_IMAGE);
+		toi_reset_alt_image_pageset2_pfn();
+	} else {
+		/* Free maps built for the alternate image before restoring */
+		if (pageset1_map.bitmap)
+			free_dyn_pageflags(&pageset1_map);
+		memcpy(&pageset1_map, &pageset1_map_save,
+			sizeof(struct dyn_pageflags));
+		if (pageset1_copy_map.bitmap)
+			free_dyn_pageflags(&pageset1_copy_map);
+		memcpy(&pageset1_copy_map, &pageset1_copy_map_save,
+			sizeof(struct dyn_pageflags));
+		clear_toi_state(TOI_NOW_RESUMING);
+		clear_toi_state(TOI_LOADING_ALT_IMAGE);
+	}
+}
+
+/**
+ * pre_resume_freeze: Quiesce the system ahead of an atomic restore.
+ *
+ * Unplug the nonboot cpus (unless late cpu hotplug was requested) and
+ * freeze processes. Returns 0 on success, 1 if either step failed.
+ */
+int pre_resume_freeze(void)
+{
+	int hotplug_now = !test_action_state(TOI_LATE_CPU_HOTPLUG);
+
+	if (hotplug_now) {
+		toi_prepare_status(DONT_CLEAR_BAR, "Disable nonboot cpus.");
+		if (disable_nonboot_cpus() != 0) {
+			set_abort_result(TOI_CPU_HOTPLUG_FAILED);
+			return 1;
+		}
+	}
+
+	toi_prepare_status(DONT_CLEAR_BAR, "Freeze processes.");
+	if (freeze_processes() == 0)
+		return 0;
+
+	printk("Some processes failed to stop.\n");
+	return 1;
+}
+
+/**
+ * do_toi_step: Perform a step in hibernating or resuming.
+ * @step: STEP_* value to run. Returns that step's result; the alt-image
+ * and cleanup steps (and unknown values) return 0.
+ * This abstraction is in preparation for implementing cluster support, and
+ * perhaps replacing uswsusp too (haven't looked whether that's possible yet).
+ */
+int do_toi_step(int step)
+{
+	switch (step) {
+	case STEP_HIBERNATE_PREPARE_IMAGE:
+		return do_prepare_image();
+	case STEP_HIBERNATE_SAVE_IMAGE:
+		return do_save_image();
+	case STEP_HIBERNATE_POWERDOWN:
+		return do_post_image_write();
+	case STEP_RESUME_CAN_RESUME:
+		return do_check_can_resume();
+	case STEP_RESUME_LOAD_PS1:
+		return do_load_atomic_copy();
+	case STEP_RESUME_DO_RESTORE:
+		/*
+		 * If we succeed, this doesn't return.
+		 * Instead, we return from do_save_image() in the
+		 * hibernated kernel.
+		 */
+		return toi_atomic_restore();
+	case STEP_RESUME_ALT_IMAGE:
+		printk(KERN_INFO "Trying to resume alternate image.\n");
+		toi_in_hibernate = 0;
+		save_restore_alt_param(SAVE, NOQUIET);
+		prepare_restore_load_alt_image(1);
+		if (!do_check_can_resume()) {
+			printk(KERN_INFO "Nothing to resume from.\n");
+			goto out;
+		}
+		if (!do_load_atomic_copy())
+			toi_atomic_restore();
+		/* Getting here means the alternate image was not restored */
+		printk(KERN_INFO "Failed to load image.\n");
+out:
+		prepare_restore_load_alt_image(0);
+		save_restore_alt_param(RESTORE, NOQUIET);
+		break;
+	case STEP_CLEANUP:
+		do_cleanup(1);	/* with debug info */
+		break;
+	case STEP_QUIET_CLEANUP:
+		do_cleanup(0);	/* without debug info */
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(do_toi_step);
+
+/* -- Functions for kickstarting a hibernate or resume --- */
+
+/**
+ * __toi_try_resume: Walk through the resume steps.
+ *
+ * Look for an image, load it and attempt the atomic restore. If we are
+ * still here afterwards, clean up and clear the resume status flags.
+ */
+void __toi_try_resume(void)
+{
+	set_toi_state(TOI_TRYING_TO_RESUME);
+	resume_attempted = 1;
+
+	current->flags |= PF_MEMALLOC;
+
+	if (do_toi_step(STEP_RESUME_CAN_RESUME)) {
+		if (!do_toi_step(STEP_RESUME_LOAD_PS1))
+			do_toi_step(STEP_RESUME_DO_RESTORE);
+	}
+	do_cleanup(0);
+
+	current->flags &= ~PF_MEMALLOC;
+
+	clear_toi_state(TOI_IGNORE_LOGLEVEL);
+	clear_toi_state(TOI_TRYING_TO_RESUME);
+	clear_toi_state(TOI_NOW_RESUMING);
+}
+
+/**
+ * _toi_try_resume: Wrapper calling __toi_try_resume from do_mounts.
+ *
+ * Wrapper for when __toi_try_resume is called from init/do_mounts.c,
+ * rather than from echo > /sys/power/tuxonice/do_resume.
+ */
+void _toi_try_resume(void)
+{
+	resume_attempted = 1;
+
+	/*
+	 * There's a comment in kernel/power/disk.c that indicates
+	 * we should be able to use mutex_lock_nested below. That
+	 * doesn't seem to cut it, though, so let's just turn lockdep
+	 * off for now.
+	 */
+	lockdep_off();
+
+	if (toi_start_anything(SYSFS_RESUMING))
+		goto out;
+
+	/* Unlock will be done in do_cleanup */
+	mutex_lock(&pm_mutex);
+	got_pmsem = 1;
+
+	__toi_try_resume();
+
+	/*
+	 * For initramfs, we have to clear the boot time
+	 * flag after trying to resume
+	 */
+	clear_toi_state(TOI_BOOT_TIME);
+	/* NOTE(review): out: is also reached when toi_start_anything() failed */
+out:
+	toi_finish_anything(SYSFS_RESUMING);
+	lockdep_on();
+
+}
+
+/**
+ * _toi_try_hibernate: Try to start a hibernation cycle.
+ *
+ * have_pmsem: Whether the pm_sem is already taken.
+ *
+ * Start a hibernation cycle, coming in from either
+ * echo > /sys/power/tuxonice/do_suspend
+ *
+ * or
+ *
+ * echo disk > /sys/power/state
+ *
+ * In the latter case, we come in without pm_sem taken; in the
+ * former, it has been taken.
+ */
+int _toi_try_hibernate(int have_pmsem)
+{
+	int result = 0, sys_power_disk = 0;
+
+	if (!mutex_is_locked(&tuxonice_in_use)) {
+		/* Came in via /sys/power/disk */
+		if (toi_start_anything(SYSFS_HIBERNATING))
+			return -EBUSY;
+		sys_power_disk = 1;
+	}
+
+	get_pmsem = !have_pmsem;	/* can_hibernate() takes pm_mutex if set */
+
+	if (strlen(alt_resume_param)) {
+		attempt_to_parse_alt_resume_param();
+
+		if (!strlen(alt_resume_param)) {
+			printk(KERN_INFO "Alternate resume parameter now "
+					"invalid. Aborting.\n");
+			goto out;	/* NOTE(review): result is still 0 here */
+		}
+	}
+
+	current->flags |= PF_MEMALLOC;
+
+	if (test_toi_state(TOI_CLUSTER_MODE)) {
+		toi_initiate_cluster_hibernate();
+		goto out;
+	}
+
+	result = do_toi_step(STEP_HIBERNATE_PREPARE_IMAGE);
+	if (result)
+		goto out;
+
+	if (test_action_state(TOI_FREEZER_TEST)) {
+		do_cleanup(0);
+		goto out;
+	}
+
+	result = do_toi_step(STEP_HIBERNATE_SAVE_IMAGE);
+	if (result)
+		goto out;
+
+	/* This code runs at resume time too! */
+	if (toi_in_hibernate)
+		result = do_toi_step(STEP_HIBERNATE_POWERDOWN);
+out:
+	current->flags &= ~PF_MEMALLOC;
+
+	if (sys_power_disk)	/* only undo a toi_start_anything() done above */
+		toi_finish_anything(SYSFS_HIBERNATING);
+
+	return result;
+}
+
+/* channel_no: If !0, -c <channel_no> is added to args (userui).
+ * Returns 1 if nothing could be launched, otherwise the result of
+ * call_usermodehelper(). Up to 7 argv entries, channel arg included. */
+int toi_launch_userspace_program(char *command, int channel_no,
+		enum umh_wait wait)
+{
+	int retval;
+	static char *envp[] = {
+			"HOME=/",
+			"TERM=linux",
+			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+			NULL };
+	static char *argv[] =
+		{ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
+	char *channel = NULL;
+	int arg = 0, size, i;
+	int max_args = channel_no ? 6 : 7;	/* argv[] has 8 slots */
+	char *orig_posn = command;
+
+	if (!strlen(orig_posn))
+		return 1;
+
+	if (channel_no) {
+		channel = toi_kzalloc(4, 16, GFP_KERNEL); /* "-c" + any int */
+		if (!channel) {
+			printk(KERN_INFO "Failed to allocate memory in "
+				"preparing to launch userspace program.\n");
+			return 1;
+		}
+	}
+
+	/* Split command on whitespace, one zeroed copy per word */
+	while (arg < max_args) {
+		orig_posn += strspn(orig_posn, " \t\n");
+		size = strcspn(orig_posn, " \t\n");
+		if (!size)
+			break;
+		argv[arg] = toi_kzalloc(5, size + 1, TOI_ATOMIC_GFP);
+		if (!argv[arg]) {
+			retval = 1;
+			goto out;
+		}
+		memcpy(argv[arg], orig_posn, size);
+		orig_posn += size;
+		arg++;
+	}
+
+	if (!arg) {		/* nothing but whitespace: no program to run */
+		retval = 1;
+		goto out;
+	}
+	if (channel_no) {
+		snprintf(channel, 16, "-c%d", channel_no);
+		argv[arg++] = channel;
+	}
+	argv[arg] = NULL;	/* argv is static: always re-terminate it */
+	retval = call_usermodehelper(argv[0], argv, envp, wait);
+
+	/* If the program reports an error, retval = 256. Don't complain. */
+	if (retval && retval != 256)
+		printk("Failed to launch userspace program '%s': Error %d\n",
+				command, retval);
+out:
+	for (i = 0; i < arg; i++) {	/* leave no stale pointers behind */
+		if (argv[i] != channel)
+			toi_kfree(5, argv[i]);
+		argv[i] = NULL;
+	}
+	toi_kfree(4, channel);
+	return retval;
+}
+
+/*
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+	{ TOI_ATTR("extra_pages_allowance", SYSFS_RW),
+	  SYSFS_LONG(&extra_pd1_pages_allowance, 0,
+			LONG_MAX, 0)
+	},
+
+	{ TOI_ATTR("image_exists", SYSFS_RW),
+	  SYSFS_CUSTOM(image_exists_read, image_exists_write,
+			  SYSFS_NEEDS_SM_FOR_BOTH)
+	},
+
+	{ TOI_ATTR("resume", SYSFS_RW),
+	  SYSFS_STRING(resume_file, 255, SYSFS_NEEDS_SM_FOR_WRITE),
+	  .write_side_effect = attempt_to_parse_resume_device2,
+	},
+
+	{ TOI_ATTR("alt_resume_param", SYSFS_RW),
+	  SYSFS_STRING(alt_resume_param, 255, SYSFS_NEEDS_SM_FOR_WRITE),
+	  .write_side_effect = attempt_to_parse_alt_resume_param,
+	},
+	{ TOI_ATTR("debug_info", SYSFS_READONLY),
+	  SYSFS_CUSTOM(get_toi_debug_info, NULL, 0)
+	},
+
+	{ TOI_ATTR("ignore_rootfs", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_IGNORE_ROOTFS, 0)
+	},
+
+	{ TOI_ATTR("image_size_limit", SYSFS_RW),
+	  SYSFS_INT(&image_size_limit, -2, INT_MAX, 0)
+	},
+
+	{ TOI_ATTR("last_result", SYSFS_RW),
+	  SYSFS_UL(&toi_result, 0, 0, 0)
+	},
+
+	{ TOI_ATTR("no_multithreaded_io", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_NO_MULTITHREADED_IO, 0)
+	},
+
+	{ TOI_ATTR("no_flusher_thread", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_NO_FLUSHER_THREAD, 0)
+	},
+
+	{ TOI_ATTR("full_pageset2", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_PAGESET2_FULL, 0)
+	},
+
+	{ TOI_ATTR("reboot", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_REBOOT, 0)
+	},
+
+	{ TOI_ATTR("replace_swsusp", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_REPLACE_SWSUSP, 0)
+	},
+
+	{ TOI_ATTR("resume_commandline", SYSFS_RW),
+	  SYSFS_STRING(toi_bkd.toi_nosave_commandline, COMMAND_LINE_SIZE, 0)
+	},
+
+	{ TOI_ATTR("version", SYSFS_READONLY),
+	  SYSFS_STRING(TOI_CORE_VERSION, 0, 0)
+	},
+
+	{ TOI_ATTR("no_load_direct", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_NO_DIRECT_LOAD, 0)
+	},
+
+	{ TOI_ATTR("freezer_test", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_FREEZER_TEST, 0)
+	},
+
+	{ TOI_ATTR("test_bio", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_TEST_BIO, 0)
+	},
+
+	{ TOI_ATTR("test_filter_speed", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_TEST_FILTER_SPEED, 0)
+	},
+
+	{ TOI_ATTR("no_pageset2", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_NO_PAGESET2, 0)
+	},
+
+	{ TOI_ATTR("late_cpu_hotplug", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_LATE_CPU_HOTPLUG, 0)
+	},
+
+	{ TOI_ATTR("pre_hibernate_command", SYSFS_RW),
+	  SYSFS_STRING(pre_hibernate_command, 255, 0) /* length, then flags */
+	},
+
+	{ TOI_ATTR("post_hibernate_command", SYSFS_RW),
+	  SYSFS_STRING(post_hibernate_command, 255, 0) /* length, then flags */
+	},
+
+#ifdef CONFIG_TOI_KEEP_IMAGE
+	{ TOI_ATTR("keep_image", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_KEEP_IMAGE, 0)
+	},
+#endif
+};
+
+struct toi_core_fns my_fns = {	/* installed as toi_core_fns by core_load() */
+	.get_nonconflicting_page = __toi_get_nonconflicting_page,
+	.post_context_save = __toi_post_context_save,
+	.try_hibernate = _toi_try_hibernate,
+	.try_resume = _toi_try_resume,
+};
+
+/**
+ * core_load: Bring up the TuxOnIce core.
+ *
+ * Sysfs comes first, then the parts of the core (checksumming and so on)
+ * that have init routines of their own because they are optional or live
+ * in their own subdirectories. Returns 0 on success, 1 on any failure.
+ */
+static __init int core_load(void)
+{
+	int idx;
+	const int count = sizeof(sysfs_params) / sizeof(sysfs_params[0]);
+
+	printk(KERN_INFO "TuxOnIce " TOI_CORE_VERSION
+			" (http://tuxonice.net)\n");
+
+	/* Seed the hook commands from the build-time defaults. */
+	strncpy(pre_hibernate_command, CONFIG_TOI_DEFAULT_PRE_HIBERNATE, 255);
+	strncpy(post_hibernate_command, CONFIG_TOI_DEFAULT_POST_HIBERNATE, 255);
+
+	if (toi_sysfs_init() != 0)
+		return 1;
+
+	/* Publish every built-in sysfs entry. */
+	idx = 0;
+	while (idx < count) {
+		toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[idx]);
+		idx++;
+	}
+
+	toi_core_fns = &my_fns;
+
+	/*
+	 * Bring up the remaining parts of the core in a fixed order; the
+	 * first one to report failure stops the sequence.
+	 */
+	if (toi_alloc_init() || toi_checksum_init() || toi_cluster_init() ||
+	    toi_usm_init() || toi_ui_init() || toi_poweroff_init())
+		return 1;
+	return 0;
+}
+
+#ifdef MODULE
+/**
+ * core_unload: Tear the core down again before the module goes away.
+ */
+static __exit void core_unload(void)
+{
+	int idx;
+	const int count = sizeof(sysfs_params) / sizeof(sysfs_params[0]);
+
+	toi_alloc_exit();
+	toi_poweroff_exit();
+	toi_ui_exit();
+	toi_checksum_exit();
+	toi_cluster_exit();
+	toi_usm_exit();
+
+	for (idx = 0; idx != count; idx++)
+		toi_unregister_sysfs_file(tuxonice_kobj, sysfs_params + idx);
+
+	toi_core_fns = NULL;
+
+	toi_sysfs_exit();
+}
+MODULE_LICENSE("GPL");
+module_init(core_load);
+module_exit(core_unload);
+#else
+late_initcall(core_load);
+#endif
+
+#ifdef CONFIG_TOI_EXPORTS
+EXPORT_SYMBOL_GPL(tuxonice_signature);
+EXPORT_SYMBOL_GPL(pagedir2);
+EXPORT_SYMBOL_GPL(toi_fail_num);
+EXPORT_SYMBOL_GPL(do_check_can_resume);
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_io.c linux-2.6-block-custom/kernel/power/tuxonice_io.c
--- linux-2.6-block/kernel/power/tuxonice_io.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_io.c	2008-09-26 19:48:23.995776559 +0900
@@ -0,0 +1,1427 @@
+/*
+ * kernel/power/tuxonice_io.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2002-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains high level IO routines for hibernating.
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/version.h>
+#include <linux/utsname.h>
+#include <linux/mount.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/dyn_pageflags.h>
+#include <asm/tlbflush.h>
+
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_pageflags.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_alloc.h"
+char alt_resume_param[256];
+
+/* Variables shared between threads and updated under the mutex */
+static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result;
+static int io_index, io_nextupdate, io_pc, io_pc_step;
+static unsigned long pfn, other_pfn;
+static DEFINE_MUTEX(io_mutex);
+static DEFINE_PER_CPU(struct page *, last_sought);
+static DEFINE_PER_CPU(struct page *, last_high_page);
+static DEFINE_PER_CPU(char *, checksum_locn);
+static DEFINE_PER_CPU(struct pbe *, last_low_page);
+static atomic_t io_count;
+atomic_t toi_io_workers;
+DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher);
+int toi_bio_queue_flusher_should_finish;
+
+/* toi_attempt_to_parse_resume_device
+ * Can we hibernate, using the current resume= parameter? Returns 1 if an
+ * enabled allocator accepted resume_file, else 0. @quiet silences messages.
+ */
+int toi_attempt_to_parse_resume_device(int quiet)
+{
+	struct list_head *Allocator;
+	struct toi_module_ops *thisAllocator;
+	int result, returning = 0;
+
+	if (toi_activate_storage(0))
+		return 0;
+
+	/* Start from a clean slate; the flags are set again on success */
+	toiActiveAllocator = NULL;
+	clear_toi_state(TOI_RESUME_DEVICE_OK);
+	clear_toi_state(TOI_CAN_RESUME);
+	clear_result_state(TOI_ABORTED);
+
+	if (!toiNumAllocators) {
+		if (!quiet)
+			printk(KERN_INFO "TuxOnIce: No storage allocators have "
+				"been registered. Hibernating will be "
+				"disabled.\n");
+		goto cleanup;
+	}
+
+	if (!resume_file[0]) {
+		if (!quiet)
+			printk("TuxOnIce: Resume= parameter is empty."
+				" Hibernating will be disabled.\n");
+		goto cleanup;
+	}
+
+	list_for_each(Allocator, &toiAllocators) {
+		thisAllocator = list_entry(Allocator, struct toi_module_ops,
+								type_list);
+
+		/*
+		 * Not sure why you'd want to disable an allocator, but
+		 * we should honour the flag if we're providing it
+		 */
+		if (!thisAllocator->enabled)
+			continue;
+
+		result = thisAllocator->parse_sig_location(
+				resume_file, (toiNumAllocators == 1),
+				quiet);
+
+		/* Any other result: carry on with the next allocator */
+		switch (result) {
+		case -EINVAL:
+			/* For this allocator, but not a valid
+			 * configuration. Error already printed. */
+			goto cleanup;
+
+		case 0:
+			/* For this allocator and valid. */
+			toiActiveAllocator = thisAllocator;
+
+			set_toi_state(TOI_RESUME_DEVICE_OK);
+			set_toi_state(TOI_CAN_RESUME);
+			returning = 1;
+			goto cleanup;
+		}
+	}
+	if (!quiet)
+		printk("TuxOnIce: No matching enabled allocator found. "
+				"Resuming disabled.\n");
+cleanup:
+	toi_deactivate_storage(0);
+	return returning;
+}
+
+void attempt_to_parse_resume_device2(void)
+{
+	toi_prepare_usm();	/* bracket the noisy (quiet == 0) parse ... */
+	toi_attempt_to_parse_resume_device(0);
+	toi_cleanup_usm();	/* ... with usm prepare and cleanup */
+}
+
+void save_restore_alt_param(int replace, int quiet)
+{
+	/* NOTE(review): 255 bytes; confirm resume_file can never be longer */
+	static char resume_param_save[255];
+	static unsigned long toi_state_save;
+	/* replace != 0: stash resume_file and toi_state, switch to the alt */
+	if (replace) {
+		toi_state_save = toi_state;
+		strcpy(resume_param_save, resume_file);
+		strcpy(resume_file, alt_resume_param);
+	} else {	/* replace == 0: put the stashed values back */
+		strcpy(resume_file, resume_param_save);
+		toi_state = toi_state_save;
+	}
+	toi_attempt_to_parse_resume_device(quiet);
+}
+
+void attempt_to_parse_alt_resume_param(void)
+{
+	int usable;
+
+	/* Nothing to validate if no alternate location has been given. */
+	if (alt_resume_param[0] == '\0')
+		return;
+
+	/* Try the alternate location in place of the normal resume= value. */
+	printk("=== Trying Poweroff Resume2 ===\n");
+	save_restore_alt_param(SAVE, NOQUIET);
+	usable = test_toi_state(TOI_CAN_RESUME) ? 1 : 0;
+
+	printk(KERN_INFO "=== Done ===\n");
+	save_restore_alt_param(RESTORE, QUIET);
+
+	/* If the alternate location cannot be resumed from, clear the
+	 * string. */
+	if (!usable) {
+		printk(KERN_INFO "Can't resume from that location; clearing "
+				"alt_resume_param.\n");
+		alt_resume_param[0] = '\0';
+	}
+}
+
+/* noresume_reset_modules
+ *
+ * Description:	Give every filter, and then the active allocator, the
+ * 		chance to reset its data structures when an image that has
+ * 		been partly read is removed instead of being resumed from.
+ */
+
+static void noresume_reset_modules(void)
+{
+	struct toi_module_ops *filter;
+	list_for_each_entry(filter, &toi_filters, type_list) {
+		if (!filter->noresume_reset)
+			continue;
+		filter->noresume_reset();
+	}
+	if (toiActiveAllocator && toiActiveAllocator->noresume_reset)
+		toiActiveAllocator->noresume_reset();
+}
+
+/* fill_toi_header()
+ *
+ * Description:	Populate the hibernate header from the current state.
+ * Arguments:	sh: Header to fill. Returns 0, or init_swsusp_header()'s error.
+ */
+
+static int fill_toi_header(struct toi_header *sh)
+{
+	int row, col;
+	int error = init_swsusp_header((struct swsusp_info *) sh);
+
+	if (error != 0)
+		return error;
+	sh->pagedir = pagedir1;
+	sh->pageset_2_size = pagedir2.size;
+	sh->param0 = toi_result;
+	sh->param1 = toi_bkd.toi_action;
+	sh->param2 = toi_bkd.toi_debug_state;
+	sh->param3 = toi_bkd.toi_default_console_level;
+	sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev;
+	for (row = 0; row < 2; row++)
+		for (col = 0; col < 2; col++)
+			sh->io_time[row][col] = toi_bkd.toi_io_time[row][col];
+	sh->bkd = boot_kernel_data_buffer;
+	return 0;
+}
+
+/*
+ * rw_init_modules
+ *
+ * Iterate over modules, preparing the ones that will be used to read or write
+ * data. rw and which go to each rw_init hook. Returns 0, or 1 on failure.
+ */
+static int rw_init_modules(int rw, int which)
+{
+	struct toi_module_ops *this_module;
+	/* Initialise page transformers */
+	list_for_each_entry(this_module, &toi_filters, type_list) {
+		if (!this_module->enabled)
+			continue;
+		if (this_module->rw_init && this_module->rw_init(rw, which)) {
+			abort_hibernate(TOI_FAILED_MODULE_INIT,
+				"Failed to initialise the %s filter.",
+				this_module->name);
+			return 1;
+		}
+	}
+
+	/* Initialise allocator */
+	/* NOTE(review): no NULL check on toiActiveAllocator or its rw_init */
+	if (toiActiveAllocator->rw_init(rw, which)) {
+		abort_hibernate(TOI_FAILED_MODULE_INIT,
+				"Failed to initialise the allocator.");
+		return 1;
+	}
+
+	/* Initialise other modules */
+	list_for_each_entry(this_module, &toi_modules, module_list) {
+		if (!this_module->enabled ||
+		    this_module->type == FILTER_MODULE ||
+		    this_module->type == WRITER_MODULE)
+			continue;
+		if (this_module->rw_init && this_module->rw_init(rw, which)) {
+			set_abort_result(TOI_FAILED_MODULE_INIT);
+			printk(KERN_INFO "Setting aborted flag due to module "
+					"init failure.\n");
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * rw_cleanup_modules
+ *
+ * Cleanup components after reading or writing a set of pages. Returns the
+ * bitwise OR of all rw_cleanup results ("Only the allocator may fail").
+ */
+static int rw_cleanup_modules(int rw)
+{
+	struct toi_module_ops *this_module;
+	int result = 0;
+
+	/* Cleanup other modules */
+	list_for_each_entry(this_module, &toi_modules, module_list) {
+		if (!this_module->enabled ||
+		    this_module->type == FILTER_MODULE ||
+		    this_module->type == WRITER_MODULE)
+			continue;
+		if (this_module->rw_cleanup)
+			result |= this_module->rw_cleanup(rw);
+	}
+
+	/* Flush data and cleanup */
+	list_for_each_entry(this_module, &toi_filters, type_list) {
+		if (!this_module->enabled)
+			continue;
+		if (this_module->rw_cleanup)
+			result |= this_module->rw_cleanup(rw);
+	}
+
+	/* The allocator comes last and is called unconditionally */
+	result |= toiActiveAllocator->rw_cleanup(rw);
+
+	return result;
+}
+
+static struct page *copy_page_from_orig_page(struct page *orig_page)
+{
+	int is_high = PageHighMem(orig_page), index, min, max;
+	struct page *high_page = NULL,
+		    **my_last_high_page = &__get_cpu_var(last_high_page),
+		    **my_last_sought = &__get_cpu_var(last_sought);
+	struct pbe *this, **my_last_low_page = &__get_cpu_var(last_low_page);
+	void *compare, *nearest;
+	/* Returns the page recorded for orig_page in the pbe lists, or NULL */
+	if (is_high) {
+		if (*my_last_sought && *my_last_high_page &&
+				*my_last_sought < orig_page)
+			high_page = *my_last_high_page;
+		else
+			high_page = (struct page *) restore_highmem_pblist;
+		this = (struct pbe *) kmap(high_page);
+		compare = orig_page;
+	} else {
+		if (*my_last_sought && *my_last_low_page &&
+				*my_last_sought < orig_page)
+			this = *my_last_low_page;
+		else
+			this = restore_pblist;
+		compare = page_address(orig_page);
+	}
+
+	*my_last_sought = orig_page;
+
+	/* Locate page containing pbe */
+	while (this[PBES_PER_PAGE - 1].next &&
+			this[PBES_PER_PAGE - 1].orig_address < compare) {
+		if (is_high) {
+			struct page *next_high_page = (struct page *)
+				this[PBES_PER_PAGE - 1].next;
+			kunmap(high_page);
+			this = kmap(next_high_page);
+			high_page = next_high_page;
+		} else
+			this = this[PBES_PER_PAGE - 1].next;
+	}
+
+	/* Do a binary search within the page */
+	min = 0;
+	max = PBES_PER_PAGE;
+	index = PBES_PER_PAGE / 2;
+	while (max - min) {
+		if (!this[index].orig_address ||
+		    this[index].orig_address > compare)
+			max = index;
+		else if (this[index].orig_address == compare) {
+			if (is_high) {
+				struct page *page = this[index].address;
+				*my_last_high_page = high_page;
+				kunmap(high_page);
+				return page;
+			}
+			*my_last_low_page = this;
+			return virt_to_page(this[index].address);
+		} else
+			min = index + 1;	/* must shrink, or we'd spin forever */
+		index = ((max + min) / 2);
+	};
+	/* Fetch the entry for the message while its page is still mapped */
+	nearest = index < PBES_PER_PAGE ? this[index].orig_address : NULL;
+	if (is_high)
+		kunmap(high_page);
+
+	abort_hibernate(TOI_FAILED_IO, "Failed to get destination page for"
+		" orig page %p. This[min].orig_address=%p.\n", orig_page,
+		nearest);
+	return NULL;
+}
+
+/*
+ * worker_rw_loop
+ * Per-thread I/O loop, run by the ks2io kthreads and by do_rw_loop() itself.
+ * @data is unused. Returns 0; errors go via io_result and abort_hibernate().
+ */
+static int worker_rw_loop(void *data)
+{
+	unsigned long write_pfn;
+	int result, my_io_index = 0;
+	struct toi_module_ops *first_filter = toi_get_next_filter(NULL);
+	struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP);
+
+	atomic_inc(&toi_io_workers);
+	mutex_lock(&io_mutex);
+
+	do {
+		int buf_size = 0;
+
+		/*
+		 * What page to use? If reading, don't know yet which page's
+		 * data will be read, so always use the buffer. If writing,
+		 * use the copy (Pageset1) or original page (Pageset2), but
+		 * always write the pfn of the original page.
+		 */
+		if (io_write) {
+			struct page *page;
+			char **my_checksum_locn = &__get_cpu_var(checksum_locn);
+
+			pfn = get_next_bit_on(&io_map, pfn);
+			/* Another thread could have beaten us to it. */
+			if (pfn == max_pfn + 1) {
+				if (atomic_read(&io_count)) {
+					printk("Ran out of pfns but io_count "
+						"is still %d.\n",
+						atomic_read(&io_count));
+					BUG();
+				}
+				break;
+			}
+
+			my_io_index = io_finish_at -
+				atomic_sub_return(1, &io_count);
+
+			write_pfn = pfn;
+
+			/*
+			 * Other_pfn is updated by all threads, so we're not
+			 * writing the same page multiple times.
+			 */
+			clear_dynpageflag(&io_map, pfn_to_page(pfn));
+			if (io_pageset == 1) {
+				other_pfn = get_next_bit_on(&pageset1_map,
+						other_pfn);
+				write_pfn = other_pfn;
+			}
+			page = pfn_to_page(pfn);
+			if (io_pageset == 2)
+				*my_checksum_locn =
+					tuxonice_get_next_checksum();
+
+			mutex_unlock(&io_mutex);
+
+			/* Must unwind via the common exit path */
+			if (io_pageset == 2 &&
+			    tuxonice_calc_checksum(page, *my_checksum_locn)) {
+				io_result = 1;
+				abort_hibernate(TOI_FAILED_IO,
+					"Failed to checksum a page.");
+				mutex_lock(&io_mutex);
+				break;
+			}
+
+			result = first_filter->write_page(write_pfn, page,
+					PAGE_SIZE);
+		} else {
+			my_io_index = io_finish_at -
+				atomic_sub_return(1, &io_count);
+			mutex_unlock(&io_mutex);
+
+			/*
+			 * Are we aborting? If so, don't submit any more I/O as
+			 * resetting the resume_attempted flag (from ui.c) will
+			 * clear the bdev flags, making this thread oops.
+			 */
+			if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
+				atomic_dec(&toi_io_workers);
+				if (!atomic_read(&toi_io_workers))
+					set_toi_state(TOI_IO_STOPPED);
+				while (1)
+					schedule();
+			}
+
+			result = first_filter->read_page(&write_pfn, buffer,
+					&buf_size);
+			if (buf_size != PAGE_SIZE) {
+				abort_hibernate(TOI_FAILED_IO,
+					"I/O pipeline returned %d bytes instead"
+					" of %d.\n", buf_size, PAGE_SIZE);
+				mutex_lock(&io_mutex);
+				break;
+			}
+		}
+
+		if (result) {
+			io_result = result;
+			if (io_write) {
+				printk(KERN_INFO "Write chunk returned %d.\n",
+						result);
+				abort_hibernate(TOI_FAILED_IO,
+					"Failed to write a chunk of the "
+					"image.");
+				mutex_lock(&io_mutex);
+				break;
+			}
+			panic("Read chunk returned (%d)", result);
+		}
+
+		/*
+		 * Discard reads of resaved pages while reading ps2
+		 * and unwanted pages while rereading ps2 when aborting.
+		 */
+		if (!io_write && !PageResave(pfn_to_page(write_pfn))) {
+			struct page *final_page = pfn_to_page(write_pfn),
+				    *copy_page = final_page;
+			char *virt, *buffer_virt;
+
+			if (io_pageset == 1 && !load_direct(final_page)) {
+				copy_page =
+					copy_page_from_orig_page(final_page);
+				BUG_ON(!copy_page);
+			}
+
+			if (test_dynpageflag(&io_map, final_page)) {
+				virt = kmap(copy_page);
+				buffer_virt = kmap(buffer);
+				memcpy(virt, buffer_virt, PAGE_SIZE);
+				kunmap(copy_page);
+				kunmap(buffer);
+				clear_dynpageflag(&io_map, final_page);
+			} else {
+				mutex_lock(&io_mutex);
+				atomic_inc(&io_count);
+				mutex_unlock(&io_mutex);
+			}
+		}
+
+		if (my_io_index + io_base == io_nextupdate)
+			io_nextupdate = toi_update_status(my_io_index +
+				io_base, io_barmax, " %d/%d MB ",
+				MB(io_base+my_io_index+1), MB(io_barmax));
+
+		if (my_io_index == io_pc) {
+			printk("%s%d%%...", io_pc_step == 1 ? KERN_ERR : "",
+					20 * io_pc_step);
+			io_pc_step++;
+			io_pc = io_finish_at * io_pc_step / 5;
+		}
+
+		toi_cond_pause(0, NULL);
+
+		/*
+		 * Subtle: If there's less I/O still to be done than threads
+		 * running, quit. This stops us doing I/O beyond the end of
+		 * the image when reading.
+		 *
+		 * Possible race condition. Two threads could do the test at
+		 * the same time; one should exit and one should continue.
+		 * Therefore we take the mutex before comparing and exiting.
+		 */
+		mutex_lock(&io_mutex);
+
+	} while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) &&
+		!(io_write && test_result_state(TOI_ABORTED)));
+
+	if (atomic_dec_and_test(&toi_io_workers)) {
+		toi_bio_queue_flusher_should_finish = 1;
+		wake_up(&toi_io_queue_flusher);
+	}
+	mutex_unlock(&io_mutex);
+
+	toi__free_page(28, buffer);
+
+	return 0;
+}
+
+int start_other_threads(void)
+{
+	struct task_struct *worker;
+	int cpu, started = 0;
+
+	/* One bound worker per online CPU other than the one we run on. */
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;
+		worker = kthread_create(worker_rw_loop, NULL, "ks2io/%d", cpu);
+		if (IS_ERR(worker)) {
+			printk("ks2io for %i failed\n", cpu);
+		} else {
+			kthread_bind(worker, cpu);
+			worker->flags |= PF_MEMALLOC;
+			wake_up_process(worker);
+			started++;
+		}
+	}
+
+	return started;
+}
+
+/*
+ * do_rw_loop
+ * Set up the io_* state, flag finish_at pages of pageflags in io_map, run
+ * the worker loop(s) and return io_result once all workers have finished.
+ */
+static int do_rw_loop(int write, int finish_at, struct dyn_pageflags *pageflags,
+		int base, int barmax, int pageset)
+{
+	int index = 0, cpu, num_other_threads = 0;
+
+	if (!finish_at)
+		return 0;	/* nothing to transfer */
+
+	io_write = write;
+	io_finish_at = finish_at;
+	io_base = base;
+	io_barmax = barmax;
+	io_pageset = pageset;
+	io_index = 0;
+	io_pc = io_finish_at / 5;
+	io_pc_step = 1;
+	io_result = 0;
+	io_nextupdate = base + 1;
+	toi_bio_queue_flusher_should_finish = 0;
+
+	for_each_online_cpu(cpu) {
+		per_cpu(last_sought, cpu) = NULL;
+		per_cpu(last_low_page, cpu) = NULL;
+		per_cpu(last_high_page, cpu) = NULL;
+	}
+
+	/* Ensure all bits clear */
+	clear_dyn_pageflags(&io_map);
+
+	/* Set the bits for the pages to read or write */
+	pfn = get_next_bit_on(pageflags, max_pfn + 1);
+
+	while (pfn < max_pfn + 1 && index < finish_at) {
+		set_dynpageflag(&io_map, pfn_to_page(pfn));
+		pfn = get_next_bit_on(pageflags, pfn);
+		index++;
+	}
+
+	BUG_ON(index < finish_at);	/* pageflags had too few bits set */
+
+	atomic_set(&io_count, finish_at);
+
+	pfn = max_pfn + 1;
+	other_pfn = pfn;
+
+	clear_toi_state(TOI_IO_STOPPED);
+
+	if (!test_action_state(TOI_NO_MULTITHREADED_IO))
+		num_other_threads = start_other_threads();
+
+	if (!num_other_threads || !toiActiveAllocator->io_flusher ||
+		test_action_state(TOI_NO_FLUSHER_THREAD))
+		worker_rw_loop(NULL);	/* this thread does I/O itself */
+	else
+		toiActiveAllocator->io_flusher(write);
+
+	while (atomic_read(&toi_io_workers))
+		schedule();	/* until every worker has left */
+
+	set_toi_state(TOI_IO_STOPPED);
+	if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
+		while (1)
+			schedule();
+	}
+
+	if (!io_result) {
+		printk("done.\n");
+
+		toi_update_status(io_base + io_finish_at, io_barmax,
+				" %d/%d MB ",
+				MB(io_base + io_finish_at), MB(io_barmax));
+	}
+
+	if (io_write && test_result_state(TOI_ABORTED))
+		io_result = 1;
+	else { /* All I/O done? */
+		if  (get_next_bit_on(&io_map, max_pfn + 1) != max_pfn + 1) {
+			printk(KERN_INFO "Finished I/O loop but still work to "
+					"do?\nFinish at = %d. io_count = %d.\n",
+					finish_at, atomic_read(&io_count));
+			BUG();
+		}
+	}
+
+	return io_result;
+}
+
+/* write_pageset()
+ *
+ * Description:	Write one pageset of the image to storage.
+ * Arguments:	pagedir:	The pagedir (pageset 1 or 2) to write.
+ * Returns:	Zero on success, non-zero on failure.
+ */
+
+int write_pageset(struct pagedir *pagedir)
+{
+	int barmax = pagedir1.size + pagedir2.size;
+	int base = 0, began, ended, finish_at;
+	struct dyn_pageflags *pageflags;
+	long error;
+
+	/*
+	 * The init and cleanup calls below are made even when the pagedir
+	 * is empty: the allocator may need them for its own housekeeping
+	 * (eg: pageset1 may start where pageset2 ends when writing).
+	 */
+	finish_at = pagedir->size;
+
+	if (pagedir->id != 1) {
+		toi_prepare_status(CLEAR_BAR, "Writing caches...");
+		pageflags = &pageset2_map;
+	} else {
+		toi_prepare_status(DONT_CLEAR_BAR,
+				"Writing kernel & process data...");
+		base = pagedir2.size;
+		pageflags = &pageset1_copy_map;
+		if (test_action_state(TOI_TEST_FILTER_SPEED) ||
+		    test_action_state(TOI_TEST_BIO))
+			pageflags = &pageset1_map;
+	}
+
+	began = jiffies;
+
+	if (rw_init_modules(1, pagedir->id)) {
+		abort_hibernate(TOI_FAILED_MODULE_INIT,
+				"Failed to initialise modules for writing.");
+		error = 1;
+	} else {
+		error = do_rw_loop(1, finish_at, pageflags, base, barmax,
+				pagedir->id);
+	}
+
+	/* Always clean up; report its failure only if nothing failed before */
+	if (rw_cleanup_modules(WRITE) && !error) {
+		abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
+				"Failed to cleanup after writing.");
+		error = 1;
+	}
+
+	/* Account pages and jiffies spent, unless TOI_ABORTED is set */
+	ended = jiffies;
+
+	if ((ended - began) && !test_result_state(TOI_ABORTED)) {
+		toi_bkd.toi_io_time[0][0] += finish_at;
+		toi_bkd.toi_io_time[0][1] += ended - began;
+	}
+
+	return error;
+}
+
+/* read_pageset()
+ *
+ * Description:	Read a pageset from disk.
+ * Arguments:	pagedir:	The pagedir (pageset 1 or 2) to read.
+ * 		overwrittenpagesonly: For pageset 2, limit the read to
+ * 		min(pagedir1.size, pagedir2.size) pages instead of all.
+ * Returns:	Zero on success, non-zero on failure.
+ */
+
+static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly)
+{
+	int barmax = pagedir1.size + pagedir2.size;
+	int finish_at = pagedir->size;
+	int base = 0, began, ended, result;
+	struct dyn_pageflags *pageflags;
+
+	if (pagedir->id != 1) {
+		toi_prepare_status(DONT_CLEAR_BAR, "Reading caches...");
+		pageflags = &pageset2_map;
+		if (!overwrittenpagesonly)
+			base = pagedir1.size;
+		else
+			barmax = finish_at = min(pagedir1.size, pagedir2.size);
+	} else {
+		toi_prepare_status(CLEAR_BAR,
+				"Reading kernel & process data...");
+		pageflags = &pageset1_map;
+	}
+
+	began = jiffies;
+
+	/* If the modules cannot be initialised, the image is removed. */
+	if (rw_init_modules(0, pagedir->id)) {
+		toiActiveAllocator->remove_image();
+		result = 1;
+	} else {
+		result = do_rw_loop(0, finish_at, pageflags, base, barmax,
+				pagedir->id);
+	}
+
+	if (rw_cleanup_modules(READ) && !result) {
+		abort_hibernate(TOI_FAILED_MODULE_CLEANUP,
+				"Failed to cleanup after reading.");
+		result = 1;
+	}
+
+	/* Statistics */
+	ended = jiffies;
+	if ((ended - began) && !test_result_state(TOI_ABORTED)) {
+		toi_bkd.toi_io_time[1][0] += finish_at;
+		toi_bkd.toi_io_time[1][1] += ended - began;
+	}
+
+	return result;
+}
+
+/* write_module_configs()
+ *
+ * Description:	Store the configuration for each module in the image header.
+ * Returns:	Int: Zero on success, Error value otherwise.
+ */
+static int write_module_configs(void)
+{
+	struct toi_module_ops *this_module;
+	char *buffer = (char *) toi_get_zeroed_page(22, TOI_ATOMIC_GFP);
+	int len, index = 1, result = 0;
+	struct toi_module_header toi_module_header;
+
+	if (!buffer) {
+		printk(KERN_INFO "Failed to allocate a buffer for saving "
+				"module configuration info.\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * We have to know which data goes with which module, so we at
+	 * least write a length of zero for a module. Note that we are
+	 * also assuming every module's config data takes <= PAGE_SIZE.
+	 */
+	/* For each module (in registration order) */
+	list_for_each_entry(this_module, &toi_modules, module_list) {
+		if (!this_module->enabled || !this_module->storage_needed ||
+		    (this_module->type == WRITER_MODULE &&
+		     toiActiveAllocator != this_module))
+			continue;
+
+		/* Get the data from the module */
+		len = 0;
+		if (this_module->save_config_info)
+			len = this_module->save_config_info(buffer);
+
+		/* Zero it: no stack garbage saved, name always terminated */
+		memset(&toi_module_header, 0, sizeof(toi_module_header));
+		toi_module_header.enabled = this_module->enabled;
+		toi_module_header.type = this_module->type;
+		toi_module_header.index = index++;
+		strncpy(toi_module_header.name, this_module->name,
+					sizeof(toi_module_header.name) - 1);
+		/* Header, then the data length, then any data returned */
+		result = toiActiveAllocator->rw_header_chunk(WRITE,
+				this_module, (char *) &toi_module_header,
+				sizeof(toi_module_header));
+		if (!result)
+			result = toiActiveAllocator->rw_header_chunk(WRITE,
+				this_module, (char *) &len, sizeof(int));
+		if (!result && len)
+			result = toiActiveAllocator->rw_header_chunk(
+				WRITE, this_module, buffer, len);
+		if (result)
+			goto out;
+	}
+
+	/* Write a blank header to terminate the list */
+	memset(&toi_module_header, 0, sizeof(toi_module_header));
+	result = toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+			(char *) &toi_module_header, sizeof(toi_module_header));
+out:
+	toi_free_page(22, (unsigned long) buffer);
+	return result;
+}
+
+/* read_module_configs()
+ *
+ * Description:	Reload module configurations from the image header.
+ * Returns:	Int. Zero on success, -ENOMEM or -EINVAL on failure.
+ */
+
+static int read_module_configs(void)
+{
+	struct toi_module_ops *this_module;
+	char *buffer = (char *) toi_get_zeroed_page(23, TOI_ATOMIC_GFP);
+	int len, result = 0, error = -EINVAL;
+	struct toi_module_header toi_module_header;
+
+	if (!buffer) {
+		printk("Failed to allocate a buffer for reloading module "
+				"configuration info.\n");
+		return -ENOMEM;
+	}
+
+	/* All modules are initially disabled. That way, if we have a module
+	 * loaded now that wasn't loaded when we hibernated, it won't be used
+	 * in trying to read the data.
+	 */
+	list_for_each_entry(this_module, &toi_modules, module_list)
+		this_module->enabled = 0;
+
+	/* Get the first module header */
+	result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+			(char *) &toi_module_header,
+			sizeof(toi_module_header));
+	if (result) {
+		printk("Failed to read the next module header.\n");
+		goto out;
+	}
+
+	/* For each module (in registration order) */
+	while (toi_module_header.name[0]) {
+
+		/* Find the module */
+		this_module =
+			toi_find_module_given_name(toi_module_header.name);
+
+		if (!this_module) {
+			/*
+			 * Is it used? Only need to worry about filters. The
+			 * active allocator must be loaded!
+			 */
+			if (toi_module_header.enabled) {
+				toi_early_boot_message(1, TOI_CONTINUE_REQ,
+					"It looks like we need module %s for "
+					"reading the image but it hasn't been "
+					"registered.\n",
+					toi_module_header.name);
+				if (!(test_toi_state(TOI_CONTINUE_REQ)))
+					goto out;
+			} else
+				printk(KERN_INFO "Module %s configuration data "
+					"found, but the module hasn't "
+					"registered. Looks like it was "
+					"disabled, so we're ignoring its data.",
+					toi_module_header.name);
+		}
+
+		/* Get the length of the data (if any); it must fit buffer */
+		result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+				(char *) &len, sizeof(int));
+		if (result || len < 0 || len > (int) PAGE_SIZE) {
+			printk("Failed to read the length of the module %s's"
+					" configuration data.\n",
+					toi_module_header.name);
+			goto out;
+		}
+
+		/* Read any data and pass to the module (if we found one) */
+		if (len) {
+			if (toiActiveAllocator->rw_header_chunk(READ, NULL,
+					buffer, len)) {
+				printk("Failed to read the module %s's "
+					"configuration data.\n",
+					toi_module_header.name);
+				goto out;
+			}
+			if (this_module) {
+				if (!this_module->load_config_info) {
+					printk("Huh? Module %s appears to have "
+						"a save_config_info, but not a "
+						"load_config_info function!\n",
+						this_module->name);
+				} else
+					this_module->load_config_info(buffer,
+							len);
+			}
+		}
+
+		if (this_module) {
+			/* Now move this module to the tail of its lists. This
+			 * will put it in order. Any new modules will end up at
+			 * the top of the lists. They should have been set to
+			 * disabled when loaded (people will normally not edit
+			 * an initrd to load a new module and then hibernate
+			 * without using it!).
+			 */
+
+			toi_move_module_tail(this_module);
+
+			/*
+			 * We apply the disabled state; modules don't need to
+			 * save whether they were disabled and if they do, we
+			 * override them anyway.
+			 */
+			this_module->enabled = toi_module_header.enabled;
+		}
+
+		/* Get the next module header */
+		result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+				(char *) &toi_module_header,
+				sizeof(toi_module_header));
+		if (result) {
+			printk("Failed to read the next module header.\n");
+			goto out;
+		}
+
+	}
+
+	error = 0;
+out:
+	toi_free_page(23, (unsigned long) buffer);
+	return error;
+}
+
+/* write_image_header()
+ *
+ * Description:	Write the image header after writing the image proper.
+ * Returns:	Int. Zero on success or -1 on failure.
+ */
+int write_image_header(void)
+{
+	int total = pagedir1.size + pagedir2.size+2;
+	char *header_buffer = NULL;
+
+	/* Now prepare to write the header */
+	if (toiActiveAllocator->write_header_init()) {
+		abort_hibernate(TOI_FAILED_MODULE_INIT,
+				"Active allocator's write_header_init"
+				" function failed.");
+		goto write_image_header_abort;
+	}
+
+	/* Get a buffer */
+	header_buffer = (char *) toi_get_zeroed_page(24, TOI_ATOMIC_GFP);
+	if (!header_buffer) {
+		abort_hibernate(TOI_OUT_OF_MEMORY,
+			"Out of memory when trying to get page for header!");
+		goto write_image_header_abort;
+	}
+
+	/* Write hibernate header; failures must still free the buffer */
+	if (fill_toi_header((struct toi_header *) header_buffer)) {
+		abort_hibernate(TOI_OUT_OF_MEMORY,
+			"Failure to fill header information!");
+		goto write_image_header_free;
+	}
+	if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+			header_buffer, sizeof(struct toi_header))) {
+		abort_hibernate(TOI_FAILED_IO,
+			"Failed to write the image header.");
+		goto write_image_header_free;
+	}
+	toi_free_page(24, (unsigned long) header_buffer);
+
+	/* Write module configurations */
+	if (write_module_configs()) {
+		abort_hibernate(TOI_FAILED_IO,
+				"Failed to write module configs.");
+		goto write_image_header_abort;
+	}
+
+	save_dyn_pageflags(&pageset1_map);
+
+	/* Flush data and let allocator cleanup */
+	if (toiActiveAllocator->write_header_cleanup()) {
+		abort_hibernate(TOI_FAILED_IO,
+				"Failed to cleanup writing header.");
+		goto write_image_header_abort_no_cleanup;
+	}
+
+	if (test_result_state(TOI_ABORTED))
+		goto write_image_header_abort_no_cleanup;
+
+	toi_update_status(total, total, NULL);
+	return 0;
+
+write_image_header_free:
+	toi_free_page(24, (unsigned long) header_buffer);
+write_image_header_abort:
+	toiActiveAllocator->write_header_cleanup();
+write_image_header_abort_no_cleanup:
+	return -1;
+}
+
+/* sanity_check()
+ *
+ * Description:	Perform a few checks, seeking to ensure that the kernel being
+ * 		booted matches the one hibernated. They need to match so we can
+ * 		be _sure_ things will work. It is not absolutely impossible for
+ * 		resuming from a different kernel to work, just not assured.
+ * Arguments:	sh: The header which was saved at hibernate time.
+ * Returns:	NULL if all is well, otherwise text describing the problem.
+ */
+static char *sanity_check(struct toi_header *sh)
+{
+	char *problem = check_swsusp_image_kernel((struct swsusp_info *) sh);
+	const struct super_block *sb;
+
+	if (problem || test_action_state(TOI_IGNORE_ROOTFS))
+		return problem;
+
+	/* No device-backed filesystem may be writable before we resume. */
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (sb->s_flags & MS_RDONLY)
+			continue;
+		if (sb->s_type->fs_flags & FS_REQUIRES_DEV)
+			return "Device backed fs has been mounted "
+				"rw prior to resume or initrd/ramfs "
+				"is mounted rw.";
+	}
+
+	return NULL;
+}
+
+/* __read_pageset1
+ *
+ * Description:	Test for the existence of an image and attempt to load it.
+ * Returns:	Int. Zero if image found and pageset1 successfully loaded,
+ * 		-ENOMEM, -ENODATA (no image found) or -EINVAL otherwise.
+ */
+static int __read_pageset1(void)
+{
+	int i, result = 0;
+	char *header_buffer = (char *) toi_get_zeroed_page(25, TOI_ATOMIC_GFP),
+	     *sanity_error = NULL;
+	struct toi_header *toi_header;
+
+	if (!header_buffer) {
+		printk(KERN_INFO "Unable to allocate a page for reading the "
+				"signature.\n");
+		return -ENOMEM;
+	}
+
+	/* Check for an image */
+	result = toiActiveAllocator->image_exists(1);
+	if (!result) {
+		result = -ENODATA;
+		noresume_reset_modules();
+		printk(KERN_INFO "TuxOnIce: No image found.\n");
+		goto out;
+	}
+
+	/*
+	 * Prepare the active allocator for reading the image header. The
+	 * active allocator might read its own configuration.
+	 *
+	 * NB: This call may never return because there might be a signature
+	 * for a different image such that we warn the user and they choose
+	 * to reboot. (If the device ids look erroneous (2.4 vs 2.6) or the
+	 * location of the image might be unavailable if it was stored on a
+	 * network connection).
+	 */
+
+	result = toiActiveAllocator->read_header_init();
+	if (result) {
+		printk(KERN_INFO "TuxOnIce: Failed to initialise, reading the "
+				"image header.\n");
+		goto out_remove_image;
+	}
+
+	/* Check for noresume command line option */
+	if (test_toi_state(TOI_NORESUME_SPECIFIED)) {
+		printk(KERN_INFO "TuxOnIce: Noresume on command line. Removed "
+				"image.\n");
+		goto out_remove_image;
+	}
+
+	/* Check whether we've resumed before */
+	if (test_toi_state(TOI_RESUMED_BEFORE)) {
+		toi_early_boot_message(1, 0, NULL);
+		if (!(test_toi_state(TOI_CONTINUE_REQ))) {
+			printk(KERN_INFO "TuxOnIce: Tried to resume before: "
+					"Invalidated image.\n");
+			goto out_remove_image;
+		}
+	}
+
+	clear_toi_state(TOI_CONTINUE_REQ);
+
+	/* Read hibernate header */
+	result = toiActiveAllocator->rw_header_chunk(READ, NULL,
+			header_buffer, sizeof(struct toi_header));
+	if (result < 0) {
+		printk("TuxOnIce: Failed to read the image signature.\n");
+		goto out_remove_image;
+	}
+
+	toi_header = (struct toi_header *) header_buffer;
+
+	/*
+	 * NB: This call may also result in a reboot rather than returning.
+	 */
+
+	sanity_error = sanity_check(toi_header);
+	if (sanity_error) {
+		toi_early_boot_message(1, TOI_CONTINUE_REQ,
+				sanity_error);
+		printk(KERN_INFO "TuxOnIce: Sanity check failed.\n");
+		goto out_remove_image;
+	}
+
+	/*
+	 * We have an image and it looks like it will load okay.
+	 *
+	 * Get metadata from header. Don't override commandline parameters.
+	 *
+	 * We don't need to save the image size limit because it's not used
+	 * during resume and will be restored with the image anyway.
+	 */
+
+	memcpy((char *) &pagedir1,
+		(char *) &toi_header->pagedir, sizeof(pagedir1));
+	toi_result = toi_header->param0;
+	toi_bkd.toi_action = toi_header->param1;
+	toi_bkd.toi_debug_state = toi_header->param2;
+	toi_bkd.toi_default_console_level = toi_header->param3;
+	clear_toi_state(TOI_IGNORE_LOGLEVEL);
+	pagedir2.size = toi_header->pageset_2_size;
+	for (i = 0; i < 4; i++)
+		toi_bkd.toi_io_time[i/2][i%2] =
+			toi_header->io_time[i/2][i%2];
+	boot_kernel_data_buffer = toi_header->bkd;
+
+	/* Read module configurations */
+	result = read_module_configs();
+	if (result) {
+		pagedir1.size = pagedir2.size = 0;
+		printk(KERN_INFO "TuxOnIce: Failed to read TuxOnIce module "
+				"configurations.\n");
+		clear_action_state(TOI_KEEP_IMAGE);
+		goto out_remove_image;
+	}
+
+	toi_prepare_console();
+
+	set_toi_state(TOI_NOW_RESUMING);
+
+	if (pre_resume_freeze())
+		goto out_reset_console;
+
+	toi_cond_pause(1, "About to read original pageset1 locations.");
+
+	/*
+	 * Read original pageset1 locations. These are the addresses we can't
+	 * use for the data to be restored.
+	 */
+
+	if (allocate_dyn_pageflags(&pageset1_map, 0) ||
+	    allocate_dyn_pageflags(&pageset1_copy_map, 0) ||
+	    allocate_dyn_pageflags(&io_map, 0))
+		goto out_reset_console;
+
+	if (load_dyn_pageflags(&pageset1_map))
+		goto out_reset_console;
+
+	/* Clean up after reading the header */
+	result = toiActiveAllocator->read_header_cleanup();
+	if (result) {
+		printk("TuxOnIce: Failed to cleanup after reading the image "
+				"header.\n");
+		goto out_reset_console;
+	}
+
+	toi_cond_pause(1, "About to read pagedir.");
+
+	/*
+	 * Get the addresses of pages into which we will load the kernel to
+	 * be copied back
+	 */
+	if (toi_get_pageset1_load_addresses()) {
+		printk(KERN_INFO "TuxOnIce: Failed to get load addresses for "
+				"pageset1.\n");
+		goto out_reset_console;
+	}
+
+	/* Read the original kernel back */
+	toi_cond_pause(1, "About to read pageset 1.");
+
+	if (read_pageset(&pagedir1, 0)) {
+		toi_prepare_status(CLEAR_BAR, "Failed to read pageset 1.");
+		result = -EIO;	/* NB: replaced by -EINVAL below */
+		printk(KERN_INFO "TuxOnIce: Failed to get load pageset1.\n");
+		goto out_reset_console;
+	}
+
+	toi_cond_pause(1, "About to restore original kernel.");
+	result = 0;
+
+	if (!test_action_state(TOI_KEEP_IMAGE) &&
+	    toiActiveAllocator->mark_resume_attempted)
+		toiActiveAllocator->mark_resume_attempted(1);
+
+out:
+	toi_free_page(25, (unsigned long) header_buffer);
+	return result;
+
+out_reset_console:
+	toi_cleanup_console();	/* then fall through */
+
+out_remove_image:
+	free_dyn_pageflags(&pageset1_map);
+	free_dyn_pageflags(&pageset1_copy_map);
+	free_dyn_pageflags(&io_map);
+	result = -EINVAL;
+	if (!test_action_state(TOI_KEEP_IMAGE))
+		toiActiveAllocator->remove_image();
+	toiActiveAllocator->read_header_cleanup();
+	noresume_reset_modules();
+	goto out;
+}
+
+/* read_pageset1()
+ *
+ * Description:	Attempt to read the header and pageset1 of a hibernate image.
+ * 		Handle the outcome, complaining where appropriate.
+ * Returns:	The result of __read_pageset1(), unchanged.
+ */
+
+int read_pageset1(void)
+{
+	int error = __read_pageset1();
+	int expected = !error || error == -ENODATA || error == -EINVAL;
+
+	/* Anything else is unexpected: abort unless TOI_ABORTED is set. */
+	if (!expected && !test_result_state(TOI_ABORTED))
+		abort_hibernate(TOI_IMAGE_ERROR,
+			"TuxOnIce: Error %d resuming\n", error);
+
+	return error;
+}
+
+/*
+ * get_have_image_data()
+ * Build the have_image text in a fresh page: "0\n" if no image header can be
+ * read, else "1\n" plus the header's machine and version strings. Returns
+ * the page (freed by the caller) or NULL if the allocation failed.
+ */
+static char *get_have_image_data(void)
+{
+	struct toi_header *hdr;
+	char *text = (char *) toi_get_zeroed_page(26, TOI_ATOMIC_GFP);
+
+	if (!text) {
+		printk(KERN_INFO "Output buffer null.\n");
+		return NULL;
+	}
+
+	/* Check for an image */
+	if (!toiActiveAllocator->image_exists(1) ||
+	    toiActiveAllocator->read_header_init() ||
+	    toiActiveAllocator->rw_header_chunk(READ, NULL,
+			text, sizeof(struct toi_header))) {
+		sprintf(text, "0\n");
+		/*
+		 * From an initrd/ramfs, catting have_image and
+		 * getting a result of 0 is sufficient.
+		 */
+		clear_toi_state(TOI_BOOT_TIME);
+	} else {
+		/* The header sits in the page that the text overwrites. */
+		hdr = (struct toi_header *) text;
+		sprintf(text, "1\n%s\n%s\n",
+				hdr->uts.machine, hdr->uts.version);
+
+		/* Check whether we've resumed before */
+		if (test_toi_state(TOI_RESUMED_BEFORE))
+			strcat(text, "Resumed before.\n");
+	}
+
+	noresume_reset_modules();
+	return text;
+}
+
+/* read_pageset2()
+ *
+ * Description:	Read pageset2 back in. When hibernating, only the portion
+ * 		that was overwritten with pageset1 pages needs rereading;
+ * 		when resuming, all of it is read. Does nothing if pageset2
+ * 		is empty.
+ * Arguments:	overwrittenpagesonly: Boolean. Read only the pages which
+ * 		would have been overwritten by pageset1?
+ * Returns:	Int. Zero if no error, otherwise read_pageset()'s result.
+ */
+int read_pageset2(int overwrittenpagesonly)
+{
+	int result = 0;
+
+	/* An empty pageset2 needs no I/O and no status update. */
+	if (pagedir2.size) {
+		result = read_pageset(&pagedir2, overwrittenpagesonly);
+
+		toi_update_status(100, 100, NULL);
+		toi_cond_pause(1, "Pagedir 2 read.");
+	}
+
+	return result;
+}
+
+/* image_exists_read
+ *
+ * Write "-1\n" into page if there is no active allocator, otherwise the
+ * text from get_have_image_data(). Returns the length written, or count
+ * if storage could not be activated. Incoming buffer is PAGE_SIZE and
+ * result is far less than that, so we don't worry about overflow.
+ */
+int image_exists_read(const char *page, int count)
+{
+	char *out = (char *) page, *text = NULL;
+	int written = 0;
+
+	if (toi_activate_storage(0))
+		return count;
+
+	if (!test_toi_state(TOI_RESUME_DEVICE_OK))
+		toi_attempt_to_parse_resume_device(0);
+
+	if (toiActiveAllocator)
+		text = get_have_image_data();
+	else
+		written = sprintf(out, "-1\n");
+
+	if (text) {
+		written = sprintf(out, "%s",  text);
+		toi_free_page(26, (unsigned long) text);
+	}
+
+	toi_deactivate_storage(0);
+
+	return written;
+}
+
+/* image_exists_write
+ *
+ * Invalidate an image if one exists. The written data is ignored.
+ */
+int image_exists_write(const char *buffer, int count)
+{
+	/* If storage can't be activated, nothing is removed. */
+	if (!toi_activate_storage(0)) {
+		if (toiActiveAllocator &&
+		    toiActiveAllocator->image_exists(1))
+			toiActiveAllocator->remove_image();
+
+		toi_deactivate_storage(0);
+		clear_result_state(TOI_KEPT_IMAGE);
+	}
+
+	return count;
+}
+
+#ifdef CONFIG_TOI_EXPORTS
+EXPORT_SYMBOL_GPL(toi_attempt_to_parse_resume_device);
+EXPORT_SYMBOL_GPL(attempt_to_parse_resume_device2);
+EXPORT_SYMBOL_GPL(toi_io_workers);
+EXPORT_SYMBOL_GPL(toi_io_queue_flusher);
+EXPORT_SYMBOL_GPL(toi_bio_queue_flusher_should_finish);
+#endif
+
diff -Npur linux-2.6-block/kernel/power/tuxonice_io.h linux-2.6-block-custom/kernel/power/tuxonice_io.h
--- linux-2.6-block/kernel/power/tuxonice_io.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_io.h	2008-09-26 19:48:23.995776559 +0900
@@ -0,0 +1,71 @@
+/*
+ * kernel/power/tuxonice_io.h
+ *
+ * Copyright (C) 2005-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains high level IO routines for hibernating.
+ *
+ */
+
+#include <linux/utsname.h>
+#include "tuxonice_pagedir.h"
+#include "power.h"
+
+/* Non-module data saved in our image header */
+struct toi_header {
+	/*
+	 * Mirror struct swsusp_info, but without
+	 * the page aligned attribute
+	 */
+	struct new_utsname uts;
+	u32 version_code;
+	unsigned long num_physpages;
+	int cpus;
+	unsigned long image_pages;
+	unsigned long pages;
+	unsigned long size;
+
+	/* Our own data */
+	unsigned long orig_mem_free;
+	int page_size;
+	int pageset_2_size;	/* restored into pagedir2.size on resume */
+	int param0;		/* restored into toi_result */
+	int param1;		/* restored into toi_bkd.toi_action */
+	int param2;		/* restored into toi_bkd.toi_debug_state */
+	int param3;		/* toi_bkd.toi_default_console_level */
+	int progress0;
+	int progress1;
+	int progress2;
+	int progress3;
+	int io_time[2][2];	/* restored into toi_bkd.toi_io_time */
+	struct pagedir pagedir;	/* copied into pagedir1 on resume */
+	dev_t root_fs;
+	unsigned long bkd; /* Boot kernel data locn */
+};
+
+extern int write_pageset(struct pagedir *pagedir);
+extern int write_image_header(void);
+extern int read_pageset1(void);
+extern int read_pageset2(int overwrittenpagesonly);
+
+extern int toi_attempt_to_parse_resume_device(int quiet);
+extern void attempt_to_parse_resume_device2(void);
+extern void attempt_to_parse_alt_resume_param(void);
+int image_exists_read(const char *page, int count);
+int image_exists_write(const char *buffer, int count);
+extern void save_restore_alt_param(int replace, int quiet);
+extern atomic_t toi_io_workers;	/* I/O workers still running */
+
+/* Args to save_restore_alt_param */
+#define RESTORE 0
+#define SAVE 1
+
+#define NOQUIET 0
+#define QUIET 1
+
+extern dev_t name_to_dev_t(char *line);
+
+extern wait_queue_head_t toi_io_queue_flusher;
+extern int toi_bio_queue_flusher_should_finish;
diff -Npur linux-2.6-block/kernel/power/tuxonice_modules.c linux-2.6-block-custom/kernel/power/tuxonice_modules.c
--- linux-2.6-block/kernel/power/tuxonice_modules.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_modules.c	2008-09-26 19:48:23.999808217 +0900
@@ -0,0 +1,465 @@
+/*
+ * kernel/power/tuxonice_modules.c
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_ui.h"
+
+LIST_HEAD(toi_filters);
+LIST_HEAD(toiAllocators);
+LIST_HEAD(toi_modules);
+
+struct toi_module_ops *toiActiveAllocator;
+int toi_num_filters;
+int toiNumAllocators, toi_num_modules;
+
+/*
+ * toi_header_storage_for_modules
+ *
+ * Returns the header space wanted by enabled modules (of the writers,
+ * only the active allocator) for configuration needed before the
+ * original kernel is copied back. Pageset2 data can be excluded: it
+ * is available anyway once the kernel is copied back.
+ */
+long toi_header_storage_for_modules(void)
+{
+	struct toi_module_ops *mod;
+	int total = 0;
+
+	list_for_each_entry(mod, &toi_modules, module_list) {
+		int wanted;
+
+		if (!mod->enabled || !mod->storage_needed)
+			continue;
+		if (mod->type == WRITER_MODULE && toiActiveAllocator != mod)
+			continue;
+		wanted = mod->storage_needed() +
+			sizeof(struct toi_module_header) +
+			sizeof(int);
+		mod->header_requested = wanted;
+		total += wanted;
+	}
+
+	/* Room for the empty header that terminates the list */
+	return total + sizeof(struct toi_module_header);
+}
+
+/*
+ * toi_memory_for_modules
+ *
+ * Adds up memory_needed() over all enabled modules and returns the
+ * total, rounded up to whole pages, that they request for doing their
+ * work during the cycle. A non-zero print_parts logs each request.
+ */
+
+long toi_memory_for_modules(int print_parts)
+{
+	struct toi_module_ops *mod;
+	long total = 0, pages;
+
+	if (print_parts)
+		printk(KERN_INFO "Memory for modules:\n===================\n");
+	list_for_each_entry(mod, &toi_modules, module_list) {
+		int wanted;
+
+		if (!mod->enabled || !mod->memory_needed)
+			continue;
+		wanted = mod->memory_needed();
+		total += wanted;
+		if (!print_parts)
+			continue;
+		printk(KERN_INFO "%10d bytes (%5ld pages) for "
+				"module '%s'.\n", wanted,
+				DIV_ROUND_UP(wanted, PAGE_SIZE), mod->name);
+	}
+
+	pages = DIV_ROUND_UP(total, PAGE_SIZE);
+	if (print_parts)
+		printk(KERN_INFO " => %ld bytes, %ld pages.\n", total, pages);
+
+	return pages;
+}
+
+/*
+ * toi_expected_compression_ratio
+ *
+ * Starting from 100, scale by each enabled module's
+ * expected_compression() (taken as a percentage) and return the result.
+ */
+int toi_expected_compression_ratio(void)
+{
+	struct toi_module_ops *mod;
+	int percent = 100;
+
+	list_for_each_entry(mod, &toi_modules, module_list) {
+		if (!mod->enabled || !mod->expected_compression)
+			continue;
+
+		percent *= mod->expected_compression();
+		percent /= 100;
+	}
+
+	return percent;
+}
+
+/* toi_find_module_given_dir
+ * Functionality :	Return the registered module whose own sysfs
+ * 			directory is @name, or NULL if there is none.
+ */
+
+static struct toi_module_ops *toi_find_module_given_dir(char *name)
+{
+	struct toi_module_ops *this_module;
+
+	list_for_each_entry(this_module, &toi_modules, module_list) {
+		/* Modules may register without a directory of their own */
+		if (this_module->directory &&
+		    !strcmp(name, this_module->directory))
+			return this_module;
+	}
+
+	return NULL;
+}
+
+/* toi_find_module_given_name
+ * Functionality :	Walk toi_modules and return the first entry
+ * 			whose name equals @name (NULL when none does).
+ */
+
+struct toi_module_ops *toi_find_module_given_name(char *name)
+{
+	struct toi_module_ops *candidate;
+
+	list_for_each_entry(candidate, &toi_modules, module_list) {
+		if (strcmp(name, candidate->name) != 0)
+			continue;
+
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * toi_print_module_debug_info
+ * Functionality   : Append enabled modules' debug text to a buffer.
+ */
+int toi_print_module_debug_info(char *buffer, int buffer_size)
+{
+	struct toi_module_ops *this_module;
+	int len = 0;
+
+	list_for_each_entry(this_module, &toi_modules, module_list) {
+		if (!this_module->enabled || !this_module->print_debug_info)
+			continue;
+		len += this_module->print_debug_info(buffer + len,
+				buffer_size - len);
+	}
+
+	/*
+	 * Ensure null terminated. The last valid index is buffer_size - 1;
+	 * writing buffer[buffer_size] would land one byte past the end.
+	 */
+	if (buffer_size > 0)
+		buffer[buffer_size - 1] = 0;
+
+	return len;
+}
+
+/*
+ * toi_register_module - add a module to the lists and create its sysfs
+ * entries. Returns 0, -EBUSY (name already registered), -EINVAL (bad
+ * type), -ENODEV (shared directory owner missing) or a sysfs error.
+ */
+int toi_register_module(struct toi_module_ops *module)
+{
+	int i;
+	struct kobject *kobj;
+
+	module->enabled = 1;
+
+	if (toi_find_module_given_name(module->name)) {
+		printk(KERN_INFO "TuxOnIce: Trying to load module %s,"
+				" which is already registered.\n",
+				module->name);
+		return -EBUSY;
+	}
+
+	switch (module->type) {
+	case FILTER_MODULE:
+		list_add_tail(&module->type_list, &toi_filters);
+		toi_num_filters++;
+		break;
+	case WRITER_MODULE:
+		list_add_tail(&module->type_list, &toiAllocators);
+		toiNumAllocators++;
+		break;
+	case MISC_MODULE:
+	case MISC_HIDDEN_MODULE:
+		break;
+	default:
+		printk("Hmmm. Module '%s' has an invalid type."
+			" It has been ignored.\n", module->name);
+		return -EINVAL;
+	}
+	list_add_tail(&module->module_list, &toi_modules);
+	toi_num_modules++;
+
+	if (!module->directory && !module->shared_directory)
+		return 0;
+
+	/*
+	 * Modules may share a directory, but those with shared_dir
+	 * set must be loaded (via symbol dependencies) after parents
+	 * and unloaded beforehand.
+	 */
+	if (module->shared_directory) {
+		struct toi_module_ops *shared =
+			toi_find_module_given_dir(module->shared_directory);
+		if (!shared) {
+			printk("TuxOnIce: Module %s wants to share %s's "
+					"directory but %s isn't loaded.\n",
+					module->name, module->shared_directory,
+					module->shared_directory);
+			toi_unregister_module(module);
+			return -ENODEV;
+		}
+		kobj = shared->dir_kobj;
+	} else {
+		if (!strncmp(module->directory, "[ROOT]", 6))
+			kobj = tuxonice_kobj;
+		else
+			kobj = make_toi_sysdir(module->directory);
+	}
+	module->dir_kobj = kobj;
+	for (i = 0; i < module->num_sysfs_entries; i++) {
+		int result = toi_register_sysfs_file(kobj,
+				&module->sysfs_data[i]);
+		if (result)
+			return result;
+	}
+	return 0;
+}
+
+/*
+ * toi_unregister_module - undo toi_register_module(): drop the sysfs
+ * files, remove the module's own (non-shared, non-"[ROOT]") directory
+ * and unlink it from the type and module lists.
+ */
+void toi_unregister_module(struct toi_module_ops *module)
+{
+	int i;
+
+	if (module->dir_kobj)
+		for (i = 0; i < module->num_sysfs_entries; i++)
+			toi_unregister_sysfs_file(module->dir_kobj,
+					&module->sysfs_data[i]);
+
+	if (!module->shared_directory && module->directory &&
+			strncmp(module->directory, "[ROOT]", 6))
+		remove_toi_sysdir(module->dir_kobj);
+
+	switch (module->type) {
+	case FILTER_MODULE:
+		list_del(&module->type_list);
+		toi_num_filters--;
+		break;
+	case WRITER_MODULE:
+		list_del(&module->type_list);
+		toiNumAllocators--;
+		if (toiActiveAllocator == module) {
+			toiActiveAllocator = NULL;
+			clear_toi_state(TOI_CAN_RESUME);
+			clear_toi_state(TOI_CAN_HIBERNATE);
+		}
+		break;
+	case MISC_MODULE:
+	case MISC_HIDDEN_MODULE:
+		break;
+	default:
+		printk("Hmmm. Module '%s' has an invalid type."
+			" It has been ignored.\n", module->name);
+		return;
+	}
+	list_del(&module->module_list);
+	toi_num_modules--;
+}
+
+/*
+ * toi_move_module_tail - move a module to the end of its type list
+ * (filters or allocators) and of toi_modules. Used to rearrange
+ * modules when reloading the config.
+ */
+void toi_move_module_tail(struct toi_module_ops *module)
+{
+	switch (module->type) {
+	case FILTER_MODULE:
+		if (toi_num_filters > 1)
+			list_move_tail(&module->type_list, &toi_filters);
+		break;
+	case WRITER_MODULE:
+		if (toiNumAllocators > 1)
+			list_move_tail(&module->type_list, &toiAllocators);
+		break;
+	case MISC_MODULE:
+	case MISC_HIDDEN_MODULE:
+		break;
+	default:
+		printk("Hmmm. Module '%s' has an invalid type."
+			" It has been ignored.\n", module->name);
+		return;
+	}
+	if ((toi_num_filters + toiNumAllocators) > 1)
+		list_move_tail(&module->module_list, &toi_modules);
+}
+
+/*
+ * toi_initialise_modules - reset header accounting on every module and
+ * call initialise() on enabled ones whose 'early' flag equals @early.
+ * On the first failure, run toi_cleanup_modules() and return the error.
+ */
+int toi_initialise_modules(int starting_cycle, int early)
+{
+	struct toi_module_ops *this_module;
+	int result;
+
+	list_for_each_entry(this_module, &toi_modules, module_list) {
+		this_module->header_requested = 0;
+		this_module->header_used = 0;
+		if (!this_module->enabled)
+			continue;
+		if (this_module->early != early)
+			continue;
+		if (this_module->initialise) {
+			toi_message(TOI_MEMORY, TOI_MEDIUM, 1,
+				"Initialising module %s.\n",
+				this_module->name);
+			result = this_module->initialise(starting_cycle);
+			if (result) {
+				toi_cleanup_modules(starting_cycle);
+				return result;
+			}
+			this_module->initialised = 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * toi_cleanup_modules - tell modules the work is done: call cleanup()
+ * (where provided) on every enabled module that is marked initialised,
+ * then clear its initialised flag.
+ */
+void toi_cleanup_modules(int finishing_cycle)
+{
+	struct toi_module_ops *this_module;
+
+	list_for_each_entry(this_module, &toi_modules, module_list) {
+		if (!this_module->enabled || !this_module->initialised)
+			continue;
+		if (this_module->cleanup) {
+			toi_message(TOI_MEMORY, TOI_MEDIUM, 1,
+				"Cleaning up module %s.\n",
+				this_module->name);
+			this_module->cleanup(finishing_cycle);
+		}
+		this_module->initialised = 0;
+	}
+}
+
+/*
+ * toi_get_next_filter - return the first enabled filter after
+ * @filter_sought (or the first enabled filter if it is NULL); once the
+ * filters run out, return toiActiveAllocator.
+ */
+struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *filter_sought)
+{
+	struct toi_module_ops *last_filter = NULL, *this_filter = NULL;
+
+	list_for_each_entry(this_filter, &toi_filters, type_list) {
+		if (!this_filter->enabled)
+			continue;
+		if ((last_filter == filter_sought) || (!filter_sought))
+			return this_filter;
+		last_filter = this_filter;
+	}
+
+	return toiActiveAllocator;
+}
+
+/**
+ * toi_print_modules: Printk what support is loaded ([name] = disabled).
+ */
+void toi_print_modules(void)
+{
+	struct toi_module_ops *this_module;
+	int prev = 0;
+
+	printk("TuxOnIce " TOI_CORE_VERSION ", with support for");
+
+	list_for_each_entry(this_module, &toi_modules, module_list) {
+		if (this_module->type == MISC_HIDDEN_MODULE)
+			continue;
+		printk("%s %s%s%s", prev ? "," : "",
+				this_module->enabled ? "" : "[",
+				this_module->name,
+				this_module->enabled ? "" : "]");
+		prev = 1;
+	}
+
+	printk(".\n");
+}
+
+/* toi_get_modules
+ *
+ * Take a reference on every module so none can go away under us. If one
+ * get fails, drop those already taken and return -EINVAL; else return 0.
+ */
+int toi_get_modules(void)
+{
+	struct toi_module_ops *this_module;
+
+	list_for_each_entry(this_module, &toi_modules, module_list) {
+		struct toi_module_ops *this_module2;
+
+		if (try_module_get(this_module->module))
+			continue;
+
+		/* Failed! Reverse gets and return error */
+		list_for_each_entry(this_module2, &toi_modules,
+				module_list) {
+			if (this_module == this_module2)
+				return -EINVAL;
+			module_put(this_module2->module);
+		}
+	}
+	return 0;
+}
+
+/* toi_put_modules
+ *
+ * Drop the module reference held on each entry of toi_modules.
+ */
+
+void toi_put_modules(void)
+{
+	struct toi_module_ops *mod;
+
+	list_for_each_entry(mod, &toi_modules, module_list)
+		module_put(mod->module);
+}
+
+#ifdef CONFIG_TOI_EXPORTS
+EXPORT_SYMBOL_GPL(toi_register_module);
+EXPORT_SYMBOL_GPL(toi_unregister_module);
+EXPORT_SYMBOL_GPL(toi_get_next_filter);
+EXPORT_SYMBOL_GPL(toiActiveAllocator);
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_modules.h linux-2.6-block-custom/kernel/power/tuxonice_modules.h
--- linux-2.6-block/kernel/power/tuxonice_modules.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_modules.h	2008-09-26 19:48:23.999808217 +0900
@@ -0,0 +1,176 @@
+/*
+ * kernel/power/tuxonice_modules.h
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * It contains declarations for modules. Modules are additions to
+ * TuxOnIce that provide facilities such as image compression or
+ * encryption, backends for storage of the image and user interfaces.
+ *
+ */
+
+#ifndef TOI_MODULES_H
+#define TOI_MODULES_H
+
+/* This is the maximum size we store in the image header for a module name */
+#define TOI_MAX_MODULE_NAME_LENGTH 30
+
+/* Per-module metadata; header space is budgeted for one per module */
+struct toi_module_header {
+	char name[TOI_MAX_MODULE_NAME_LENGTH];
+	int enabled;
+	int type;
+	int index;
+	int data_length;
+	unsigned long signature;
+};
+
+enum {
+	FILTER_MODULE,
+	WRITER_MODULE,
+	MISC_MODULE, /* Block writer, eg. */
+	MISC_HIDDEN_MODULE,
+};
+
+enum {
+	TOI_ASYNC,
+	TOI_SYNC
+};
+
+struct toi_module_ops {
+	/* Fields common to all modules */
+	int type;
+	char *name;
+	char *directory;
+	char *shared_directory;
+	struct kobject *dir_kobj;
+	struct module *module;
+	int enabled, early, initialised;
+	struct list_head module_list;
+
+	/* List of filters or allocators */
+	struct list_head list, type_list;
+
+	/*
+	 * Requirements for memory and storage in
+	 * the image header.
+	 */
+	int (*memory_needed) (void);
+	int (*storage_needed) (void);
+
+	int header_requested, header_used;
+
+	int (*expected_compression) (void);
+
+	/*
+	 * Debug info and saved configuration
+	 */
+	int (*print_debug_info) (char *buffer, int size);
+	int (*save_config_info) (char *buffer);
+	void (*load_config_info) (char *buffer, int len);
+
+	/*
+	 * Initialise & cleanup - general routines called
+	 * at the start and end of a cycle.
+	 */
+	int (*initialise) (int starting_cycle);
+	void (*cleanup) (int finishing_cycle);
+
+	/*
+	 * Calls for allocating storage (allocators only).
+	 *
+	 * Header space is allocated separately. Note that allocation
+	 * of space for the header might result in allocated space
+	 * being stolen from the main pool if there is no unallocated
+	 * space. We have to be able to allocate enough space for
+	 * the header. We can eat memory to ensure there is enough
+	 * for the main pool.
+	 */
+
+	int (*storage_available) (void);
+	void (*reserve_header_space) (int space_requested);
+	int (*allocate_storage) (int space_requested);
+	int (*storage_allocated) (void);
+	int (*release_storage) (void);
+
+	/*
+	 * Routines used in image I/O.
+	 */
+	int (*rw_init) (int rw, int stream_number);
+	int (*rw_cleanup) (int rw);
+	int (*write_page) (unsigned long index, struct page *buffer_page,
+			unsigned int buf_size);
+	int (*read_page) (unsigned long *index, struct page *buffer_page,
+			unsigned int *buf_size);
+	void (*io_flusher) (int rw);
+
+	/* Reset module if image exists but reading aborted */
+	void (*noresume_reset) (void);
+
+	/* Read and write the metadata */
+	int (*write_header_init) (void);
+	int (*write_header_cleanup) (void);
+
+	int (*read_header_init) (void);
+	int (*read_header_cleanup) (void);
+
+	int (*rw_header_chunk) (int rw, struct toi_module_ops *owner,
+			char *buffer_start, int buffer_size);
+
+	int (*rw_header_chunk_noreadahead) (int rw,
+			struct toi_module_ops *owner, char *buffer_start,
+			int buffer_size);
+
+	/* Attempt to parse an image location */
+	int (*parse_sig_location) (char *buffer, int only_writer, int quiet);
+
+	/* Determine whether image exists that we can restore */
+	int (*image_exists) (int quiet);
+
+	/* Mark the image as having tried to resume */
+	int (*mark_resume_attempted) (int);
+
+	/* Destroy image if one exists */
+	int (*remove_image) (void);
+
+	/* Sysfs Data */
+	struct toi_sysfs_data *sysfs_data;
+	int num_sysfs_entries;
+};
+
+extern int toi_num_modules, toiNumAllocators;
+
+extern struct toi_module_ops *toiActiveAllocator;
+extern struct list_head toi_filters, toiAllocators, toi_modules;
+
+extern void toi_prepare_console_modules(void);
+extern void toi_cleanup_console_modules(void);
+
+extern struct toi_module_ops *toi_find_module_given_name(char *name);
+extern struct toi_module_ops *toi_get_next_filter(struct toi_module_ops *);
+
+extern int toi_register_module(struct toi_module_ops *module);
+extern void toi_move_module_tail(struct toi_module_ops *module);
+
+extern long toi_header_storage_for_modules(void);
+extern long toi_memory_for_modules(int print_parts);
+extern int toi_expected_compression_ratio(void);
+
+extern int toi_print_module_debug_info(char *buffer, int buffer_size);
+extern int toi_register_module(struct toi_module_ops *module);
+extern void toi_unregister_module(struct toi_module_ops *module);
+
+extern int toi_initialise_modules(int starting_cycle, int early);
+#define toi_initialise_modules_early(starting) \
+	toi_initialise_modules(starting, 1)
+#define toi_initialise_modules_late(starting) \
+	toi_initialise_modules(starting, 0)
+extern void toi_cleanup_modules(int finishing_cycle);
+
+extern void toi_print_modules(void);
+
+int toi_get_modules(void);
+void toi_put_modules(void);
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_netlink.c linux-2.6-block-custom/kernel/power/tuxonice_netlink.c
--- linux-2.6-block/kernel/power/tuxonice_netlink.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_netlink.c	2008-09-26 19:48:23.999808217 +0900
@@ -0,0 +1,327 @@
+/*
+ * kernel/power/tuxonice_netlink.c
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Functions for communicating with a userspace helper via netlink.
+ */
+
+
+#include <linux/suspend.h>
+#include "tuxonice_netlink.h"
+#include "tuxonice.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_alloc.h"
+
+struct user_helper_data *uhd_list;
+
+/*
+ * Top up the emergency SKB pool to pool_limit, stopping early if an
+ * allocation fails. The pool is a stack linked through skb->next.
+ */
+static void toi_fill_skb_pool(struct user_helper_data *uhd)
+{
+	struct sk_buff *fresh;
+
+	for (; uhd->pool_level < uhd->pool_limit; uhd->pool_level++) {
+		fresh = alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
+		if (!fresh)
+			return;
+
+		/* Push onto the head of the list */
+		fresh->next = uhd->emerg_skbs;
+		uhd->emerg_skbs = fresh;
+	}
+}
+
+/*
+ * Try to allocate a single skb. If we can't get one, try to use one from
+ * our pool.
+ */
+static struct sk_buff *toi_get_skb(struct user_helper_data *uhd)
+{
+	struct sk_buff *skb =
+		alloc_skb(NLMSG_SPACE(uhd->skb_size), TOI_ATOMIC_GFP);
+
+	if (!skb && uhd->emerg_skbs) {
+		skb = uhd->emerg_skbs;
+		uhd->emerg_skbs = skb->next;
+		skb->next = NULL;
+		uhd->pool_level--;
+	}
+
+	return skb;
+}
+
+/* Return an unsent skb to the emergency pool, or free it when the pool is
+ * full. pool_level is bumped so it keeps matching the list length. */
+static void put_skb(struct user_helper_data *uhd, struct sk_buff *skb)
+{
+	if (uhd->pool_level < uhd->pool_limit) {
+		skb->next = uhd->emerg_skbs;
+		uhd->emerg_skbs = skb;
+		uhd->pool_level++;
+	} else
+		kfree_skb(skb);
+}
+
+void toi_send_netlink_message(struct user_helper_data *uhd,
+		int type, void *params, size_t len)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	void *dest;
+	struct task_struct *t;
+
+	if (uhd->pid == -1)
+		return;
+	/* No helper is registered while pid is -1; otherwise get an skb */
+	skb = toi_get_skb(uhd);
+	if (!skb) {
+		printk(KERN_INFO "toi_netlink: Can't allocate skb!\n");
+		return;
+	}
+
+	/* NLMSG_PUT contains a hidden goto nlmsg_failure */
+	nlh = NLMSG_PUT(skb, 0, uhd->sock_seq, type, len);
+	uhd->sock_seq++;
+
+	dest = NLMSG_DATA(nlh);
+	if (params && len > 0)
+		memcpy(dest, params, len);
+
+	netlink_unicast(uhd->nl, skb, uhd->pid, 0);
+	/* Wake the helper task, then yield (presumably so it can run) */
+	read_lock(&tasklist_lock);
+	t = find_task_by_pid_type_ns(PIDTYPE_PID, uhd->pid, &init_pid_ns);
+	if (!t) {
+		read_unlock(&tasklist_lock);
+		if (uhd->pid > -1)
+			printk(KERN_INFO "Hmm. Can't find the userspace task"
+				" %d.\n", uhd->pid);
+		return;
+	}
+	wake_up_process(t);
+	read_unlock(&tasklist_lock);
+
+	yield();
+
+	return;
+
+nlmsg_failure:
+	if (skb)
+		put_skb(uhd, skb);
+}
+EXPORT_SYMBOL_GPL(toi_send_netlink_message);
+
+static void send_whether_debugging(struct user_helper_data *uhd)
+{
+	/* The reply payload is a single int that is always 1 */
+	static int debugging_flag = 1;
+	toi_send_netlink_message(uhd, NETLINK_MSG_IS_DEBUGGING,
+			&debugging_flag, sizeof(debugging_flag));
+}
+
+/*
+ * Set PF_NOFREEZE on task @pid so it can run whilst we are hibernating,
+ * record it as uhd->pid and ack. Returns -EINVAL if no such task exists.
+ */
+static int nl_set_nofreeze(struct user_helper_data *uhd, int pid)
+{
+	struct task_struct *t;
+
+	read_lock(&tasklist_lock);
+	t = find_task_by_pid_type_ns(PIDTYPE_PID, pid, &init_pid_ns);
+	if (!t) {
+		read_unlock(&tasklist_lock);
+		printk(KERN_INFO "Strange. Can't find the userspace task %d.\n",
+				pid);
+		return -EINVAL;
+	}
+
+	t->flags |= PF_NOFREEZE;
+
+	read_unlock(&tasklist_lock);
+	uhd->pid = pid;
+
+	toi_send_netlink_message(uhd, NETLINK_MSG_NOFREEZE_ACK, NULL, 0);
+
+	return 0;
+}
+
+/*
+ * Handle the helper's READY message: complete wait_for_process when its
+ * interface version matches ours, else call not_ready() and fail.
+ */
+static int nl_ready(struct user_helper_data *uhd, int version)
+{
+	if (version == uhd->interface_version) {
+		complete(&uhd->wait_for_process);
+		return 0;
+	}
+
+	printk(KERN_INFO "%s userspace process using invalid interface"
+			" version. Trying to continue without it.\n",
+			uhd->name);
+	if (uhd->not_ready)
+		uhd->not_ready();
+	return -EINVAL;
+}
+
+void toi_netlink_close_complete(struct user_helper_data *uhd)
+{
+	if (uhd->nl) {
+		sock_release(uhd->nl->sk_socket);
+		uhd->nl = NULL;
+	}
+	/* Free the emergency pool; zero its level so the next setup refills */
+	while (uhd->emerg_skbs) {
+		struct sk_buff *next = uhd->emerg_skbs->next;
+		kfree_skb(uhd->emerg_skbs);
+		uhd->emerg_skbs = next;
+	}
+	uhd->pool_level = 0;
+	uhd->pid = -1;
+}
+EXPORT_SYMBOL_GPL(toi_netlink_close_complete);
+
+static int toi_nl_gen_rcv_msg(struct user_helper_data *uhd,
+		struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	int type;
+	int *data;
+	int err;
+
+	/* Let the more specific handler go first. It returns
+	 * 1 for valid messages that it doesn't know. */
+	err = uhd->rcv_msg(skb, nlh);
+	if (err != 1)
+		return err;
+	/* Unknown to the specific handler: try the generic messages */
+	type = nlh->nlmsg_type;
+
+	/* Only allow one task to receive NOFREEZE privileges */
+	if (type == NETLINK_MSG_NOFREEZE_ME && uhd->pid != -1) {
+		printk("Received extra nofreeze me requests.\n");
+		return -EBUSY;
+	}
+
+	data = (int *)NLMSG_DATA(nlh);
+
+	switch (type) {
+	case NETLINK_MSG_NOFREEZE_ME:
+		return nl_set_nofreeze(uhd, nlh->nlmsg_pid);
+	case NETLINK_MSG_GET_DEBUGGING:
+		send_whether_debugging(uhd);
+		return 0;
+	case NETLINK_MSG_READY:
+		if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int))) {
+			printk(KERN_INFO "Invalid ready mesage.\n");
+			return -EINVAL;
+		}
+		return nl_ready(uhd, *data);
+	case NETLINK_MSG_CLEANUP:
+		toi_netlink_close_complete(uhd);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static void toi_user_rcv_skb(struct sk_buff *skb)
+{
+	int err;
+	struct nlmsghdr *nlh;
+	struct user_helper_data *uhd = uhd_list;
+
+	while (uhd && uhd->netlink_id != skb->sk->sk_protocol)
+		uhd = uhd->next;
+
+	if (!uhd)
+		return;
+	/* Handle each complete netlink message held in this skb */
+	while (skb->len >= NLMSG_SPACE(0)) {
+		u32 rlen;
+
+		nlh = (struct nlmsghdr *) skb->data;
+		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+			return;
+
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+
+		err = toi_nl_gen_rcv_msg(uhd, skb, nlh);
+		if (err)
+			netlink_ack(skb, nlh, err);
+		else if (nlh->nlmsg_flags & NLM_F_ACK)
+			netlink_ack(skb, nlh, 0);
+		skb_pull(skb, rlen);
+	}
+}
+
+static int netlink_prepare(struct user_helper_data *uhd)
+{
+	uhd->next = uhd_list;
+	uhd_list = uhd;
+	/* NOTE(review): uhd is re-linked on every call and never unlinked */
+	uhd->sock_seq = 0x42c0ffee;
+	uhd->nl = netlink_kernel_create(&init_net, uhd->netlink_id, 0,
+			toi_user_rcv_skb, NULL, THIS_MODULE);
+	if (!uhd->nl) {
+		printk(KERN_INFO "Failed to allocate netlink socket for %s.\n",
+				uhd->name);
+		return -ENOMEM;
+	}
+
+	toi_fill_skb_pool(uhd);
+
+	return 0;
+}
+
+void toi_netlink_close(struct user_helper_data *uhd)
+{
+	struct task_struct *t;
+
+	read_lock(&tasklist_lock);
+	t = find_task_by_pid_type_ns(PIDTYPE_PID, uhd->pid, &init_pid_ns);
+	if (t)
+		t->flags &= ~PF_NOFREEZE;
+	read_unlock(&tasklist_lock);
+	/* Tell the helper we're done; an incoming CLEANUP finishes the close */
+	toi_send_netlink_message(uhd, NETLINK_MSG_CLEANUP, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(toi_netlink_close);
+
+int toi_netlink_setup(struct user_helper_data *uhd)
+{
+	/* In case userui didn't cleanup properly on us */
+	toi_netlink_close_complete(uhd);
+
+	if (netlink_prepare(uhd) < 0) {
+		printk(KERN_INFO "Netlink prepare failed.\n");
+		return 1;
+	}
+
+	if (toi_launch_userspace_program(uhd->program, uhd->netlink_id,
+				UMH_WAIT_EXEC) < 0) {
+		printk(KERN_INFO "Launch userspace program failed.\n");
+		toi_netlink_close_complete(uhd);
+		return 1;
+	}
+
+	/* Wait 2 seconds for the userspace process to make contact */
+	wait_for_completion_timeout(&uhd->wait_for_process, 2*HZ);
+	/* pid is only set once nl_set_nofreeze() has accepted the helper */
+	if (uhd->pid == -1) {
+		printk(KERN_INFO "%s: Failed to contact userspace process.\n",
+				uhd->name);
+		toi_netlink_close_complete(uhd);
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(toi_netlink_setup);
diff -Npur linux-2.6-block/kernel/power/tuxonice_netlink.h linux-2.6-block-custom/kernel/power/tuxonice_netlink.h
--- linux-2.6-block/kernel/power/tuxonice_netlink.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_netlink.h	2008-09-26 19:48:23.999808217 +0900
@@ -0,0 +1,61 @@
+/*
+ * kernel/power/tuxonice_netlink.h
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Declarations for functions for communicating with a userspace helper
+ * via netlink.
+ */
+
+#include <linux/netlink.h>
+#include <net/sock.h>
+
+#define NETLINK_MSG_BASE 0x10
+
+#define NETLINK_MSG_READY 0x10
+#define	NETLINK_MSG_NOFREEZE_ME 0x16
+#define NETLINK_MSG_GET_DEBUGGING 0x19
+#define NETLINK_MSG_CLEANUP 0x24
+#define NETLINK_MSG_NOFREEZE_ACK 0x27
+#define NETLINK_MSG_IS_DEBUGGING 0x28
+
+struct user_helper_data {
+	int (*rcv_msg) (struct sk_buff *skb, struct nlmsghdr *nlh);
+	void (*not_ready) (void);	/* called on interface version mismatch */
+	struct sock *nl;		/* kernel-side netlink socket */
+	u32 sock_seq;			/* sequence number of the next message */
+	pid_t pid;			/* helper's pid, or -1 when there is none */
+	char *comm;
+	char program[256];
+	int pool_level;			/* skbs currently in emerg_skbs */
+	int pool_limit;			/* cap on pool_level */
+	struct sk_buff *emerg_skbs;	/* emergency pool, linked via ->next */
+	int skb_size;			/* payload size used when allocating skbs */
+	int netlink_id;			/* netlink protocol number of the socket */
+	char *name;
+	struct user_helper_data *next;	/* link in uhd_list */
+	struct completion wait_for_process; /* completed by a valid READY */
+	int interface_version;		/* version the helper must report */
+	int must_init;
+};
+
+#ifdef CONFIG_NET
+int toi_netlink_setup(struct user_helper_data *uhd);
+void toi_netlink_close(struct user_helper_data *uhd);
+void toi_send_netlink_message(struct user_helper_data *uhd,
+		int type, void *params, size_t len);
+void toi_netlink_close_complete(struct user_helper_data *uhd);
+#else
+static inline int toi_netlink_setup(struct user_helper_data *uhd)
+{
+	return 0;
+}
+
+static inline void toi_netlink_close(struct user_helper_data *uhd) { }
+static inline void toi_send_netlink_message(struct user_helper_data *uhd,
+		int type, void *params, size_t len) { }
+static inline void toi_netlink_close_complete(struct user_helper_data *uhd)
+	{ }
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_pagedir.c linux-2.6-block-custom/kernel/power/tuxonice_pagedir.c
--- linux-2.6-block/kernel/power/tuxonice_pagedir.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_pagedir.c	2008-09-26 19:48:24.011764764 +0900
@@ -0,0 +1,347 @@
+/*
+ * kernel/power/tuxonice_pagedir.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2006-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for handling pagesets.
+ * Note that pbes aren't actually stored as such. They're stored as
+ * bitmaps and extents.
+ */
+
+#include <linux/suspend.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <asm/tlbflush.h>
+
+#include "tuxonice_pageflags.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice.h"
+#include "power.h"
+#include "tuxonice_builtin.h"
+#include "tuxonice_alloc.h"
+
+static int ptoi_pfn;
+static struct pbe *this_low_pbe;
+static struct pbe **last_low_pbe_ptr;
+
+void toi_reset_alt_image_pageset2_pfn(void)
+{
+	ptoi_pfn = max_pfn + 1;	/* "yet to find first ps2 pfn" state */
+}
+
+static struct page *first_conflicting_page;
+
+/*
+ * free_conflicting_pages - free the chain linked via each page's first word
+ */
+
+void free_conflicting_pages(void)
+{
+	struct page *victim, **link;
+	while ((victim = first_conflicting_page) != NULL) {
+		link = (struct page **) kmap(victim);
+		first_conflicting_page = *link;
+		kunmap(victim);
+		toi__free_page(29, victim);
+	}
+}
+
+/* ___toi_get_nonconflicting_page
+ *
+ * Description: Gets an order zero page that won't be overwritten while
+ *		copying the original pages (NULL on allocation failure).
+ */
+
+struct page *___toi_get_nonconflicting_page(int can_be_highmem)
+{
+	struct page *page;
+	int flags = TOI_ATOMIC_GFP;
+	if (can_be_highmem)
+		flags |= __GFP_HIGHMEM;
+
+
+	if (test_toi_state(TOI_LOADING_ALT_IMAGE) && pageset2_map.bitmap &&
+				(ptoi_pfn < (max_pfn + 2))) {
+		/*
+		 * ptoi_pfn = max_pfn + 1 when yet to find first ps2 pfn that
+		 * can be used.
+		 * 	   = 0..max_pfn when going through list.
+		 * 	   = max_pfn + 2 when gone through whole list.
+		 */
+		do {
+			ptoi_pfn = get_next_bit_on(&pageset2_map, ptoi_pfn);
+			if (ptoi_pfn <= max_pfn) {
+				page = pfn_to_page(ptoi_pfn);
+				if (!PagePageset1(page) &&
+				    (can_be_highmem || !PageHighMem(page)))
+					return page;
+			} else
+				ptoi_pfn++;
+		} while (ptoi_pfn < max_pfn);
+	}
+
+	do {
+		page = toi_alloc_page(29, flags);
+		if (!page) {
+			printk(KERN_INFO "Failed to get nonconflicting "
+					"page.\n");
+			return NULL;
+		}
+		if (PagePageset1(page)) {
+			struct page **next = (struct page **) kmap(page);
+			*next = first_conflicting_page;
+			first_conflicting_page = page;
+			kunmap(page);
+		}
+	} while (PagePageset1(page));
+
+	return page;
+}
+
+unsigned long __toi_get_nonconflicting_page(void)
+{
+	struct page *page = ___toi_get_nonconflicting_page(0);
+	return page ? (unsigned long) page_address(page) : 0; /* 0 = failure */
+}
+
+struct pbe *get_next_pbe(struct page **page_ptr, struct pbe *this_pbe,
+		int highmem)
+{
+	if (((((unsigned long) this_pbe) & (PAGE_SIZE - 1))
+		     + 2 * sizeof(struct pbe)) > PAGE_SIZE) {
+		struct page *new_page =
+			___toi_get_nonconflicting_page(highmem);
+		if (!new_page)
+			return ERR_PTR(-ENOMEM);
+		this_pbe = (struct pbe *) kmap(new_page);
+		memset(this_pbe, 0, PAGE_SIZE);
+		*page_ptr = new_page;
+	} else
+		this_pbe++;
+	/* Either the first slot of a fresh zeroed page or the next slot */
+	return this_pbe;
+}
+
+/* get_pageset1_load_addresses
+ *
+ * Description: We check here that pagedir & pages it points to won't collide
+ * 		with pages where we're going to restore from the loaded pages
+ * 		later.
+ * Returns:	Zero on success, one if couldn't find enough pages (shouldn't
+ * 		happen).
+ */
+
+int toi_get_pageset1_load_addresses(void)
+{
+	int pfn, highallocd = 0, lowallocd = 0;
+	int low_needed = pagedir1.size - get_highmem_size(pagedir1);
+	int high_needed = get_highmem_size(pagedir1);
+	int low_pages_for_highmem = 0;
+	unsigned long flags = GFP_ATOMIC | __GFP_NOWARN | __GFP_HIGHMEM;
+	struct page *page, *high_pbe_page = NULL, *last_high_pbe_page = NULL,
+		    *low_pbe_page;
+	struct pbe **last_high_pbe_ptr = &restore_highmem_pblist,
+		   *this_high_pbe = NULL;
+	int orig_low_pfn = max_pfn + 1, orig_high_pfn = max_pfn + 1;
+	int high_pbes_done = 0, low_pbes_done = 0;
+	int low_direct = 0, high_direct = 0;
+	int high_to_free, low_to_free;
+
+	last_low_pbe_ptr = &restore_pblist;
+
+	/* First, allocate the pages that hold the start of each pbe list. */
+	if (high_needed) {
+		high_pbe_page = ___toi_get_nonconflicting_page(1);
+		if (!high_pbe_page)
+			return 1;
+		this_high_pbe = (struct pbe *) kmap(high_pbe_page);
+		memset(this_high_pbe, 0, PAGE_SIZE);
+	}
+
+	low_pbe_page = ___toi_get_nonconflicting_page(0);
+	if (!low_pbe_page)
+		return 1;
+	this_low_pbe = (struct pbe *) page_address(low_pbe_page);
+
+	/*
+	 * Next, allocate every page we can get, flagging each one in
+	 * pageset1_copy_map, to find where we can load data directly
+	 * into destination pages. I'd like to do this in bigger chunks,
+	 * but then we can't free pages individually later.
+	 */
+
+	do {
+		page = toi_alloc_page(30, flags);
+		if (page)
+			SetPagePageset1Copy(page);
+	} while (page);
+
+	/*
+	 * Find out how many high- and lowmem pages we allocated above,
+	 * and how many pages we can reload directly to their original
+	 * location (allocated pages that are themselves in pageset1).
+	 */
+	BITMAP_FOR_EACH_SET(&pageset1_copy_map, pfn) {
+		int is_high;
+		page = pfn_to_page(pfn);
+		is_high = PageHighMem(page);
+
+		if (PagePageset1(page)) {
+			if (test_action_state(TOI_NO_DIRECT_LOAD)) {
+				ClearPagePageset1Copy(page);
+				toi__free_page(30, page);
+				continue;
+			} else {
+				if (is_high)
+					high_direct++;
+				else
+					low_direct++;
+			}
+		} else {
+			if (is_high)
+				highallocd++;
+			else
+				lowallocd++;
+		}
+	}
+
+	high_needed -= high_direct;
+	low_needed -= low_direct;
+
+	/*
+	 * Do we need to use some lowmem pages for the copies of highmem
+	 * pages?
+	 */
+	if (high_needed > highallocd) {
+		low_pages_for_highmem = high_needed - highallocd;
+		high_needed -= low_pages_for_highmem;
+		low_needed += low_pages_for_highmem;
+	}
+
+	high_to_free = highallocd - high_needed;
+	low_to_free = lowallocd - low_needed;
+
+	/*
+	 * Now generate our pbes (which will be used for the atomic restore),
+	 * and free unneeded pages.
+	 */
+	BITMAP_FOR_EACH_SET(&pageset1_copy_map, pfn) {
+		int is_high;
+		page = pfn_to_page(pfn);
+		is_high = PageHighMem(page);
+
+		if (PagePageset1(page))
+			continue;
+
+		/* Free the page? (We free surplus pages as we meet them.) */
+		if ((is_high && high_to_free) ||
+		    (!is_high && low_to_free)) {
+			ClearPagePageset1Copy(page);
+			toi__free_page(30, page);
+			if (is_high)
+				high_to_free--;
+			else
+				low_to_free--;
+			continue;
+		}
+
+		/* Nope. We're going to use this page as a copy. Add a pbe. */
+		if (is_high || low_pages_for_highmem) {
+			struct page *orig_page;
+			high_pbes_done++;
+			if (!is_high)
+				low_pages_for_highmem--;
+			do {
+				orig_high_pfn = get_next_bit_on(&pageset1_map,
+						orig_high_pfn);
+				BUG_ON(orig_high_pfn > max_pfn);
+				orig_page = pfn_to_page(orig_high_pfn);
+			} while (!PageHighMem(orig_page) ||
+					load_direct(orig_page));
+
+			this_high_pbe->orig_address = orig_page;
+			this_high_pbe->address = page;
+			this_high_pbe->next = NULL;
+			if (last_high_pbe_page != high_pbe_page) {
+				*last_high_pbe_ptr =
+					(struct pbe *) high_pbe_page;
+				if (!last_high_pbe_page)
+					last_high_pbe_page = high_pbe_page;
+			} else
+				*last_high_pbe_ptr = this_high_pbe;
+			last_high_pbe_ptr = &this_high_pbe->next;
+			if (last_high_pbe_page != high_pbe_page) {
+				kunmap(last_high_pbe_page);
+				last_high_pbe_page = high_pbe_page;
+			}
+			this_high_pbe = get_next_pbe(&high_pbe_page,
+					this_high_pbe, 1);
+			if (IS_ERR(this_high_pbe)) {
+				printk(KERN_INFO
+						"This high pbe is an error.\n");
+				return -ENOMEM;
+			}
+		} else {
+			struct page *orig_page;
+			low_pbes_done++;
+			do {
+				orig_low_pfn = get_next_bit_on(&pageset1_map,
+						orig_low_pfn);
+				BUG_ON(orig_low_pfn > max_pfn);
+				orig_page = pfn_to_page(orig_low_pfn);
+			} while (PageHighMem(orig_page) ||
+					load_direct(orig_page));
+
+			this_low_pbe->orig_address = page_address(orig_page);
+			this_low_pbe->address = page_address(page);
+			this_low_pbe->next = NULL;
+			*last_low_pbe_ptr = this_low_pbe;
+			last_low_pbe_ptr = &this_low_pbe->next;
+			this_low_pbe = get_next_pbe(&low_pbe_page,
+					this_low_pbe, 0);
+			if (IS_ERR(this_low_pbe)) {
+				printk(KERN_INFO "this_low_pbe is an error.\n");
+				return -ENOMEM;
+			}
+		}
+	}
+
+	if (high_pbe_page)
+		kunmap(high_pbe_page);
+
+	if (last_high_pbe_page != high_pbe_page) {
+		if (last_high_pbe_page)
+			kunmap(last_high_pbe_page);
+		toi__free_page(29, high_pbe_page);
+	}
+
+	free_conflicting_pages();
+
+	return 0;
+}
+
+int add_boot_kernel_data_pbe(void)
+{
+	this_low_pbe->address = (char *) __toi_get_nonconflicting_page();
+	if (!this_low_pbe->address) {
+		printk(KERN_INFO "Failed to get bkd atomic restore buffer.\n");
+		return -ENOMEM;
+	}
+	/* Stage a copy of toi_bkd, with its own size recorded, in that page. */
+	toi_bkd.size = sizeof(toi_bkd);
+	memcpy(this_low_pbe->address, &toi_bkd, sizeof(toi_bkd));
+	/* Append to the low pbe list; orig_address is the boot kernel buffer. */
+	*last_low_pbe_ptr = this_low_pbe;
+	this_low_pbe->orig_address = (char *) boot_kernel_data_buffer;
+	this_low_pbe->next = NULL;
+	return 0;
+}
diff -Npur linux-2.6-block/kernel/power/tuxonice_pagedir.h linux-2.6-block-custom/kernel/power/tuxonice_pagedir.h
--- linux-2.6-block/kernel/power/tuxonice_pagedir.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_pagedir.h	2008-09-26 19:48:24.011764764 +0900
@@ -0,0 +1,50 @@
+/*
+ * kernel/power/tuxonice_pagedir.h
+ *
+ * Copyright (C) 2006-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Declarations for routines for handling pagesets.
+ */
+
+#ifndef KERNEL_POWER_PAGEDIR_H
+#define KERNEL_POWER_PAGEDIR_H
+
+/* Pagedir
+ *
+ * Contains the metadata for a set of pages saved in the image.
+ */
+
+struct pagedir {
+	int id;		/* NOTE(review): presumably the pageset number */
+	long size;	/* Pages in the set, highmem pages included */
+#ifdef CONFIG_HIGHMEM
+	long size_high;	/* How many of those pages are highmem */
+#endif
+};
+
+#ifdef CONFIG_HIGHMEM	/* Arguments are struct pagedir lvalues, not pointers. */
+#define get_highmem_size(pd) ((pd).size_high)
+#define set_highmem_size(pd, sz) do { (pd).size_high = (sz); } while (0)
+#define inc_highmem_size(pd) do { (pd).size_high++; } while (0)
+#define get_lowmem_size(pd) ((pd).size - (pd).size_high)
+#else			/* No highmem: every page in a pagedir is lowmem. */
+#define get_highmem_size(pd) (0)
+#define set_highmem_size(pd, sz) do { } while (0)
+#define inc_highmem_size(pd) do { } while (0)
+#define get_lowmem_size(pd) ((pd).size)
+#endif
+
+extern struct pagedir pagedir1, pagedir2;
+
+extern void toi_copy_pageset1(void);
+
+extern int toi_get_pageset1_load_addresses(void);
+
+extern unsigned long __toi_get_nonconflicting_page(void);
+struct page *___toi_get_nonconflicting_page(int can_be_highmem);
+
+extern void toi_reset_alt_image_pageset2_pfn(void);
+extern int add_boot_kernel_data_pbe(void);
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_pageflags.c linux-2.6-block-custom/kernel/power/tuxonice_pageflags.c
--- linux-2.6-block/kernel/power/tuxonice_pageflags.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_pageflags.c	2008-09-26 19:48:24.015772396 +0900
@@ -0,0 +1,162 @@
+/*
+ * kernel/power/tuxonice_pageflags.c
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for serialising and relocating pageflags in which we
+ * store our image metadata.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/suspend.h>
+#include "tuxonice_pageflags.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_pagedir.h"
+#include "tuxonice.h"
+
+DECLARE_DYN_PAGEFLAGS(pageset2_map);
+DECLARE_DYN_PAGEFLAGS(page_resave_map);
+DECLARE_DYN_PAGEFLAGS(io_map);
+DECLARE_DYN_PAGEFLAGS(nosave_map);
+DECLARE_DYN_PAGEFLAGS(free_map);
+
+/* Bitmap pages needed for @zone: one bit per spanned page, rounded up. */
+static int pages_for_zone(struct zone *zone)
+{
+	return DIV_ROUND_UP(zone->spanned_pages, PAGE_SIZE * 8);
+}
+
+/* Bytes one saved bitmap takes: 3 ints + bitmap pages per zone, + marker. */
+int toi_pageflags_space_needed(void)
+{
+	struct zone *zone;
+	int total = sizeof(int);	/* The end-of-data marker. */
+
+	for_each_zone(zone) {
+		if (!populated_zone(zone))
+			continue;
+		total += sizeof(int) * 3 + pages_for_zone(zone) * PAGE_SIZE;
+	}
+	return total;
+}
+
+/* save_dyn_pageflags
+ *
+ * Description: Write a set of pageflags to the image header. Each populated
+ *              zone gets its node, zone index and page count, then its pages.
+ * Arguments:   struct dyn_pageflags *: Pointer to the bitmap being saved.
+ */
+void save_dyn_pageflags(struct dyn_pageflags *pagemap)
+{
+	int i, zone_idx, size, node = 0;
+	struct zone *zone;
+	struct pglist_data *pgdat;
+
+	if (!pagemap)
+		return;
+
+	for_each_online_pgdat(pgdat) {
+		for (zone_idx = 0; zone_idx < MAX_NR_ZONES; zone_idx++) {
+			zone = &pgdat->node_zones[zone_idx];
+
+			if (!populated_zone(zone))
+				continue;
+
+			toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+					(char *) &node, sizeof(int));
+			toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+					(char *) &zone_idx, sizeof(int));
+			size = pages_for_zone(zone);
+			toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+					(char *) &size, sizeof(int));
+
+			for (i = 0; i < size; i++) {
+				if (!pagemap->bitmap[node][zone_idx][i+2]) {
+					printk(KERN_INFO "Sparse pagemap?\n");
+					dump_pagemap(pagemap);
+					BUG();
+				}
+				toiActiveAllocator->rw_header_chunk(WRITE,
+					NULL, (char *) pagemap->bitmap[node]
+						[zone_idx][i+2],
+					PAGE_SIZE);
+			}
+		}
+		node++;
+	}
+	node = -1;	/* End-of-data marker. */
+	toiActiveAllocator->rw_header_chunk(WRITE, NULL,
+			(char *) &node, sizeof(int));
+}
+
+/* load_dyn_pageflags
+ *
+ * Description: Load pageflags saved by save_dyn_pageflags. Returns zero on
+ *              success, one on a NULL bitmap or data that doesn't fit here.
+ * Arguments:   struct dyn_pageflags *: Bitmap to load (already allocated).
+ */
+int load_dyn_pageflags(struct dyn_pageflags *pagemap)
+{
+	int i, zone_idx, zone_check = 0, size, node = 0;
+	struct zone *zone;
+	struct pglist_data *pgdat;
+
+	if (!pagemap)
+		return 1;
+
+	for_each_online_pgdat(pgdat) {
+		for (zone_idx = 0; zone_idx < MAX_NR_ZONES; zone_idx++) {
+			zone = &pgdat->node_zones[zone_idx];
+
+			if (!populated_zone(zone))
+				continue;
+
+			/* Same node? */
+			toiActiveAllocator->rw_header_chunk(READ, NULL,
+					(char *) &zone_check, sizeof(int));
+			if (zone_check != node) {
+				printk(KERN_INFO "Node read (%d) != node "
+						"(%d).\n",
+						zone_check, node);
+				return 1;
+			}
+
+			/* Same zone? */
+			toiActiveAllocator->rw_header_chunk(READ, NULL,
+					(char *) &zone_check, sizeof(int));
+			if (zone_check != zone_idx) {
+				printk(KERN_INFO "Zone read (%d) != zone "
+						"(%d).\n",
+						zone_check, zone_idx);
+				return 1;
+			}
+
+			toiActiveAllocator->rw_header_chunk(READ, NULL,
+				(char *) &size, sizeof(int));
+			if (size > pages_for_zone(zone)) {
+				printk(KERN_INFO "Bad bitmap size %d.\n", size);
+				return 1;
+			}
+			for (i = 0; i < size; i++)
+				toiActiveAllocator->rw_header_chunk(READ, NULL,
+					(char *) pagemap->bitmap[node][zone_idx]
+									[i+2],
+					PAGE_SIZE);
+		}
+		node++;
+	}
+	toiActiveAllocator->rw_header_chunk(READ, NULL, (char *) &zone_check,
+			sizeof(int));
+	if (zone_check != -1) {
+		printk(KERN_INFO "Didn't read end of dyn pageflag data marker."
+				" (%x)\n", zone_check);
+		return 1;
+	}
+	return 0;
+}
diff -Npur linux-2.6-block/kernel/power/tuxonice_pageflags.h linux-2.6-block-custom/kernel/power/tuxonice_pageflags.h
--- linux-2.6-block/kernel/power/tuxonice_pageflags.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_pageflags.h	2008-09-26 19:48:24.015772396 +0900
@@ -0,0 +1,63 @@
+/*
+ * kernel/power/tuxonice_pageflags.h
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * TuxOnIce needs a few pageflags while working that aren't otherwise
+ * used. To save the struct page pageflags, we dynamically allocate
+ * a bitmap and use that. These are the only non order-0 allocations
+ * we do.
+ *
+ * NOTE!!!
+ * We assume that PAGE_SIZE - sizeof(void *) is a multiple of
+ * sizeof(unsigned long). Is this ever false?
+ */
+
+#include <linux/dyn_pageflags.h>
+#include <linux/suspend.h>
+
+extern struct dyn_pageflags pageset1_map;
+extern struct dyn_pageflags pageset1_copy_map;
+extern struct dyn_pageflags pageset2_map;
+extern struct dyn_pageflags page_resave_map;
+extern struct dyn_pageflags io_map;
+extern struct dyn_pageflags nosave_map;
+extern struct dyn_pageflags free_map;
+
+#define PagePageset1(page) (test_dynpageflag(&pageset1_map, page))
+#define SetPagePageset1(page) (set_dynpageflag(&pageset1_map, page))
+#define ClearPagePageset1(page) (clear_dynpageflag(&pageset1_map, page))
+
+#define PagePageset1Copy(page) (test_dynpageflag(&pageset1_copy_map, page))
+#define SetPagePageset1Copy(page) (set_dynpageflag(&pageset1_copy_map, page))
+#define ClearPagePageset1Copy(page) \
+	(clear_dynpageflag(&pageset1_copy_map, page))
+
+#define PagePageset2(page) (test_dynpageflag(&pageset2_map, page))
+#define SetPagePageset2(page) (set_dynpageflag(&pageset2_map, page))
+#define ClearPagePageset2(page) (clear_dynpageflag(&pageset2_map, page))
+/* WasRW is tracked in pageset2_map too (NOTE(review): confirm intended). */
+#define PageWasRW(page) (test_dynpageflag(&pageset2_map, page))
+#define SetPageWasRW(page) (set_dynpageflag(&pageset2_map, page))
+#define ClearPageWasRW(page) (clear_dynpageflag(&pageset2_map, page))
+
+#define PageResave(page) (page_resave_map.bitmap ? \
+	test_dynpageflag(&page_resave_map, page) : 0)
+#define SetPageResave(page) (set_dynpageflag(&page_resave_map, page))
+#define ClearPageResave(page) (clear_dynpageflag(&page_resave_map, page))
+
+#define PageNosave(page) (nosave_map.bitmap ? \
+		test_dynpageflag(&nosave_map, page) : 0)
+#define SetPageNosave(page) (set_dynpageflag(&nosave_map, page))
+#define ClearPageNosave(page) (clear_dynpageflag(&nosave_map, page))
+
+#define PageNosaveFree(page) (free_map.bitmap ? \
+		test_dynpageflag(&free_map, page) : 0)
+#define SetPageNosaveFree(page) (set_dynpageflag(&free_map, page))
+#define ClearPageNosaveFree(page) (clear_dynpageflag(&free_map, page))
+
+extern void save_dyn_pageflags(struct dyn_pageflags *pagemap);
+extern int load_dyn_pageflags(struct dyn_pageflags *pagemap);
+extern int toi_pageflags_space_needed(void);
diff -Npur linux-2.6-block/kernel/power/tuxonice_power_off.c linux-2.6-block-custom/kernel/power/tuxonice_power_off.c
--- linux-2.6-block/kernel/power/tuxonice_power_off.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_power_off.c	2008-09-26 19:48:24.015772396 +0900
@@ -0,0 +1,279 @@
+/*
+ * kernel/power/tuxonice_power_off.c
+ *
+ * Copyright (C) 2006-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Support for powering down.
+ */
+
+#include <linux/device.h>
+#include <linux/suspend.h>
+#include <linux/mm.h>
+#include <linux/pm.h>
+#include <linux/reboot.h>
+#include <linux/cpu.h>
+#include <linux/console.h>
+#include <linux/fs.h>
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+
+unsigned long toi_poweroff_method; /* 0 - Kernel power off */
+EXPORT_SYMBOL_GPL(toi_poweroff_method);
+
+int wake_delay;
+static char lid_state_file[256], wake_alarm_dir[256];
+static struct file *lid_file, *alarm_file, *epoch_file;
+int post_wake_state = -1;
+
+static int did_suspend_to_both;
+
+/*
+ * __toi_power_down
+ * Functionality   : Powers down or reboots the computer once the image
+ *                   has been written to disk.
+ * Arguments       : method: 3 = suspend to RAM, 4 = platform, else power off.
+ * Key Assumptions : Able to reboot/power down via code called or that
+ *                   the warning emitted if the calls fail will be visible
+ *                   to the user (ie printk resumes devices).
+ */
+static void __toi_power_down(int method)
+{
+	int error;
+
+	if (test_action_state(TOI_REBOOT)) {
+		toi_cond_pause(1, "Ready to reboot.");
+		kernel_restart(NULL);
+	}
+
+	toi_cond_pause(1, "Powering down.");
+
+	switch (method) {
+	case 0:
+		break;
+	case 3:
+		error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
+		if (!error)
+			error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+		pm_notifier_call_chain(PM_POST_SUSPEND);
+		if (!error) {
+			did_suspend_to_both = 1;
+			return;
+		}
+		break;
+	case 4:
+		/*
+		 * If succeeds, doesn't return. If fails, do a simple
+		 * powerdown.
+		 */
+		hibernation_platform_enter();
+		break;
+	case 5:
+		/* Historic entry only now */
+		break;
+	}
+
+	if (method && method != 5)
+		toi_cond_pause(1,
+			"Falling back to alternate power off method.");
+
+	if (test_result_state(TOI_ABORTED))
+		return;
+
+	kernel_power_off();
+	kernel_halt();
+	toi_cond_pause(1, "Powerdown failed.");
+	while (1)
+		cpu_relax();
+}
+
+#define CLOSE_FILE(file) \
+	do {	/* One statement, so safe in an unbraced if/else. */ \
+		if (file) { filp_close(file, NULL); file = NULL; } \
+	} while (0)
+
+static void powerdown_cleanup(int toi_or_resume)
+{
+	/* powerdown_init() only opens the files for a non-zero argument. */
+	if (toi_or_resume) {
+		CLOSE_FILE(lid_file);
+		CLOSE_FILE(alarm_file);
+		CLOSE_FILE(epoch_file);
+	}
+}
+
+static void open_file(char *format, char *arg, struct file **var, int mode,
+		char *desc)
+{
+	char buf[256];	/* Path: format with arg substituted, cut to fit. */
+
+	if (!strlen(arg))
+		return;
+	snprintf(buf, sizeof(buf), format, arg);
+	*var = filp_open(buf, mode, 0);
+	if (IS_ERR(*var) || !*var) {
+		printk(KERN_INFO "Failed to open %s file '%s' (%p).\n",
+			desc, buf, *var);
+		*var = NULL;
+	}
+}
+
+static int powerdown_init(int toi_or_resume)
+{
+	if (!toi_or_resume)
+		return 0;
+
+	did_suspend_to_both = 0;	/* Set by a successful method 3. */
+
+	open_file("/proc/acpi/button/%s/state", lid_state_file, &lid_file,
+			O_RDONLY, "lid");
+
+	if (strlen(wake_alarm_dir)) {	/* Both files live in one rtc dir. */
+		open_file("/sys/class/rtc/%s/wakealarm", wake_alarm_dir,
+				&alarm_file, O_WRONLY, "alarm");
+
+		open_file("/sys/class/rtc/%s/since_epoch", wake_alarm_dir,
+				&epoch_file, O_RDONLY, "epoch");
+	}
+
+	return 0;
+}
+
+/* Return 1 if the lid state file could be read and says closed, else 0. */
+static int lid_closed(void)
+{
+	char array[25];
+	ssize_t size;
+	loff_t pos = 0;
+
+	if (!lid_file)
+		return 0;
+
+	/* Read at most 24 bytes so array[] always has room for a terminator. */
+	size = vfs_read(lid_file, (char __user *) array, 24, &pos);
+	if ((int) size < 1) {
+		printk(KERN_INFO "Failed to read lid state file (%d).\n",
+			(int) size);
+		return 0;
+	}
+	array[size] = '\0';
+
+	return !strcmp(array, "state:      closed\n");
+}
+
+static void write_alarm_file(int value)
+{
+	char text[40];
+	loff_t offset = 0;
+	ssize_t written;
+
+	if (!alarm_file)
+		return;
+
+	/* The value goes out as one line of decimal text. */
+	sprintf(text, "%d\n", value);
+	written = vfs_write(alarm_file, (char __user *)text, strlen(text),
+			&offset);
+	if (written < 0)
+		printk(KERN_INFO "Error %d writing alarm value %s.\n",
+				(int) written, text);
+}
+
+/**
+ * toi_check_resleep: See whether to powerdown again after waking.
+ *
+ * After waking, check whether we should powerdown again in a (usually
+ * different) way: only if the lid is still closed and post_wake_state >= 0.
+ */
+void toi_check_resleep(void)
+{
+	/* Reached when a powerdown returned, e.g. we slept in RAM and woke. */
+	if (lid_closed() && post_wake_state >= 0)
+		__toi_power_down(post_wake_state);
+}
+
+void toi_power_down(void)
+{
+	if (alarm_file && epoch_file && wake_delay) {
+		char array[25];
+		loff_t pos = 0;
+		ssize_t size = vfs_read(epoch_file, (char __user *) array,
+				sizeof(array) - 1, &pos);
+
+		if (size < 1)
+			printk(KERN_INFO "Failed to read epoch file (%d).\n",
+					(int) size);
+		else {
+			unsigned long since_epoch;
+
+			/* Terminate what was read before parsing it. */
+			array[size] = '\0';
+			since_epoch = simple_strtol(array, NULL, 0);
+
+			/* Clear any old wakeup time, then set the new one. */
+			write_alarm_file(0);
+			write_alarm_file(since_epoch + wake_delay);
+		}
+	}
+
+	__toi_power_down(toi_poweroff_method);
+	toi_check_resleep();
+}
+EXPORT_SYMBOL_GPL(toi_power_down);
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_ACPI)
+	{
+	 TOI_ATTR("lid_file", SYSFS_RW),
+	 SYSFS_STRING(lid_state_file, 256, 0),
+	},
+
+	{
+	  TOI_ATTR("wake_delay", SYSFS_RW),
+	  SYSFS_INT(&wake_delay, 0, INT_MAX, 0)
+	},
+
+	{
+	  TOI_ATTR("wake_alarm_dir", SYSFS_RW),
+	  SYSFS_STRING(wake_alarm_dir, 256, 0)
+	},
+
+	{ TOI_ATTR("post_wake_state", SYSFS_RW),
+	  SYSFS_INT(&post_wake_state, -1, 5, 0)
+	},
+
+	{ TOI_ATTR("powerdown_method", SYSFS_RW),
+	  SYSFS_UL(&toi_poweroff_method, 0, 5, 0)
+	},
+
+	{ TOI_ATTR("did_suspend_to_both", SYSFS_READONLY),
+	  SYSFS_INT(&did_suspend_to_both, 0, 0, 0)
+	},
+#endif
+};
+
+static struct toi_module_ops powerdown_ops = {
+	.type				= MISC_HIDDEN_MODULE,
+	.name				= "poweroff",
+	.initialise			= powerdown_init,
+	.cleanup			= powerdown_cleanup,
+	.directory			= "[ROOT]",
+	.module				= THIS_MODULE,
+	.sysfs_data			= sysfs_params,
+	.num_sysfs_entries		= sizeof(sysfs_params) /
+		sizeof(struct toi_sysfs_data),
+};
+
+int toi_poweroff_init(void)
+{
+	return toi_register_module(&powerdown_ops);
+}
+
+void toi_poweroff_exit(void)
+{
+	toi_unregister_module(&powerdown_ops);
+}
diff -Npur linux-2.6-block/kernel/power/tuxonice_power_off.h linux-2.6-block-custom/kernel/power/tuxonice_power_off.h
--- linux-2.6-block/kernel/power/tuxonice_power_off.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_power_off.h	2008-09-26 19:48:24.015772396 +0900
@@ -0,0 +1,34 @@
+/*
+ * kernel/power/tuxonice_power_off.h
+ *
+ * Copyright (C) 2006-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Support for the powering down.
+ */
+
+int toi_pm_state_finish(void);
+void toi_power_down(void);
+extern unsigned long toi_poweroff_method;
+extern int toi_platform_prepare(void);
+int toi_poweroff_init(void);
+void toi_poweroff_exit(void);
+void toi_check_resleep(void);
+
+extern int platform_begin(int platform_mode);
+extern int platform_pre_snapshot(int platform_mode);
+extern int platform_leave(int platform_mode);
+extern int platform_end(int platform_mode);
+extern int platform_finish(int platform_mode);
+extern int platform_pre_restore(int platform_mode);
+extern int platform_restore_cleanup(int platform_mode);
+
+#define platform_test() (toi_poweroff_method == 4) /* 4: platform method */
+#define toi_platform_begin() platform_begin(platform_test())
+#define toi_platform_pre_snapshot() platform_pre_snapshot(platform_test())
+#define toi_platform_leave() platform_leave(platform_test())
+#define toi_platform_end() platform_end(platform_test())
+#define toi_platform_finish() platform_finish(platform_test())
+#define toi_platform_pre_restore() platform_pre_restore(platform_test())
+#define toi_platform_restore_cleanup() platform_restore_cleanup(platform_test())
diff -Npur linux-2.6-block/kernel/power/tuxonice_prepare_image.c linux-2.6-block-custom/kernel/power/tuxonice_prepare_image.c
--- linux-2.6-block/kernel/power/tuxonice_prepare_image.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_prepare_image.c	2008-09-26 19:48:24.023763356 +0900
@@ -0,0 +1,1054 @@
+/*
+ * kernel/power/tuxonice_prepare_image.c
+ *
+ * Copyright (C) 2003-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * We need to eat memory until we can:
+ * 1. Perform the save without changing anything (RAM_NEEDED < #pages)
+ * 2. Fit it all in available space (toiActiveAllocator->available_space() >=
+ *    main_storage_needed())
+ * 3. Reload the pagedir and pageset1 to places that don't collide with their
+ *    final destinations, not knowing to what extent the resumed kernel will
+ *    overlap with the one loaded at boot time. I think the resumed kernel
+ *    should overlap completely, but I don't want to rely on this as it is
+ *    an unproven assumption. We therefore assume there will be no overlap at
+ *    all (worst case).
+ * 4. Meet the user's requested limit (if any) on the size of the image.
+ *    The limit is in MB, so pages/256 (assuming 4K pages).
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/freezer.h>
+#include <linux/hardirq.h>
+#include <linux/mmzone.h>
+#include <linux/console.h>
+
+#include "tuxonice_pageflags.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_prepare_image.h"
+#include "tuxonice_block_io.h"
+#include "tuxonice.h"
+#include "tuxonice_checksum.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_alloc.h"
+
+static long num_nosave, header_space_allocated, main_storage_allocated,
+	   storage_available;
+long extra_pd1_pages_allowance = CONFIG_TOI_DEFAULT_EXTRA_PAGES_ALLOWANCE;
+int image_size_limit;
+
+struct attention_list {
+	struct task_struct *task;
+	struct attention_list *next;
+};
+
+static struct attention_list *attention_list;
+
+#define PAGESET1 0
+#define PAGESET2 1
+
+void free_attention_list(void)
+{
+	/* Pop entries off the head until the list is empty. */
+	while (attention_list) {
+		struct attention_list *head = attention_list;
+
+		attention_list = head->next;
+		toi_kfree(6, head);
+	}
+}
+
+/* Build attention_list: one entry per PF_NOFREEZE task, plus current. */
+static int build_attention_list(void)
+{
+	int i, task_count = 0;
+	struct task_struct *p;
+	struct attention_list *next;
+
+	/*
+	 * Count every task marked PF_NOFREEZE, plus the current task.
+	 */
+	read_lock(&tasklist_lock);
+	for_each_process(p)
+		if ((p->flags & PF_NOFREEZE) || p == current)
+			task_count++;
+	read_unlock(&tasklist_lock);
+
+	/*
+	 * Allocate attention list structs.
+	 */
+	for (i = 0; i < task_count; i++) {
+		struct attention_list *this =
+			toi_kzalloc(6, sizeof(struct attention_list),
+					TOI_WAIT_GFP);
+		if (!this) {
+			printk(KERN_INFO "Failed to allocate slab for "
+					"attention list.\n");
+			free_attention_list();
+			return 1;
+		}
+		this->next = attention_list;
+		attention_list = this;
+	}
+
+	/* tasklist_lock was dropped after counting: don't run off the list. */
+	next = attention_list;
+	read_lock(&tasklist_lock);
+	for_each_process(p)
+		if (next && ((p->flags & PF_NOFREEZE) || p == current)) {
+			next->task = p;
+			next = next->next;
+		}
+	read_unlock(&tasklist_lock);
+	return 0;
+}
+
+static void pageset2_full(void)
+{
+	struct zone *zone;
+	struct page *page;
+	unsigned long flags;
+
+	/* Mark every page on each zone's LRU lists as pageset2. */
+	for_each_zone(zone) {
+		spin_lock_irqsave(&zone->lru_lock, flags);
+
+		if (zone_page_state(zone, NR_INACTIVE))
+			list_for_each_entry(page, &zone->inactive_list, lru)
+				SetPagePageset2(page);
+		if (zone_page_state(zone, NR_ACTIVE))
+			list_for_each_entry(page, &zone->active_list, lru)
+				SetPagePageset2(page);
+
+		spin_unlock_irqrestore(&zone->lru_lock, flags);
+	}
+}
+
+/*
+ * toi_mark_task_as_pageset
+ * Functionality   : Marks all the saveable pages belonging to a given process
+ * 		     as belonging to a particular pageset: pageset2 if the
+ * 		     argument is non-zero, else pageset1 (clearing pageset2).
+ */
+static void toi_mark_task_as_pageset(struct task_struct *t, int pageset2)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+
+	mm = t->active_mm;
+
+	if (!mm || !mm->mmap)
+		return;
+
+	if (!irqs_disabled())	/* presumably because down_read() can sleep */
+		down_read(&mm->mmap_sem);
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long posn;
+
+		if (vma->vm_flags & (VM_PFNMAP | VM_IO | VM_RESERVED) ||
+		    !vma->vm_start)
+			continue;
+
+		for (posn = vma->vm_start; posn < vma->vm_end;
+				posn += PAGE_SIZE) {
+			struct page *page = follow_page(vma, posn, 0);
+			if (!page)
+				continue;
+
+			if (pageset2)
+				SetPagePageset2(page);
+			else {
+				ClearPagePageset2(page);
+				SetPagePageset1(page);
+			}
+		}
+	}
+
+	if (!irqs_disabled())
+		up_read(&mm->mmap_sem);
+}
+
+/* toi_mark_pages_for_pageset2
+ *
+ * Description:	Mark unshared pages in processes not needed for hibernate as
+ * 		being able to be written out in a separate pagedir.
+ * 		HighMem pages are simply marked as pageset2. They won't be
+ * 		needed during hibernate. Pages of the tasks on attention_list
+ * 		are then put back in pageset1.
+ */
+static void toi_mark_pages_for_pageset2(void)
+{
+	struct task_struct *p;
+	struct attention_list *this = attention_list;
+
+	if (test_action_state(TOI_NO_PAGESET2))
+		return;
+
+	clear_dyn_pageflags(&pageset2_map);
+
+	if (test_action_state(TOI_PAGESET2_FULL))
+		pageset2_full();
+	else {
+		read_lock(&tasklist_lock);
+		for_each_process(p) {
+			if (!p->mm || (p->flags & PF_KTHREAD))
+				continue;
+
+			toi_mark_task_as_pageset(p, PAGESET2);
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	/*
+	 * Because the tasks in attention_list are ones related to hibernating,
+	 * we know that they won't go away under us.
+	 */
+
+	while (this) {
+		if (!test_result_state(TOI_ABORTED))
+			toi_mark_task_as_pageset(this->task, PAGESET1);
+		this = this->next;
+	}
+}
+
+/*
+ * The atomic copy of pageset1 is stored in pageset2 pages.
+ * But if pageset1 is larger (normally only just after boot),
+ * we need to allocate extra pages to store the atomic copy.
+ * The following data struct and functions are used to handle
+ * the allocation and freeing of that memory.
+ */
+
+static long extra_pages_allocated;
+
+struct extras {
+	struct page *page;	/* First page of the allocation */
+	int order;		/* Allocation order: 1 << order pages */
+	struct extras *next;	/* Next entry on extras_list */
+};
+
+static struct extras *extras_list;
+
+/* toi_free_extra_pagedir_memory
+ *
+ * Description:	Release every allocation on extras_list and zero the count.
+ */
+void toi_free_extra_pagedir_memory(void)
+{
+	struct extras *entry;
+	int pg, nr_pages;
+
+	while ((entry = extras_list) != NULL) {
+		extras_list = entry->next;
+		nr_pages = 1 << entry->order;
+		/* Drop the Nosave flag set when the pages were allocated. */
+		for (pg = 0; pg < nr_pages; pg++)
+			ClearPageNosave(entry->page + pg);
+
+		toi_free_pages(9, entry->page, entry->order);
+		toi_kfree(7, entry);
+	}
+
+	extra_pages_allocated = 0;
+}
+
+/* toi_allocate_extra_pagedir_memory
+ *
+ * Description:	Allocate memory for making the atomic copy of pagedir1 in the
+ * 		case where it is bigger than pagedir2.
+ * Arguments:	int extra_pages_needed: Extra pages wanted in total.
+ * Result:	int. Extra pages now allocated (0 if none had to be added).
+ */
+static int toi_allocate_extra_pagedir_memory(int extra_pages_needed)
+{
+	int j, order, num_to_alloc = extra_pages_needed - extra_pages_allocated;
+	unsigned long flags = TOI_ATOMIC_GFP;
+
+	if (num_to_alloc < 1)
+		return 0;
+
+	order = fls(num_to_alloc);
+	if (order >= MAX_ORDER)
+		order = MAX_ORDER - 1;
+
+	while (num_to_alloc) {
+		struct page *newpage;
+		unsigned long virt;
+		struct extras *extras_entry;
+
+		while ((1 << order) > num_to_alloc)
+			order--;
+
+		extras_entry = (struct extras *) toi_kzalloc(7,
+			sizeof(struct extras), TOI_ATOMIC_GFP);
+
+		if (!extras_entry)
+			return extra_pages_allocated;
+
+		virt = toi_get_free_pages(9, flags, order);
+		while (!virt && order) {
+			order--;
+			virt = toi_get_free_pages(9, flags, order);
+		}
+
+		if (!virt) {
+			toi_kfree(7, extras_entry);
+			return extra_pages_allocated;
+		}
+
+		newpage = virt_to_page(virt);
+
+		extras_entry->page = newpage;
+		extras_entry->order = order;
+		extras_entry->next = NULL;
+
+		if (extras_list)
+			extras_entry->next = extras_list;
+
+		extras_list = extras_entry;
+
+		for (j = 0; j < (1 << order); j++) {
+			SetPageNosave(newpage + j);
+			SetPagePageset1Copy(newpage + j);
+		}
+
+		extra_pages_allocated += (1 << order);
+		num_to_alloc -= (1 << order);
+	}
+
+	return extra_pages_allocated;
+}
+
+/*
+ * real_nr_free_pages: Count free pages, including those on per-cpu lists,
+ * in the populated zones whose zone_idx() bit is set in zone_idx_mask.
+ */
+long real_nr_free_pages(unsigned long zone_idx_mask)
+{
+	struct zone *zone;
+	int result = 0, cpu;
+
+	/* PCP lists */
+	for_each_zone(zone) {
+		if (!populated_zone(zone))
+			continue;
+
+		if (!(zone_idx_mask & (1 << zone_idx(zone))))
+			continue;
+
+		for_each_online_cpu(cpu) {
+			struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+			struct per_cpu_pages *pcp = &pset->pcp;
+			result += pcp->count;
+		}
+
+		result += zone_page_state(zone, NR_FREE_PAGES);
+	}
+	return result;
+}
+
+/*
+ * Discover how much extra memory the drivers use when they're asked to
+ * hibernate, by suspending and resuming them and comparing free page
+ * counts. The result (plus a minimum) goes in extra_pd1_pages_allowance.
+ */
+static void get_extra_pd1_allowance(void)
+{
+	long orig_num_free = real_nr_free_pages(all_zones_mask), final;
+
+	toi_prepare_status(CLEAR_BAR, "Finding allowance for drivers.");
+
+	suspend_console();
+	device_suspend(PMSG_FREEZE);
+	local_irq_disable(); /* irqs might have been re-enabled on us */
+	device_power_down(PMSG_FREEZE);
+
+	final = real_nr_free_pages(all_zones_mask);
+
+	device_power_up(PMSG_THAW);
+	local_irq_enable();
+	device_resume(PMSG_THAW);
+	resume_console();
+
+	extra_pd1_pages_allowance = max(
+		orig_num_free - final + MIN_EXTRA_PAGES_ALLOWANCE,
+		(long) MIN_EXTRA_PAGES_ALLOWANCE);
+}
+
+/*
+ * Amount of storage needed, in pages: scaled by the expected compression
+ * ratio (a percentage) if use_ecr is set, and leaving out our allowance
+ * for extra pages if ignore_extra_pd1_allow is set.
+ */
+static long main_storage_needed(int use_ecr,
+		int ignore_extra_pd1_allow)
+{
+	return (pagedir1.size + pagedir2.size +
+	  (ignore_extra_pd1_allow ? 0 : extra_pd1_pages_allowance)) *
+	 (use_ecr ? toi_expected_compression_ratio() : 100) / 100;
+}
+
+/*
+ * Storage needed for the image header: summed in bytes, returned in pages.
+ */
+static long header_storage_needed(void)
+{
+	long bytes = (int) sizeof(struct toi_header) +
+			toi_header_storage_for_modules() +
+			toi_pageflags_space_needed();
+
+	return DIV_ROUND_UP(bytes, PAGE_SIZE);
+}
+
+/*
+ * When freeing memory, pages from either pageset might be freed.
+ *
+ * When seeking to free memory to be able to hibernate, for every ps1 page
+ * freed, we need two fewer pages for the atomic copy because there is one
+ * fewer page to copy and one more page into which data can be copied.
+ *
+ * Freeing ps2 pages saves us nothing directly. No more memory is available
+ * for the atomic copy. Indirectly, a ps1 page might be freed (slab?), but
+ * that's too much work to figure out.
+ *
+ * => ps1_to_free functions
+ *
+ * Of course if we just want to reduce the image size, because of storage
+ * limitations or an image size limit, either ps will do.
+ *
+ * => any_to_free function
+ */
+/* Half the ps1 - ps2 highmem size gap, less free highmem; at least 0. */
+static long highpages_ps1_to_free(void)
+{
+	return max_t(long, 0, DIV_ROUND_UP(get_highmem_size(pagedir1) -
+		get_highmem_size(pagedir2), 2) - real_nr_free_high_pages());
+}
+/* Half the lowmem shortfall for the atomic copy, rounded up; at least 0. */
+static long lowpages_ps1_to_free(void)
+{
+	return max_t(long, 0, DIV_ROUND_UP(get_lowmem_size(pagedir1) +
+		extra_pd1_pages_allowance + MIN_FREE_RAM +
+		toi_memory_for_modules(0) - get_lowmem_size(pagedir2) -
+		real_nr_free_low_pages() - extra_pages_allocated, 2));
+}
+/* Pages in both pagesets plus the header space allocated. */
+static long current_image_size(void)
+{
+	return pagedir1.size + pagedir2.size + header_space_allocated;
+}
+/* Storage shortfall: main_storage_needed(1, 1) beyond storage_available. */
+static long storage_still_required(void)
+{
+	return max_t(long, 0, main_storage_needed(1, 1) - storage_available);
+}
+/* Free low memory we are short of, with the allowance counted twice. */
+static long ram_still_required(void)
+{
+	return max_t(long, 0, MIN_FREE_RAM + toi_memory_for_modules(0) -
+		real_nr_free_low_pages() + 2 * extra_pd1_pages_allowance);
+}
+
+static long any_to_free(int use_image_size_limit)
+{
+	long worst = 0;
+
+	/* << 8 presumably turns the limit from MB into 4K pages -- confirm */
+	if (use_image_size_limit && image_size_limit > 0)
+		worst = max_t(long, 0,
+			current_image_size() - (image_size_limit << 8));
+	worst = max(worst, storage_still_required());
+	return max(worst, ram_still_required());
+}
+
+/* amount_needed
+ *
+ * Amount by which the image size needs to be reduced: the larger of the
+ * pageset 1 requirement (high + low) and the either-pageset requirement.
+ */
+static long amount_needed(int use_image_size_limit)
+{
+	long from_ps1 = highpages_ps1_to_free() + lowpages_ps1_to_free();
+	return max(from_ps1, any_to_free(use_image_size_limit));
+}
+/* Log the current figures; non-zero while any requirement is unmet. */
+static long image_not_ready(int use_image_size_limit)
+{
+	toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
+		"Amount still needed (%ld) > 0:%d. Header: %ld < %ld: %d,"
+		" Storage allocd: %ld < %ld: %d.\n",
+			amount_needed(use_image_size_limit),
+			(amount_needed(use_image_size_limit) > 0),
+			header_space_allocated, header_storage_needed(),
+			header_space_allocated < header_storage_needed(),
+			main_storage_allocated,
+			main_storage_needed(1, 1),
+			main_storage_allocated < main_storage_needed(1, 1));
+
+	toi_cond_pause(0, NULL);
+
+	return (amount_needed(use_image_size_limit) > 0) ||
+		header_space_allocated < header_storage_needed() ||
+		 main_storage_allocated < main_storage_needed(1, 1);
+}
+/* Log each unmet requirement and record the matching abort result. */
+static void display_failure_reason(int tries_exceeded)
+{
+	long storage_required = storage_still_required(),
+	    ram_required = ram_still_required(),
+	    high_ps1 = highpages_ps1_to_free(),
+	    low_ps1 = lowpages_ps1_to_free();
+
+	printk(KERN_INFO "Failed to prepare the image because...\n");
+
+	if (!storage_available) {
+		printk(KERN_INFO "- You need some storage available to be "
+				"able to hibernate.\n");
+		return;
+	}
+
+	if (tries_exceeded)
+		printk(KERN_INFO "- The maximum number of iterations was "
+				"reached without successfully preparing the "
+				"image.\n");
+
+	if (header_space_allocated < header_storage_needed()) {
+		printk(KERN_INFO "- Insufficient header storage allocated. "
+				"Need %ld, have %ld.\n",
+				header_storage_needed(),
+				header_space_allocated);
+		set_abort_result(TOI_INSUFFICIENT_STORAGE);
+	}
+
+	if (storage_required) {
+		printk(KERN_INFO " - We need at least %ld pages of storage "
+				"(ignoring the header), but only have %ld.\n",
+				main_storage_needed(1, 1),
+				main_storage_allocated);
+		set_abort_result(TOI_INSUFFICIENT_STORAGE);
+	}
+
+	if (ram_required) {
+		printk(KERN_INFO " - We need %ld more free pages of low "
+				"memory.\n", ram_required);
+		printk(KERN_INFO "     Minimum free     : %8d\n", MIN_FREE_RAM);
+		printk(KERN_INFO "   + Reqd. by modules : %8ld\n",
+				toi_memory_for_modules(0));
+		printk(KERN_INFO "   - Currently free   : %8ld\n",
+				real_nr_free_low_pages());
+		printk(KERN_INFO "   + 2 * extra allow  : %8ld\n",
+				2 * extra_pd1_pages_allowance);
+		printk(KERN_INFO "                      : ========\n");
+		printk(KERN_INFO "     Still needed     : %8ld\n",
+				ram_required);
+
+		/* Print breakdown of memory needed for modules */
+		toi_memory_for_modules(1);
+		set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+	}
+
+	if (high_ps1) {
+		printk(KERN_INFO "- We need to free %ld highmem pageset 1 "
+				"pages.\n", high_ps1);
+		set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+	}
+
+	if (low_ps1) {
+		printk(KERN_INFO " - We need to free %ld lowmem pageset 1 "
+				"pages.\n", low_ps1);
+		set_abort_result(TOI_UNABLE_TO_FREE_ENOUGH_MEMORY);
+	}
+}
+
+/* Format the image statistics, then printk (always) or toi_message them. */
+static void display_stats(int always, int sub_extra_pd1_allow)
+{
+	char buffer[255];
+	snprintf(buffer, sizeof(buffer),
+		"Free:%ld(%ld). Sets:%ld(%ld),%ld(%ld). Header:%ld/%ld. "
+		"Nosave:%ld-%ld=%ld. Storage:%lu/%lu(%lu=>%lu). "
+		"Needed:%ld,%ld,%ld(%d,%ld,%ld,%ld)\n",
+		/* Free */
+		real_nr_free_pages(all_zones_mask),
+		real_nr_free_low_pages(),
+
+		/* Sets */
+		pagedir1.size, pagedir1.size - get_highmem_size(pagedir1),
+		pagedir2.size, pagedir2.size - get_highmem_size(pagedir2),
+
+		/* Header */
+		header_space_allocated, header_storage_needed(),
+
+		/* Nosave */
+		num_nosave, extra_pages_allocated,
+		num_nosave - extra_pages_allocated,
+
+		/* Storage */
+		main_storage_allocated,
+		storage_available,
+		main_storage_needed(1, sub_extra_pd1_allow),
+		main_storage_needed(1, 1),
+
+		/* Needed */
+		lowpages_ps1_to_free(), highpages_ps1_to_free(),
+		any_to_free(1),
+		MIN_FREE_RAM, toi_memory_for_modules(0),
+		extra_pd1_pages_allowance, ((long) image_size_limit) << 8);
+	/* buffer holds data, so it is never passed as the format itself */
+	if (always)
+		printk("%s", buffer);
+	else
+		toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 1, "%s", buffer);
+}
+
+/* generate_free_page_map
+ *
+ * Description:	Flag every page on the buddy free lists and per-cpu
+ * 		lists of each populated zone as NosaveFree, skipping
+ * 		invalid pfns when clearing stale flags. The flags let
+ * 		us quickly work out which pages to save, and where.
+ */
+static void generate_free_page_map(void)
+{
+	int order, cpu, t;
+	unsigned long flags, i, pfn;
+	struct zone *zone;
+	struct list_head *curr;
+
+	for_each_zone(zone) {
+		if (!populated_zone(zone))
+			continue;
+
+		spin_lock_irqsave(&zone->lock, flags);
+
+		for (i = 0; i < zone->spanned_pages; i++) {
+			pfn = zone->zone_start_pfn + i;
+			if (pfn_valid(pfn))
+				ClearPageNosaveFree(pfn_to_page(pfn));
+		}
+
+		for_each_migratetype_order(order, t) {
+			list_for_each(curr,
+					&zone->free_area[order].free_list[t]) {
+				pfn = page_to_pfn(list_entry(curr, struct page,
+							lru));
+				for (i = 0; i < (1UL << order); i++)
+					SetPageNosaveFree(pfn_to_page(pfn + i));
+			}
+		}
+
+		for_each_online_cpu(cpu) {
+			struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+			struct per_cpu_pages *pcp = &pset->pcp;
+			struct page *page;
+
+			list_for_each_entry(page, &pcp->list, lru)
+				SetPageNosaveFree(page);
+		}
+
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+}
+
+/* size_of_free_region
+ *
+ * Description:	Count the consecutive NosaveFree pages that start at this
+ * 		page, stopping at the end of the page's zone.
+ */
+static int size_of_free_region(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long first_pfn = page_to_pfn(page), pfn = first_pfn;
+	unsigned long zone_end = zone->zone_start_pfn + zone->spanned_pages;
+
+	for (; pfn < zone_end; pfn++)
+		if (!PageNosaveFree(pfn_to_page(pfn)))
+			break;
+
+	return pfn - first_pfn;
+}
+
+/* flag_image_pages
+ *
+ * Walk every valid pfn of every populated zone and count it as free,
+ * nosave, pageset 1 or pageset 2, flagging pageset 1 pages as we go.
+ * Storing the data uses extents, and adding extents might allocate a
+ * new extent page, so this may well be called more than once.
+ */
+static void flag_image_pages(int atomic_copy)
+{
+	int num_free = 0;
+	unsigned long loop;
+	struct zone *zone;
+
+	pagedir1.size = 0;
+	pagedir2.size = 0;
+
+	set_highmem_size(pagedir1, 0);
+	set_highmem_size(pagedir2, 0);
+
+	num_nosave = 0;
+
+	clear_dyn_pageflags(&pageset1_map);
+
+	generate_free_page_map();
+
+	/*
+	 * Pages not to be saved are marked Nosave irrespective of being
+	 * reserved.
+	 */
+	for_each_zone(zone) {
+		int highmem = is_highmem(zone);
+
+		if (!populated_zone(zone))
+			continue;
+
+		for (loop = 0; loop < zone->spanned_pages; loop++) {
+			unsigned long pfn = zone->zone_start_pfn + loop;
+			struct page *page;
+			int chunk_size;
+
+			if (!pfn_valid(pfn))
+				continue;
+
+			page = pfn_to_page(pfn);
+
+			chunk_size = size_of_free_region(page);
+			if (chunk_size) {
+				num_free += chunk_size;
+				loop += chunk_size - 1; /* skip the free run */
+				continue;
+			}
+
+			if (highmem)
+				page = saveable_highmem_page(pfn);
+			else
+				page = saveable_page(pfn);
+
+			if (!page || PageNosave(page)) {
+				num_nosave++;
+				continue;
+			}
+
+			if (PagePageset2(page)) {
+				pagedir2.size++;
+				if (PageHighMem(page))
+					inc_highmem_size(pagedir2);
+				else
+					SetPagePageset1Copy(page);
+				if (PageResave(page)) { /* counted in both */
+					SetPagePageset1(page);
+					ClearPagePageset1Copy(page);
+					pagedir1.size++;
+					if (PageHighMem(page))
+						inc_highmem_size(pagedir1);
+				}
+			} else {
+				pagedir1.size++;
+				SetPagePageset1(page);
+				if (PageHighMem(page))
+					inc_highmem_size(pagedir1);
+			}
+		}
+	}
+
+	if (atomic_copy)
+		return;
+
+	toi_message(TOI_EAT_MEMORY, TOI_MEDIUM, 0,
+		"Count data pages: Set1 (%d) + Set2 (%d) + Nosave (%ld) + "
+		"NumFree (%d) = %d.\n",
+		pagedir1.size, pagedir2.size, num_nosave, num_free,
+		pagedir1.size + pagedir2.size + num_nosave + num_free);
+}
+/* Rebuild pageset flags and counts; refresh storage stats unless atomic_copy */
+void toi_recalculate_image_contents(int atomic_copy)
+{
+	clear_dyn_pageflags(&pageset1_map);
+	if (!atomic_copy) {
+		int pfn;
+		BITMAP_FOR_EACH_SET(&pageset2_map, pfn)
+			ClearPagePageset1Copy(pfn_to_page(pfn));
+		/* Need to call this before getting pageset1_size! */
+		toi_mark_pages_for_pageset2();
+	}
+	flag_image_pages(atomic_copy);
+
+	if (!atomic_copy) {
+		storage_available = toiActiveAllocator->storage_available();
+		display_stats(0, 0);
+	}
+}
+
+/* update_image
+ *
+ * Allocate [more] memory and storage for the image.
+ */
+static void update_image(void)
+{
+	int wanted, got;
+	long seek;
+
+	toi_recalculate_image_contents(0);
+
+	/* Include allowance for growth in pagedir1 while writing pagedir 2 */
+	wanted = pagedir1.size +  extra_pd1_pages_allowance -
+		get_lowmem_size(pagedir2);
+	if (wanted > extra_pages_allocated) {
+		got = toi_allocate_extra_pagedir_memory(wanted);
+		if (got < wanted) {	/* the allocation fell short: give up */
+			toi_message(TOI_EAT_MEMORY, TOI_LOW, 1,
+				"Want %d extra pages for pageset1, got %d.\n",
+				wanted, got);
+			return;
+		}
+	}
+
+	thaw_kernel_threads();
+
+	/*
+	 * Allocate remaining storage space, if possible, up to the
+	 * maximum we know we'll need. It's okay to allocate the
+	 * maximum if the writer is the swapwriter, but
+	 * we don't want to grab all available space on an NFS share.
+	 * We therefore ignore the expected compression ratio here,
+	 * thereby trying to allocate the maximum image size we could
+	 * need (assuming compression doesn't expand the image), but
+	 * don't complain if we can't get the full amount we're after.
+	 */
+
+	storage_available = toiActiveAllocator->storage_available();
+
+	header_space_allocated = header_storage_needed();
+
+	toiActiveAllocator->reserve_header_space(header_space_allocated);
+
+	seek = min(storage_available, main_storage_needed(0, 0));
+
+	toiActiveAllocator->allocate_storage(seek);
+
+	main_storage_allocated = toiActiveAllocator->storage_allocated();
+
+	if (freeze_processes())
+		set_abort_result(TOI_FREEZING_FAILED);
+
+	toi_recalculate_image_contents(0);
+}
+
+/* attempt_to_freeze
+ *
+ * Thaw, then freeze processes again. Returns freeze_processes()'s result
+ * and records TOI_FREEZING_FAILED when that result is non-zero.
+ */
+static int attempt_to_freeze(void)
+{
+	int err;
+
+	/* Stop processes before checking again */
+	thaw_processes();
+	toi_prepare_status(CLEAR_BAR, "Freezing processes & syncing "
+			"filesystems.");
+	err = freeze_processes();
+	if (!err)
+		return 0;
+
+	set_abort_result(TOI_FREEZING_FAILED);
+	return err;
+}
+
+/* eat_memory
+ *
+ * Try to free some memory, either to meet hard or soft constraints on the image
+ * characteristics.
+ *
+ * Hard constraints:
+ * - Pageset1 must be < half of memory;
+ * - We must have enough memory free at resume time to have pageset1
+ *   be able to be loaded in pages that don't conflict with where it has to
+ *   be restored.
+ * Soft constraints
+ * - User specified image size limit.
+ */
+static void eat_memory(void)
+{
+	long amount_wanted = 0;
+	int did_eat_memory = 0;
+
+	/*
+	 * Note that if we have enough storage space and enough free memory, we
+	 * may exit without eating anything. We give up when the last 10
+	 * iterations ate no extra pages because we're not going to get much
+	 * more anyway, but the few pages we get will take a lot of time.
+	 *
+	 * We freeze processes before beginning, and then unfreeze them if we
+	 * need to eat memory until we think we have enough. If our attempts
+	 * to freeze fail, we give up and abort.
+	 */
+
+	toi_recalculate_image_contents(0);
+	amount_wanted = amount_needed(1);
+
+	switch (image_size_limit) {
+	case -1: /* Don't eat any memory */
+		if (amount_wanted > 0) {
+			set_abort_result(TOI_WOULD_EAT_MEMORY);
+			return;
+		}
+		break;
+	case -2:  /* Free caches only */
+		drop_pagecache();
+		toi_recalculate_image_contents(0);
+		amount_wanted = amount_needed(1);
+		did_eat_memory = 1;
+		break;
+	default:
+		break;
+	}
+
+	if (amount_wanted > 0 && !test_result_state(TOI_ABORTED) &&
+			image_size_limit != -1) {
+		struct zone *zone;
+		int zone_idx;
+
+		toi_prepare_status(CLEAR_BAR,
+				"Seeking to free %ldMB of memory.",
+				MB(amount_wanted));
+
+		thaw_kernel_threads();
+
+		for (zone_idx = 0; zone_idx < MAX_NR_ZONES; zone_idx++) {
+			long zone_type_free = max_t(long,
+					(zone_idx == ZONE_HIGHMEM) ?
+					highpages_ps1_to_free() :
+					lowpages_ps1_to_free(), amount_wanted);
+
+			if (zone_type_free <= 0) /* nothing left to free */
+				break;
+
+			for_each_zone(zone) {
+				if (zone_idx(zone) != zone_idx)
+					continue;
+
+				shrink_one_zone(zone, zone_type_free, 3);
+
+				did_eat_memory = 1;
+
+				toi_recalculate_image_contents(0);
+
+				amount_wanted = amount_needed(1);
+				zone_type_free = max_t(long,
+					(zone_idx == ZONE_HIGHMEM) ?
+					highpages_ps1_to_free() :
+					lowpages_ps1_to_free(), amount_wanted);
+
+				if (zone_type_free <= 0)
+					break;
+			}
+		}
+
+		toi_cond_pause(0, NULL);
+
+		if (freeze_processes())
+			set_abort_result(TOI_FREEZING_FAILED);
+	}
+
+	if (did_eat_memory) {
+		unsigned long orig_state = get_toi_state();
+		/* Freeze_processes will call sys_sync too */
+		restore_toi_state(orig_state);
+		toi_recalculate_image_contents(0);
+	}
+
+	/* Blank out image size display */
+	toi_update_status(100, 100, NULL);
+}
+
+/* toi_prepare_image
+ *
+ * Entry point to the whole image preparation section.
+ *
+ * We do four things:
+ * - Freeze processes;
+ * - Ensure image size constraints are met;
+ * - Complete all the preparation for saving the image, including
+ *   allocation of storage. The only memory that should be needed
+ *   when we're finished is that for actually storing the image (and
+ *   we know how much is needed because the modules tell us).
+ * - Make sure that all dirty buffers are written out.
+ *
+ * Returns non-zero on failure, else allocate_checksum_pages()'s result.
+ */
+#define MAX_TRIES 2	/* passes of eat_memory() + update_image() */
+int toi_prepare_image(void)
+{
+	int result = 1, tries = 1;
+
+	header_space_allocated = 0;
+	main_storage_allocated = 0;
+
+	if (attempt_to_freeze())
+		return 1;
+
+	if (!extra_pd1_pages_allowance)	/* measured once, then reused */
+		get_extra_pd1_allowance();
+
+	storage_available = toiActiveAllocator->storage_available();
+
+	if (!storage_available) {
+		printk(KERN_INFO "No storage available. Didn't try to prepare "
+				"an image.\n");
+		display_failure_reason(0);
+		set_abort_result(TOI_NOSTORAGE_AVAILABLE);
+		return 1;
+	}
+
+	if (build_attention_list()) {
+		abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
+				"Unable to successfully prepare the image.\n");
+		return 1;
+	}
+
+	do {
+		toi_prepare_status(CLEAR_BAR,
+				"Preparing Image. Try %d.", tries);
+
+		eat_memory();
+
+		if (test_result_state(TOI_ABORTED))
+			break;
+
+		update_image();
+
+		tries++;
+
+	} while (image_not_ready(1) && tries <= MAX_TRIES &&
+			!test_result_state(TOI_ABORTED));
+
+	result = image_not_ready(0); /* final check ignores the user limit */
+
+	if (!test_result_state(TOI_ABORTED)) {
+		if (result) {
+			display_stats(1, 0);
+			display_failure_reason(tries > MAX_TRIES);
+			abort_hibernate(TOI_UNABLE_TO_PREPARE_IMAGE,
+				"Unable to successfully prepare the image.\n");
+		} else {
+			unlink_lru_lists();
+			toi_cond_pause(1, "Image preparation complete.");
+		}
+	}
+
+	return result ? result : allocate_checksum_pages();
+}
+
+#ifdef CONFIG_TOI_EXPORTS
+EXPORT_SYMBOL_GPL(real_nr_free_pages);
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_prepare_image.h linux-2.6-block-custom/kernel/power/tuxonice_prepare_image.h
--- linux-2.6-block/kernel/power/tuxonice_prepare_image.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_prepare_image.h	2008-09-26 19:48:24.023763356 +0900
@@ -0,0 +1,35 @@
+/*
+ * kernel/power/tuxonice_prepare_image.h
+ *
+ * Copyright (C) 2003-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <asm/sections.h>
+
+extern int toi_prepare_image(void);
+extern void toi_recalculate_image_contents(int atomic_copy);
+extern long real_nr_free_pages(unsigned long zone_idx_mask);
+extern int image_size_limit;
+extern void toi_free_extra_pagedir_memory(void);
+extern long extra_pd1_pages_allowance;
+extern void free_attention_list(void);
+
+#define MIN_FREE_RAM 100			/* pages */
+#define MIN_EXTRA_PAGES_ALLOWANCE 500		/* pages */
+
+#define all_zones_mask ((unsigned long) ((1 << MAX_NR_ZONES) - 1))
+#ifdef CONFIG_HIGHMEM
+#define real_nr_free_high_pages() (real_nr_free_pages(1 << ZONE_HIGHMEM))
+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask - \
+						(1 << ZONE_HIGHMEM)))
+#else /* !CONFIG_HIGHMEM */
+#define real_nr_free_high_pages() (0)
+#define real_nr_free_low_pages() (real_nr_free_pages(all_zones_mask))
+
+/* Out of range, so eat_memory's zone_idx == ZONE_HIGHMEM never matches */
+#define ZONE_HIGHMEM (MAX_NR_ZONES + 1)
+#endif /* CONFIG_HIGHMEM */
+
diff -Npur linux-2.6-block/kernel/power/tuxonice_storage.c linux-2.6-block-custom/kernel/power/tuxonice_storage.c
--- linux-2.6-block/kernel/power/tuxonice_storage.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_storage.c	2008-09-26 19:48:24.023763356 +0900
@@ -0,0 +1,293 @@
+/*
+ * kernel/power/tuxonice_storage.c
+ *
+ * Copyright (C) 2005-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for talking to a userspace program that manages storage.
+ *
+ * The kernel side:
+ * - starts the userspace program;
+ * - sends messages telling it when to open and close the connection;
+ * - tells it when to quit;
+ *
+ * The user space side:
+ * - passes messages regarding status;
+ *
+ */
+
+#include <linux/suspend.h>
+#include <linux/freezer.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_ui.h"
+
+static struct user_helper_data usm_helper_data;
+static struct toi_module_ops usm_ops;
+static int message_received, usm_prepare_count;
+static int storage_manager_last_action, storage_manager_action;
+
+/* Netlink receive hook: record a SUCCESS/FAILED reply and wake the waiter.
+ * Returns 0 for control messages, a negative errno on rejection, else 1.
+ */
+static int usm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	int type;
+
+	type = nlh->nlmsg_type;
+
+	/* A control message: ignore them */
+	if (type < NETLINK_MSG_BASE)
+		return 0;
+
+	/* Unknown message: reply with EINVAL */
+	if (type >= USM_MSG_MAX)
+		return -EINVAL;
+
+	/* All operations require privileges, even GET */
+	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
+
+	/* Only allow one task to receive NOFREEZE privileges */
+	if (type == NETLINK_MSG_NOFREEZE_ME && usm_helper_data.pid != -1)
+		return -EBUSY;
+
+	switch (type) {
+	case USM_MSG_SUCCESS:
+	case USM_MSG_FAILED:
+		message_received = type;
+		complete(&usm_helper_data.wait_for_process);
+		break;
+	default:
+		printk(KERN_INFO "Storage manager doesn't recognise "
+				"message %d.\n", type);
+	}
+
+	return 1;
+}
+
+#ifdef CONFIG_NET
+static int activations;
+
+int toi_activate_storage(int force)
+{
+	int attempt;
+
+	if (usm_helper_data.pid == -1 || !usm_ops.enabled)
+		return 0;
+
+	message_received = 0;
+	activations++;
+
+	/* Already active for an earlier caller: just count, unless forced */
+	if (activations > 1 && !force)
+		return 0;
+
+	for (attempt = 1; attempt < 2; attempt++) {
+		if (message_received && message_received != USM_MSG_FAILED)
+			break;
+
+		toi_prepare_status(DONT_CLEAR_BAR, "Activate storage attempt "
+				"%d.\n", attempt);
+
+		init_completion(&usm_helper_data.wait_for_process);
+
+		toi_send_netlink_message(&usm_helper_data,
+			USM_MSG_CONNECT, NULL, 0);
+
+		/* Give the userspace process 2 seconds to make contact */
+		wait_for_completion_timeout(&usm_helper_data.wait_for_process,
+				2*HZ);
+	}
+
+	return 0;
+}
+
+int toi_deactivate_storage(int force)
+{
+	if (usm_helper_data.pid == -1 || !usm_ops.enabled)
+		return 0;
+
+	message_received = 0;
+	activations--;
+
+	/* Other activations are still outstanding: only count, unless forced */
+	if (activations && !force)
+		return 0;
+
+	init_completion(&usm_helper_data.wait_for_process);
+
+	toi_send_netlink_message(&usm_helper_data,
+			USM_MSG_DISCONNECT,
+			NULL, 0);
+
+	wait_for_completion_timeout(&usm_helper_data.wait_for_process, 2*HZ);
+
+	if (message_received && message_received != USM_MSG_FAILED)
+		return 0;
+
+	printk(KERN_INFO "Returning failure disconnecting storage.\n");
+	return 1;
+}
+#endif
+/* Sysfs side effect: run prepare/activate/deactivate/cleanup once, logged. */
+static void storage_manager_simulate(void)
+{
+	printk(KERN_INFO "--- Storage manager simulate ---\n");
+	toi_prepare_usm();
+	schedule();
+	printk(KERN_INFO "--- Activate storage 1 ---\n");
+	toi_activate_storage(1);
+	schedule();
+	printk(KERN_INFO "--- Deactivate storage 1 ---\n");
+	toi_deactivate_storage(1);
+	schedule();
+	printk(KERN_INFO "--- Cleanup usm ---\n");
+	toi_cleanup_usm();
+	schedule();
+	printk(KERN_INFO "--- Storage manager simulate ends ---\n");
+}
+/* Bytes needed to save our config: the program path without its NUL. */
+static int usm_storage_needed(void)
+{
+	return strlen(usm_helper_data.program);
+}
+/* Copy the program path (no terminator) into buf; returns bytes written. */
+static int usm_save_config_info(char *buf)
+{
+	int len = strlen(usm_helper_data.program);
+	memcpy(buf, usm_helper_data.program, len);
+	return len;
+}
+
+static void usm_load_config_info(char *buf, int size)
+{
+	if (usm_helper_data.program[0] || size <= 0)
+		return;	/* keep a path that has already been set */
+	size = min_t(int, size, sizeof(usm_helper_data.program) - 1);
+	memcpy(usm_helper_data.program, buf, size); /* saved without a NUL */
+	usm_helper_data.program[size] = '\0';
+}
+
+static int usm_memory_needed(void)
+{
+	/* ball park figure of 32 pages, returned as a byte count */
+	return 32 * PAGE_SIZE;
+}
+
+/* toi_prepare_usm - set up the storage manager helper on first use.
+ * Returns 1 if the helper has a pid afterwards, 0 otherwise. */
+int toi_prepare_usm(void)
+{
+	usm_prepare_count++;
+
+	if (usm_prepare_count > 1 || !usm_ops.enabled)
+		return 0;
+
+	usm_helper_data.pid = -1;
+
+	if (!*usm_helper_data.program)
+		return 0;
+
+	toi_netlink_setup(&usm_helper_data);
+
+	if (usm_helper_data.pid == -1)
+		printk(KERN_INFO "TuxOnIce Storage Manager wanted, but couldn't"
+				" start it.\n");
+
+	toi_activate_storage(0); /* returns at once when pid is still -1 */
+
+	return usm_helper_data.pid != -1;
+}
+
+void toi_cleanup_usm(void)
+{
+	/* Only the last user tears down, and only if a helper has a pid. */
+	if (--usm_prepare_count || usm_helper_data.pid <= -1)
+		return;
+
+	toi_deactivate_storage(0);
+	toi_netlink_close(&usm_helper_data);
+}
+
+static void storage_manager_activate(void)
+{
+	/* Writing the value that is already in effect changes nothing. */
+	if (storage_manager_action != storage_manager_last_action) {
+		if (storage_manager_action)
+			toi_prepare_usm();
+		else
+			toi_cleanup_usm();
+
+		storage_manager_last_action = storage_manager_action;
+	}
+}
+
+/*
+ * Storage manager specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+	{ TOI_ATTR("simulate_atomic_copy", SYSFS_RW),
+	  .type				= TOI_SYSFS_DATA_NONE,
+	  .write_side_effect		= storage_manager_simulate,
+	},
+
+	{ TOI_ATTR("enabled", SYSFS_RW),
+	  SYSFS_INT(&usm_ops.enabled, 0, 1, 0)
+	},
+
+	{ TOI_ATTR("program", SYSFS_RW),
+	  SYSFS_STRING(usm_helper_data.program, 254, 0)
+	},
+
+	{ TOI_ATTR("activate_storage", SYSFS_RW),
+	  SYSFS_INT(&storage_manager_action, 0, 1, 0),
+	  .write_side_effect		= storage_manager_activate,
+	}
+};
+
+/* Module operations handed to toi_register_module() by toi_usm_init(). */
+static struct toi_module_ops usm_ops = {
+	.name				= "usm",
+	.directory			= "storage_manager",
+	.type				= MISC_MODULE,
+	.module				= THIS_MODULE,
+	.memory_needed			= usm_memory_needed,
+	.storage_needed			= usm_storage_needed,
+	.save_config_info		= usm_save_config_info,
+	.load_config_info		= usm_load_config_info,
+
+	.sysfs_data			= sysfs_params,
+	.num_sysfs_entries		= ARRAY_SIZE(sysfs_params),
+};
+
+/* toi_usm_init
+ * Description: Set helper defaults and register the usm module.
+ */
+int toi_usm_init(void)
+{
+	usm_helper_data.nl = NULL;
+	usm_helper_data.program[0] = '\0';
+	usm_helper_data.pid = -1;
+	usm_helper_data.skb_size = 0;
+	usm_helper_data.pool_limit = 6;
+	usm_helper_data.netlink_id = NETLINK_TOI_USM;
+	usm_helper_data.name = "userspace storage manager";
+	usm_helper_data.rcv_msg = usm_user_rcv_msg;
+	usm_helper_data.interface_version = 1;
+	usm_helper_data.must_init = 0;
+	init_completion(&usm_helper_data.wait_for_process);
+
+	return toi_register_module(&usm_ops);
+}
+/* Counterpart of toi_usm_init(): close netlink, then unregister usm_ops. */
+void toi_usm_exit(void)
+{
+	toi_netlink_close_complete(&usm_helper_data);
+	toi_unregister_module(&usm_ops);
+}
diff -Npur linux-2.6-block/kernel/power/tuxonice_storage.h linux-2.6-block-custom/kernel/power/tuxonice_storage.h
--- linux-2.6-block/kernel/power/tuxonice_storage.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_storage.h	2008-09-26 19:48:24.023763356 +0900
@@ -0,0 +1,53 @@
+/*
+ * kernel/power/tuxonice_storage.h
+ *
+ * Copyright (C) 2005-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifdef CONFIG_NET
+int toi_prepare_usm(void);
+void toi_cleanup_usm(void);
+
+int toi_activate_storage(int force);
+int toi_deactivate_storage(int force);
+extern int toi_usm_init(void);
+extern void toi_usm_exit(void);
+#else /* !CONFIG_NET: no-op stubs */
+static inline int toi_usm_init(void) { return 0; }
+static inline void toi_usm_exit(void) { }
+
+static inline int toi_activate_storage(int force)
+{
+	return 0;
+}
+
+static inline int toi_deactivate_storage(int force)
+{
+	return 0;
+}
+
+static inline int toi_prepare_usm(void) { return 0; }
+static inline void toi_cleanup_usm(void) { }
+#endif /* CONFIG_NET */
+
+enum {
+	USM_MSG_BASE = 0x10,
+
+	/* Kernel -> Userspace */
+	USM_MSG_CONNECT = 0x30,
+	USM_MSG_DISCONNECT = 0x31,
+	USM_MSG_SUCCESS = 0x40,		/* Userspace -> Kernel replies */
+	USM_MSG_FAILED = 0x41,
+
+	USM_MSG_MAX,
+};
+
+#ifdef CONFIG_NET /* NOTE(review): repeats toi_usm_init declared above */
+extern __init int toi_usm_init(void);
+extern __exit void toi_usm_cleanup(void); /* NOTE(review): .c has _exit */
+#else /* NOTE(review): these macros hide the inline stubs above */
+#define toi_usm_init() do { } while (0)
+#define toi_usm_cleanup() do { } while (0)
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_swap.c linux-2.6-block-custom/kernel/power/tuxonice_swap.c
--- linux-2.6-block/kernel/power/tuxonice_swap.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_swap.c	2008-09-26 19:48:24.027754296 +0900
@@ -0,0 +1,1284 @@
+/*
+ * kernel/power/tuxonice_swap.c
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * Distributed under GPLv2.
+ *
+ * This file encapsulates functions for usage of swap space as a
+ * backing store.
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/swapops.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+
+#include "tuxonice.h"
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice_io.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_extent.h"
+#include "tuxonice_block_io.h"
+#include "tuxonice_alloc.h"
+
+static struct toi_module_ops toi_swapops;
+
+/* --- Struct of pages stored on disk */
+
+struct sig_data {
+	dev_t device;
+	unsigned long sector;
+	int resume_attempted;
+	int orig_sig_type;
+};
+
+union diskpage {
+	union swap_header swh;	/* swh.magic is the only member used */
+	struct sig_data sig_data;
+};
+
+union p_diskpage {
+	union diskpage *pointer;
+	char *ptr;
+	unsigned long address;
+};
+
+enum {
+	IMAGE_SIGNATURE,
+	NO_IMAGE_SIGNATURE,
+	TRIED_RESUME,
+	NO_TRIED_RESUME,
+};
+
+/*
+ * Two views of the swap header page. current_signature_page is the page read
+ * from disk at the start of hibernating or when checking whether to resume.
+ * no_image_signature_contents is the copy kept in the image header, showing
+ * how the start of that page looked before our signature was written.
+ */
+static char *current_signature_page;
+static char no_image_signature_contents[sizeof(struct sig_data)];
+
+/* Devices used for swap */
+static struct toi_bdev_info devinfo[MAX_SWAPFILES];
+
+/* Extent chains for swap & blocks */
+struct hibernate_extent_chain swapextents;
+struct hibernate_extent_chain block_chain[MAX_SWAPFILES];
+
+static dev_t header_dev_t;
+static struct block_device *header_block_device;
+static unsigned long headerblock;
+
+/* Swapfile to swapon/off automatically; holds the 255-char sysfs maximum. */
+static char swapfilename[256] = "";
+static int toi_swapon_status;
+
+/* Header Page Information */
+static long header_pages_reserved;
+
+/* Swap Pages */
+static long swap_pages_allocated;
+
+/* User Specified Parameters. */
+
+static unsigned long resume_firstblock;
+static dev_t resume_swap_dev_t;
+static struct block_device *resume_block_device;
+
+struct sysinfo swapinfo;
+
+/* Block devices open. */
+struct bdev_opened {
+	dev_t device;
+	struct block_device *bdev;
+};
+
+/*
+ * Entry MAX_SWAPFILES is the resume block device, which may
+ * be a swap device not enabled when we hibernate.
+ * Entry MAX_SWAPFILES + 1 is the header block device, which
+ * is needed before we find out which slot it occupies.
+ *
+ * We use a separate struct to devInfo so that we can track
+ * the bdevs we open, because if we need to abort resuming
+ * prior to the atomic restore, they need to be closed, but
+ * closing them after successfully resuming would be wrong.
+ */
+static struct bdev_opened *bdevs_opened[MAX_SWAPFILES + 2];
+
+/**
+ * close_bdev: Close a swap bdev.
+ *
+ * i: Index into bdevs_opened[] of the entry to close (no-op if unused).
+ */
+static void close_bdev(int i)
+{
+	struct bdev_opened *this = bdevs_opened[i];
+
+	if (!this)
+		return;
+
+	blkdev_put(this->bdev);
+	toi_kfree(8, this);
+	bdevs_opened[i] = NULL;
+}
+
+/**
+ * close_bdevs: Release every block device recorded in bdevs_opened[].
+ *
+ * Also forgets the cached resume and header device pointers.
+ */
+static void close_bdevs(void)
+{
+	int slot;
+
+	for (slot = 0; slot < MAX_SWAPFILES + 2; slot++)
+		close_bdev(slot);
+	header_block_device = NULL;
+	resume_block_device = NULL;
+}
+
+/**
+ * open_bdev: Open a bdev at resume time.
+ *
+ * index: Slot in bdevs_opened[]. Normally the swap index; MAX_SWAPFILES is the
+ * resume= device (the user can point it at a swap partition/file that isn't
+ * swapon'd when they hibernate) and MAX_SWAPFILES+1 the device holding the
+ * first header page, whose real index is unknown until that page is read.
+ * device: The device major/minor.
+ * display_errs: Whether to tell the user if the device can't be opened.
+ *
+ * We stored a dev_t in the image header. Open the matching device without
+ * requiring /dev/<whatever> in most cases and record the details needed
+ * to close it later and avoid duplicating work.
+ * Returns the block device, or an ERR_PTR() on failure (never NULL).
+ */
+static struct block_device *open_bdev(int index, dev_t device, int display_errs)
+{
+	struct bdev_opened *this;
+	struct block_device *bdev;
+
+	if (bdevs_opened[index]) {
+		if (bdevs_opened[index]->device == device)
+			return bdevs_opened[index]->bdev;
+
+		close_bdev(index);
+	}
+
+	bdev = toi_open_by_devnum(device, FMODE_READ);
+
+	if (IS_ERR(bdev) || !bdev) {
+		if (display_errs)
+			toi_early_boot_message(1, TOI_CONTINUE_REQ,
+				"Failed to get access to block device "
+				"\"%x\" (error %d).\n Maybe you need "
+				"to run mknod and/or lvmsetup in an "
+				"initrd/ramfs?", device, (int) PTR_ERR(bdev));
+		return ERR_PTR(-EINVAL);
+	}
+
+	this = toi_kzalloc(8, sizeof(struct bdev_opened), GFP_KERNEL);
+	if (!this) {
+		printk(KERN_WARNING "TuxOnIce: Failed to allocate memory for "
+				"opening a bdev.\n");
+		blkdev_put(bdev);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	bdevs_opened[index] = this;
+	this->device = device;
+	this->bdev = bdev;
+
+	return bdev;
+}
+
+/**
+ * enable_swapfile: Swapon the user specified swapfile prior to hibernating.
+ *
+ * Activate the given swapfile if it wasn't already enabled (-EBUSY is not
+ * reported). Remember whether we really did swapon it for swapoffing later.
+ */
+static void enable_swapfile(void)
+{
+	int activateswapresult;
+
+	if (!swapfilename[0])
+		return;
+	/* Attempt to swap on with maximum priority */
+	activateswapresult = sys_swapon(swapfilename, 0xFFFF);
+	if (!activateswapresult)
+		toi_swapon_status = 1;
+	else if (activateswapresult != -EBUSY)
+		printk("TuxOnIce: The swapfile/partition specified by "
+			"/sys/power/tuxonice/swap/swapfilename "
+			"(%s) could not be turned on (error %d). "
+			"Attempting to continue.\n",
+			swapfilename, activateswapresult);
+}
+
+/**
+ * disable_swapfile: Swapoff any file swaponed at the start of the cycle.
+ *
+ * Only acts if enable_swapfile() itself turned the swapfile on; the flag
+ * recording that is cleared again so a later call does nothing.
+ */
+static void disable_swapfile(void)
+{
+	if (toi_swapon_status) {
+		/* Undo our own swapon and forget that we did it. */
+		sys_swapoff(swapfilename);
+		toi_swapon_status = 0;
+	}
+}
+
+/**
+ * try_to_parse_resume_device: Try to parse resume=
+ *
+ * Any "swap:" has been stripped away and we just have the path to deal with.
+ * Try name_to_dev_t, then fall back to stat'ing the path, and open the
+ * resulting device as resume_block_device. Returns 0 on success, else 1.
+ */
+static int try_to_parse_resume_device(char *commandline, int quiet)
+{
+	struct kstat stat;
+	int error = 0;
+
+	resume_swap_dev_t = name_to_dev_t(commandline);
+	if (!resume_swap_dev_t) {
+		struct file *file = filp_open(commandline,
+				O_RDONLY|O_LARGEFILE, 0);
+
+		if (!IS_ERR(file) && file) {
+			error = vfs_getattr(file->f_vfsmnt, file->f_dentry,
+					&stat);
+			filp_close(file, NULL);
+		} else
+			error = vfs_stat(commandline, &stat);
+		if (!error)
+			resume_swap_dev_t = stat.rdev;
+	}
+
+	if (!resume_swap_dev_t) {
+		if (quiet)
+			return 1;
+		if (test_toi_state(TOI_TRYING_TO_RESUME))
+			toi_early_boot_message(1, TOI_CONTINUE_REQ,
+			  "Failed to translate \"%s\" into a device id.\n",
+			  commandline);
+		else
+			printk("TuxOnIce: Can't translate \"%s\" into a device "
+					"id yet.\n", commandline);
+		return 1;
+	}
+
+	resume_block_device = open_bdev(MAX_SWAPFILES, resume_swap_dev_t, 0);
+	if (IS_ERR(resume_block_device)) {
+		resume_block_device = NULL;	/* never cache an ERR_PTR */
+		if (!quiet)
+			toi_early_boot_message(1, TOI_CONTINUE_REQ,
+				"Failed to get access to \"%s\", where"
+				" the swap header should be found.",
+				commandline);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * A partly read image may have left data behind: let the bio layer clean
+ * up after the read and wipe our per-device table.
+ */
+static void toi_swap_noresume_reset(void)
+{
+	toi_bio_ops.rw_cleanup(READ);
+	memset(devinfo, 0, sizeof(devinfo));
+}
+
+static int get_current_signature(void)
+{
+	if (current_signature_page)
+		return 0;
+
+	current_signature_page = (char *) toi_get_zeroed_page(38,
+			TOI_ATOMIC_GFP);
+	if (!current_signature_page)
+		return -ENOMEM;
+
+	/*
+	 * NOTE(review): if this read fails the page stays cached, so later
+	 * calls return 0 without retrying until forget_signatures() runs.
+	 */
+	return toi_bio_ops.bdev_page_io(READ, resume_block_device,
+		resume_firstblock, virt_to_page(current_signature_page));
+}
+
+static int parse_signature(void)
+{
+	const char *known_sigs[] = {
+		"SWAP-SPACE", "SWAPSPACE2", "S1SUSP", "S2SUSP", "S1SUSPEND"
+	};
+	struct sig_data *sig;
+	char *magic;
+	int idx;
+
+	if (!current_signature_page) {
+		int result = get_current_signature();
+
+		if (result)
+			return result;
+	}
+
+	sig = (struct sig_data *) current_signature_page;
+	magic = ((union diskpage *) current_signature_page)->swh.magic.magic;
+
+	/* 0-4: plain swap, or another hibernation implementation's mark. */
+	for (idx = 0; idx < 5; idx++)
+		if (!memcmp(known_sigs[idx], magic, strlen(known_sigs[idx])))
+			return idx;
+
+	if (memcmp(tuxonice_signature, magic, sizeof(tuxonice_signature)))
+		return -1;
+
+	/* Ours: note where the image header is and the resume history. */
+	header_dev_t = sig->device;
+	headerblock = sig->sector;
+	if (sig->resume_attempted)
+		set_toi_state(TOI_RESUMED_BEFORE);
+	else
+		clear_toi_state(TOI_RESUMED_BEFORE);
+	return 10;
+}
+
+static void forget_signatures(void)
+{
+	if (!current_signature_page)
+		return;
+	toi_free_page(38, (unsigned long) current_signature_page);
+	current_signature_page = NULL;
+}
+
+/*
+ * write_modified_signature
+ *
+ * Apply one of IMAGE_SIGNATURE, NO_IMAGE_SIGNATURE, TRIED_RESUME or
+ * NO_TRIED_RESUME to a copy of the signature page and write it to disk.
+ */
+static int write_modified_signature(int modification)
+{
+	union p_diskpage swap_header_page;
+	struct swap_info_struct *si;
+	int result;
+	char *orig_sig;
+
+	/* In case we haven't already */
+	result = get_current_signature();
+
+	if (result)
+		return result;
+
+	swap_header_page.address = toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
+
+	if (!swap_header_page.address)
+		return -ENOMEM;
+
+	memcpy(swap_header_page.ptr, current_signature_page, PAGE_SIZE);
+
+	switch (modification) {
+	case IMAGE_SIGNATURE:
+		/* Save the bytes our sig_data is about to overwrite. */
+		memcpy(no_image_signature_contents, swap_header_page.ptr,
+				sizeof(no_image_signature_contents));
+
+		/* Get the details of the header first page. */
+		toi_extent_state_goto_start(&toi_writer_posn);
+		toi_bio_ops.forward_one_page(1);
+
+		si = get_swap_info_struct(toi_writer_posn.current_chain);
+
+		/* Prepare the signature */
+		swap_header_page.pointer->sig_data.device = si->bdev->bd_dev;
+		swap_header_page.pointer->sig_data.sector =
+			toi_writer_posn.current_offset;
+		swap_header_page.pointer->sig_data.resume_attempted = 0;
+		swap_header_page.pointer->sig_data.orig_sig_type =
+			parse_signature();
+
+		memcpy(swap_header_page.pointer->swh.magic.magic,
+				tuxonice_signature, sizeof(tuxonice_signature));
+
+		break;
+	case NO_IMAGE_SIGNATURE:	/* put the plain swap header back */
+		if (!swap_header_page.pointer->sig_data.orig_sig_type)
+			orig_sig = "SWAP-SPACE";
+		else
+			orig_sig = "SWAPSPACE2";
+
+		memcpy(swap_header_page.pointer->swh.magic.magic, orig_sig, 10);
+		memcpy(swap_header_page.ptr, no_image_signature_contents,
+				sizeof(no_image_signature_contents));
+		break;
+	case TRIED_RESUME:
+		swap_header_page.pointer->sig_data.resume_attempted = 1;
+		break;
+	case NO_TRIED_RESUME:
+		swap_header_page.pointer->sig_data.resume_attempted = 0;
+		break;
+	}
+
+	result = toi_bio_ops.bdev_page_io(WRITE, resume_block_device,
+		resume_firstblock, virt_to_page(swap_header_page.address));
+	/* The cached page is updated whether or not the write succeeded. */
+	memcpy(current_signature_page, swap_header_page.ptr, PAGE_SIZE);
+
+	toi_free_page(38, swap_header_page.address);
+
+	return result;
+}
+
+/*
+ * apply_header_reservation: step past header_pages_reserved pages and save
+ * the position in toi_writer_posn_save[2]. Returns -ENOSPC if we run out.
+ * Use 0 (READ) to forward_one_page so it doesn't complain if we haven't
+ * allocated storage yet.
+ */
+static int apply_header_reservation(void)
+{
+	int i;
+
+	toi_extent_state_goto_start(&toi_writer_posn);
+	toi_bio_ops.forward_one_page(0); /* To first page */
+
+	for (i = 0; i < header_pages_reserved; i++)
+		if (toi_bio_ops.forward_one_page(0))
+			return -ENOSPC;
+
+	/* The end of header pages will be the start of pageset 2;
+	 * we are now sitting on the first pageset2 page. */
+	toi_extent_state_save(&toi_writer_posn, &toi_writer_posn_save[2]);
+	return 0;
+}
+
+static void toi_swap_reserve_header_space(int request)
+{
+	/* Record the request, then re-apply it in case storage exists. */
+	header_pages_reserved = request;
+
+	(void) apply_header_reservation();	/* failure is not an error yet */
+}
+
+static void free_block_chains(void)
+{
+	struct hibernate_extent_chain *chain = block_chain;
+
+	for (; chain < block_chain + MAX_SWAPFILES; chain++)
+		if (chain->first)	/* skip chains with no extents */
+			toi_put_extent_chain(chain);
+}
+
+static int add_blocks_to_extent_chain(int chain, int start, int end)
+{
+	if (test_action_state(TOI_TEST_BIO))
+		printk(KERN_INFO "Adding extent chain %d %d-%d.\n", chain,
+				start << devinfo[chain].bmap_shift,
+				end << devinfo[chain].bmap_shift);
+
+	if (!toi_add_to_extent_chain(&block_chain[chain], start, end))
+		return 0;
+
+	/* Couldn't record the extent: drop every block chain. */
+	free_block_chains();
+	return -ENOMEM;
+}
+
+
+static int get_main_pool_phys_params(void)
+{
+	struct hibernate_extent *extentpointer = NULL;
+	unsigned long address;
+	int extent_min = -1, extent_max = -1, last_chain = -1;
+	/* NOTE(review): sectors are held in ints; sector_t may be wider. */
+	free_block_chains();
+	/* Map each swap entry to its device sector, merging adjacent runs. */
+	toi_extent_for_each(&swapextents, extentpointer, address) {
+		swp_entry_t swap_address = (swp_entry_t) { address };
+		pgoff_t offset = swp_offset(swap_address);
+		unsigned swapfilenum = swp_type(swap_address);
+		struct swap_info_struct *sis =
+			get_swap_info_struct(swapfilenum);
+		sector_t new_sector = map_swap_page(sis, offset);
+
+		if ((new_sector == extent_max + 1) &&
+		    (last_chain == swapfilenum)) {
+			extent_max++;
+			continue;
+		}
+		/* Run broken: store the finished one (if any), start anew. */
+		if (extent_min > -1 && add_blocks_to_extent_chain(last_chain,
+					extent_min, extent_max))
+			return -ENOMEM;
+
+		extent_min = extent_max = new_sector;
+		last_chain = swapfilenum;
+	}
+	/* Store the final run, which the loop leaves pending. */
+	if (extent_min > -1 && add_blocks_to_extent_chain(last_chain,
+				extent_min, extent_max))
+			return -ENOMEM;
+
+	return apply_header_reservation();
+}
+
+static long raw_to_real(long raw)
+{
+	long result;
+	/* Pages left once each costs an extra unsigned long + int. */
+	result = raw - (raw * (sizeof(unsigned long) + sizeof(int)) +
+		(PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
+		(PAGE_SIZE + sizeof(unsigned long) + sizeof(int));
+
+	return result < 0 ? 0 : result;
+}
+/* Swap pages we hold, less the header reservation and raw_to_real() cut. */
+static int toi_swap_storage_allocated(void)
+{
+	return (int) raw_to_real(swap_pages_allocated - header_pages_reserved);
+}
+
+/*
+ * Free swap plus what we already hold, net of header and overhead. Asked
+ * afresh each time, as other processes may have allocated swap meanwhile.
+ */
+static int toi_swap_storage_available(void)
+{
+	si_swapinfo(&swapinfo);
+	return (int) raw_to_real((long) swapinfo.freeswap +
+			swap_pages_allocated - header_pages_reserved);
+}
+
+static int toi_swap_initialise(int starting_cycle)
+{
+	if (!starting_cycle)
+		return 0;
+	/* Swapon the configured file, then make sure resume= is open. */
+	enable_swapfile();
+	if (resume_swap_dev_t && !resume_block_device &&
+	    IS_ERR(resume_block_device =
+			open_bdev(MAX_SWAPFILES, resume_swap_dev_t, 1))) {
+		resume_block_device = NULL;	/* never cache an ERR_PTR */
+		return 1;
+	}
+	return 0;
+}
+
+static void toi_swap_cleanup(int ending_cycle)
+{
+	if (ending_cycle)
+		disable_swapfile();
+
+	close_bdevs();
+	/* Drop the cached swap header page too. */
+	forget_signatures();
+}
+
+static int toi_swap_release_storage(void)
+{
+	if (test_action_state(TOI_KEEP_IMAGE) &&
+	    test_toi_state(TOI_NOW_RESUMING))
+		return 0;
+	/* Otherwise forget all accounting and give the swap back. */
+	header_pages_reserved = 0;
+	swap_pages_allocated = 0;
+
+	if (swapextents.first) {
+		/* Hand back every swap entry, then drop our records. */
+		struct hibernate_extent *extentpointer;
+		unsigned long extentvalue;
+		toi_extent_for_each(&swapextents, extentpointer,
+				extentvalue)
+			swap_free((swp_entry_t) { extentvalue });
+
+		toi_put_extent_chain(&swapextents);
+
+		free_block_chains();
+	}
+
+	return 0;
+}
+
+static void free_swap_range(unsigned long min, unsigned long max)
+{
+	unsigned long j;	/* swap entry values don't fit in an int */
+
+	for (j = min; j <= max; j++)
+		swap_free((swp_entry_t) { j });
+}
+
+/*
+ * Round robin allocation (where swap storage has the same priority)
+ * could make this very inefficient, so we track extents allocated on
+ * a per-swapfile basis.
+ *
+ * We ignore here the fact that some space is for the header and doesn't
+ * have the overhead. It will only rarely make a 1 page difference.
+ */
+static int toi_swap_allocate_storage(int request)
+{
+	int i, result = 0, to_add[MAX_SWAPFILES], pages_to_get, extra_pages,
+	    gotten = 0;
+	unsigned long extent_min[MAX_SWAPFILES], extent_max[MAX_SWAPFILES];
+
+	extra_pages = DIV_ROUND_UP(request * (sizeof(unsigned long)
+			       + sizeof(int)), PAGE_SIZE);
+	pages_to_get = request + extra_pages - swapextents.size;
+
+	if (pages_to_get < 1)
+		return 0;
+
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		struct swap_info_struct *si = get_swap_info_struct(i);
+		to_add[i] = 0;
+		if (!si->bdev)
+			continue;
+		devinfo[i].bdev = si->bdev;
+		devinfo[i].dev_t = si->bdev->bd_dev;
+		devinfo[i].bmap_shift = 3;
+		devinfo[i].blocks_per_page = 1;
+	}
+
+	for (i = 0; i < pages_to_get; i++) {
+		swp_entry_t entry;
+		unsigned long new_value;
+		unsigned swapfilenum;
+
+		entry = get_swap_page();
+		if (!entry.val)
+			break;
+
+		swapfilenum = swp_type(entry);
+		new_value = entry.val;
+
+		if (!to_add[swapfilenum]) {
+			to_add[swapfilenum] = 1;
+			extent_min[swapfilenum] = new_value;
+			extent_max[swapfilenum] = new_value;
+			gotten++;
+			continue;
+		}
+
+		if (new_value == extent_max[swapfilenum] + 1) {
+			extent_max[swapfilenum]++;
+			gotten++;
+			continue;
+		}
+
+		if (toi_add_to_extent_chain(&swapextents,
+					extent_min[swapfilenum],
+					extent_max[swapfilenum])) {
+			printk(KERN_INFO "Failed to allocate extent for "
+					"%lu-%lu.\n", extent_min[swapfilenum],
+					extent_max[swapfilenum]);
+			free_swap_range(extent_min[swapfilenum],
+					extent_max[swapfilenum]);
+			swap_free(entry);
+			gotten -= (extent_max[swapfilenum] -
+					extent_min[swapfilenum] + 1);
+			/* Don't try to add again below */
+			to_add[swapfilenum] = 0;
+			break;
+		} else {
+			extent_min[swapfilenum] = new_value;
+			extent_max[swapfilenum] = new_value;
+			gotten++;
+		}
+	}
+
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		if (!to_add[i] || !toi_add_to_extent_chain(&swapextents,
+					extent_min[i], extent_max[i]))
+			continue;
+
+		/* Unrecorded: free this run, then still handle the others. */
+		free_swap_range(extent_min[i], extent_max[i]);
+		gotten -= (extent_max[i] - extent_min[i] + 1);
+	}
+
+	if (gotten < pages_to_get)
+		result = -ENOSPC;
+
+	swap_pages_allocated += (long) gotten;
+
+	return result ? result : get_main_pool_phys_params();
+}
+
+static int toi_swap_write_header_init(void)
+{
+	int i, result;
+	struct swap_info_struct *si;
+
+	toi_bio_ops.rw_init(WRITE, 0);
+	toi_writer_buffer_posn = 0;
+
+	/* Info needed to bootstrap goes at the start of the header.
+	 * First we save the original signature bytes, the positions and
+	 * devinfo. Then we save the block chains needed for reading the
+	 * header pages back (toi_swap_read_header_init mirrors this order).
+	 * Note that even if header pages take more than one page, when we
+	 * read back the info, we will have restored the location of the
+	 * next header page by the time we go to use it.
+	 */
+
+	result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops,
+			(char *) &no_image_signature_contents,
+			sizeof(struct sig_data));
+
+	if (result)
+		return result;
+
+	/* Forward one page will be done prior to the read */
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		si = get_swap_info_struct(i);
+		if (si->swap_file)
+			devinfo[i].dev_t = si->bdev->bd_dev;
+		else
+			devinfo[i].dev_t = (dev_t) 0;
+	}
+
+	result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops,
+			(char *) &toi_writer_posn_save,
+			sizeof(toi_writer_posn_save));
+
+	if (result)
+		return result;
+
+	result = toi_bio_ops.rw_header_chunk(WRITE, &toi_swapops,
+			(char *) &devinfo, sizeof(devinfo));
+
+	if (result)
+		return result;
+	/* NOTE(review): toi_serialise_extent_chain()'s result is unchecked. */
+	for (i = 0; i < MAX_SWAPFILES; i++)
+		toi_serialise_extent_chain(&toi_swapops, &block_chain[i]);
+
+	return 0;
+}
+
+static int toi_swap_write_header_cleanup(void)
+{
+	/* Write any unsaved data */
+	if (toi_writer_buffer_posn)
+		toi_bio_ops.write_header_chunk_finish();
+
+	toi_bio_ops.finish_all_io();
+
+	/* Set signature to say we have an image */
+	return write_modified_signature(IMAGE_SIGNATURE);
+}
+
+/* ------------------------- HEADER READING ------------------------- */
+
+/*
+ * read_header_init()
+ *
+ * Description:
+ * 1. Attempt to read the device specified with resume=.
+ * 2. Check the contents of the swap header for our signature.
+ * 3. Warn, ignore, reset and/or continue as appropriate.
+ * 4. If continuing, read the toi_swap configuration section
+ *    of the header and set up block device info so we can read
+ *    the rest of the header & image.
+ *
+ * Returns:
+ * May not return if user chooses to reboot at a warning.
+ * -EINVAL if cannot resume at this time. Booting should continue
+ * normally.
+ */
+
+static int toi_swap_read_header_init(void)
+{
+	int i, result = 0;
+	toi_writer_buffer_posn = 0;
+
+	if (!header_dev_t) {
+		printk(KERN_INFO "read_header_init called when we haven't "
+				"verified there is an image!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * If the header is not on the resume_swap_dev_t, open the device
+	 * it lives on; otherwise reuse the resume device.
+	 */
+	if (header_dev_t != resume_swap_dev_t) {
+		header_block_device = open_bdev(MAX_SWAPFILES + 1,
+				header_dev_t, 1);
+
+		if (IS_ERR(header_block_device))
+			return PTR_ERR(header_block_device);
+	} else
+		header_block_device = resume_block_device;
+
+	toi_bio_ops.read_header_init();
+
+	/*
+	 * Read toi_swap configuration.
+	 * Headerblock size taken into account already.
+	 */
+	result = toi_bio_ops.bdev_page_io(READ, header_block_device,
+			headerblock << 3,
+			virt_to_page((unsigned long) toi_writer_buffer));
+	if (result)
+		return result;
+
+	memcpy(&no_image_signature_contents, toi_writer_buffer,
+			sizeof(no_image_signature_contents));
+
+	toi_writer_buffer_posn = sizeof(no_image_signature_contents);
+
+	memcpy(&toi_writer_posn_save, toi_writer_buffer +
+			toi_writer_buffer_posn, sizeof(toi_writer_posn_save));
+
+	toi_writer_buffer_posn += sizeof(toi_writer_posn_save);
+
+	memcpy(&devinfo, toi_writer_buffer + toi_writer_buffer_posn,
+			sizeof(devinfo));
+
+	toi_writer_buffer_posn += sizeof(devinfo);
+
+	/* Restore device info */
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		dev_t thisdevice = devinfo[i].dev_t;
+		struct block_device *result;	/* shadows outer result */
+
+		devinfo[i].bdev = NULL;
+
+		if (!thisdevice)
+			continue;
+
+		if (thisdevice == resume_swap_dev_t) {
+			devinfo[i].bdev = resume_block_device;
+			continue;
+		}
+
+		if (thisdevice == header_dev_t) {
+			devinfo[i].bdev = header_block_device;
+			continue;
+		}
+
+		result = open_bdev(i, thisdevice, 1);
+		if (IS_ERR(result))
+			return PTR_ERR(result);
+		devinfo[i].bdev = bdevs_opened[i]->bdev;
+	}
+
+	toi_extent_state_goto_start(&toi_writer_posn);
+	toi_bio_ops.set_extra_page_forward();
+
+	for (i = 0; i < MAX_SWAPFILES && !result; i++)
+		result = toi_load_extent_chain(&block_chain[i]);
+
+	return result;
+}
+/* Header has been read: let the bio layer clean up after the READ. */
+static int toi_swap_read_header_cleanup(void)
+{
+	toi_bio_ops.rw_cleanup(READ);
+	return 0;
+}
+
+/*
+ * toi_swap_memory_needed
+ *
+ * Description:
+ * Returns the amount of RAM needed for this code to do its
+ * work. (Used when calculating whether we have enough memory
+ * to be able to hibernate & resume).
+ * NOTE(review): always 1; bytes vs. pages is unclear - confirm.
+ */
+static int toi_swap_memory_needed(void)
+{
+	return 1;
+}
+
+/*
+ * Print debug info
+ *
+ * Writes allocator status into buffer (size bytes); returns the length used.
+ */
+static int toi_swap_print_debug_stats(char *buffer, int size)
+{
+	int len = 0;
+	struct sysinfo sysinfo;
+
+	if (toiActiveAllocator != &toi_swapops) {
+		len = snprintf_used(buffer, size,
+				"- SwapAllocator inactive.\n");
+		return len;
+	}
+
+	len = snprintf_used(buffer, size, "- SwapAllocator active.\n");
+	if (swapfilename[0])
+		len += snprintf_used(buffer+len, size-len,
+			"  Attempting to automatically swapon: %s.\n",
+			swapfilename);
+
+	si_swapinfo(&sysinfo);
+
+	len += snprintf_used(buffer+len, size-len,
+			"  Swap available for image: %ld pages.\n",
+			(long) sysinfo.freeswap + toi_swap_storage_allocated());
+
+	return len;
+}
+
+/*
+ * Storage needed
+ *
+ * Returns amount of space in the swap header required
+ * for the toi_swap's data. This ignores the links between
+ * pages, which we factor in when allocating the space.
+ *
+ * The total must cover everything write_header_init saves: the
+ * original signature bytes, the saved positions, devinfo and each
+ * block chain. We don't also define a save_config_info routine.
+ */
+static int toi_swap_storage_needed(void)
+{
+	int i, result;
+	result = sizeof(no_image_signature_contents) +
+		sizeof(toi_writer_posn_save) + sizeof(devinfo);
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		result += 3 * sizeof(int);
+		result += (2 * sizeof(unsigned long) *
+			block_chain[i].num_extents);
+	}
+
+	return result;
+}
+
+/*
+ * Image_exists
+ *
+ * Returns -1 if don't know (incl. I/O errors), otherwise 0 (no) or 1 (yes).
+ */
+static int toi_swap_image_exists(int quiet)
+{
+	int signature_found;
+
+	if (!resume_swap_dev_t) {
+		if (!quiet)
+			printk(KERN_INFO "Not even trying to read header "
+				"because resume_swap_dev_t is not set.\n");
+		return -1;
+	}
+
+	if (!resume_block_device &&
+	    IS_ERR(resume_block_device =
+			open_bdev(MAX_SWAPFILES, resume_swap_dev_t, 1))) {
+		resume_block_device = NULL;	/* never cache an ERR_PTR */
+		if (!quiet)
+			printk(KERN_INFO "Failed to open resume dev_t (%x).\n",
+				resume_swap_dev_t);
+		return -1;
+	}
+
+	signature_found = parse_signature();
+	if (signature_found < -1)	/* errno from reading the header */
+		return -1;
+
+	switch (signature_found) {
+	case -1:
+		if (!quiet)
+			printk(KERN_ERR "TuxOnIce: Unable to find a signature."
+				" Could you have moved a swap file?\n");
+		return -1;
+	case 0:
+	case 1:
+		if (!quiet)
+			printk(KERN_INFO "TuxOnIce: Normal swapspace found.\n");
+		return 0;
+	case 2:
+	case 3:
+	case 4:
+		if (!quiet)
+			printk(KERN_INFO "TuxOnIce: Detected another "
+				"implementation's signature.\n");
+		return 0;
+	case 10:
+		if (!quiet)
+			printk(KERN_INFO "TuxOnIce: Detected TuxOnIce binary "
+				"signature.\n");
+		return 1;
+	}
+	BUG();
+	return 0;
+}
+
+/* toi_swap_remove_image
+ * Put the original swap signature back if (and only if) an image exists.
+ */
+static int toi_swap_remove_image(void)
+{
+	/*
+	 * If nr_hibernates == 0, we must be booting, so no swap pages
+	 * will be recorded as used yet.
+	 */
+
+	if (nr_hibernates)
+		toi_swap_release_storage();
+
+	/*
+	 * We don't do a sanity check here: we want to restore the swap
+	 * whatever version of kernel made the hibernate image.
+	 *
+	 * We need to write swap, but swap may not be enabled so
+	 * we write the device directly
+	 *
+	 * If no image was found (image_exists gave 0 or -1), we didn't
+	 * read an image header, so don't change anything.
+	 */
+
+	return toi_swap_image_exists(1) > 0 ?
+		write_modified_signature(NO_IMAGE_SIGNATURE) : 0;
+}
+
+/*
+ * Mark resume attempted.
+ *
+ * Set (mark != 0) or clear the "resume attempted" flag in the on-disk
+ * signature. Returns -ENODEV when no resume device has been configured.
+ */
+static int toi_swap_mark_resume_attempted(int mark)
+{
+	int modification = mark ? TRIED_RESUME : NO_TRIED_RESUME;
+
+	if (resume_swap_dev_t)
+		return write_modified_signature(modification);
+	printk(KERN_INFO "Not even trying to record attempt at resuming"
+			" because resume_swap_dev_t is not set.\n");
+	return -ENODEV;
+}
+
+/*
+ * Parse Image Location
+ *
+ * Attempt to parse a resume= parameter.
+ * Swap Writer accepts:
+ * resume=swap:DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
+ *
+ * Where:
+ * DEVNAME is convertible to a dev_t by name_to_dev_t
+ * FIRSTBLOCK is the location of the first block in the swap file
+ * (specifying for a swap partition is nonsensical but not prohibited).
+ * Data is validated by attempting to read a swap header from the
+ * location given. On failure toi_swap refuses to save an image, and a
+ * reboot with correct parameters will be necessary.
+ * Returns 0 if usable, 1 if the string isn't ours, else -EINVAL.
+ */
+static int toi_swap_parse_sig_location(char *commandline,
+		int only_allocator, int quiet)
+{
+	char *thischar, *devstart, *colon = NULL;
+	int signature_found, result = -EINVAL, temp_result;
+
+	if (strncmp(commandline, "swap:", 5)) {
+		/*
+		 * Failing swap:, we'll take a simple
+		 * resume=/dev/hda2, but fall through to
+		 * other allocators if /dev/ isn't matched.
+		 */
+		if (strncmp(commandline, "/dev/", 5))
+			return 1;
+	} else
+		commandline += 5;
+
+	devstart = thischar = commandline;
+	while ((*thischar != ':') && (*thischar != '@') &&
+		((thischar - commandline) < 250) && (*thischar))
+		thischar++;
+
+	if (*thischar == ':') {
+		colon = thischar;
+		*colon = 0;
+		thischar++;
+	}
+
+	while ((thischar - commandline) < 250 && *thischar)
+		thischar++;
+
+	if (colon)
+		resume_firstblock = simple_strtoul(colon + 1, NULL, 0);
+	else
+		resume_firstblock = 0;
+
+	clear_toi_state(TOI_CAN_HIBERNATE);
+	clear_toi_state(TOI_CAN_RESUME);
+
+	temp_result = try_to_parse_resume_device(devstart, quiet);
+
+	if (colon)
+		*colon = ':';
+
+	if (temp_result)
+		return -EINVAL;
+
+	signature_found = toi_swap_image_exists(quiet);
+
+	if (signature_found != -1) {
+		result = 0;
+
+		toi_bio_ops.set_devinfo(devinfo);
+		toi_writer_posn.chains = &block_chain[0];
+		toi_writer_posn.num_chains = MAX_SWAPFILES;
+		set_toi_state(TOI_CAN_HIBERNATE);
+		set_toi_state(TOI_CAN_RESUME);
+	} else
+		if (!quiet)
+			printk(KERN_ERR "TuxOnIce: SwapAllocator: No swap "
+				"signature found at %s.\n", devstart);
+	return result;
+}
+
+static int header_locations_read_sysfs(const char *page, int count)
+{
+	int i, printedpartitionsmessage = 0, len = 0, haveswap = 0;
+	struct inode *swapf;
+	int zone;
+	char *path_page, *path, *output = (char *) page;
+
+	if (!page)
+		return 0;
+
+	/* Allocate only after the early return so the page can't leak. */
+	path_page = (char *) toi_get_free_page(10, GFP_KERNEL);
+	if (!path_page)
+		return 0;
+
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		struct swap_info_struct *si =  get_swap_info_struct(i);
+
+		if (!si->swap_file)
+			continue;
+		haveswap = 1;
+		if (S_ISBLK(si->swap_file->f_mapping->host->i_mode)) {
+			if (!printedpartitionsmessage) {
+				len += sprintf(output + len,
+					"For swap partitions, simply use the "
+					"format: resume=swap:/dev/hda1.\n");
+				printedpartitionsmessage = 1;
+			}
+			continue;
+		}
+
+		path = d_path(&si->swap_file->f_path, path_page, PAGE_SIZE);
+		if (IS_ERR(path))
+			continue;
+		/* Keep only the first 30 characters, at the page's start. */
+		snprintf(path_page, 31, "%s", path);
+		swapf = si->swap_file->f_mapping->host;
+		zone = bmap(swapf, 0);
+		if (!zone) {
+			len += sprintf(output + len,
+				"Swapfile %s has been corrupted. Reuse"
+				" mkswap on it and try again.\n",
+				path_page);
+		} else {
+			char name_buffer[255];
+			len += sprintf(output + len,
+				"For swapfile `%s`,"
+				" use resume=swap:/dev/%s:0x%x.\n",
+				path_page,
+				bdevname(si->bdev, name_buffer),
+				zone << (swapf->i_blkbits - 9));
+		}
+	}
+
+	if (!haveswap)
+		len = sprintf(output, "You need to turn on swap partitions "
+				"before examining this file.\n");
+	toi_free_page(10, (unsigned long) path_page);
+	return len;
+}
+
+static struct toi_sysfs_data sysfs_params[] = {
+	{
+	 TOI_ATTR("swapfilename", SYSFS_RW),
+	 SYSFS_STRING(swapfilename, 255, 0)
+	},
+	/* Read-only: output of header_locations_read_sysfs(). */
+	{
+	 TOI_ATTR("headerlocations", SYSFS_READONLY),
+	 SYSFS_CUSTOM(header_locations_read_sysfs, NULL, 0)
+	},
+	/* Writing runs a side effect; presumably it re-parses resume=. */
+	{ TOI_ATTR("enabled", SYSFS_RW),
+	  SYSFS_INT(&toi_swapops.enabled, 0, 1, 0),
+	  .write_side_effect		= attempt_to_parse_resume_device2,
+	}
+};
+
+static struct toi_module_ops toi_swapops = {
+	.type					= WRITER_MODULE,
+	.name					= "swap storage",
+	.directory				= "swap",
+	.module					= THIS_MODULE,
+	.memory_needed				= toi_swap_memory_needed,
+	.print_debug_info			= toi_swap_print_debug_stats,
+	.storage_needed				= toi_swap_storage_needed,
+	.initialise				= toi_swap_initialise,
+	.cleanup				= toi_swap_cleanup,
+	/* Storage ops; the page I/O hooks are set in toi_swap_load(). */
+	.noresume_reset		= toi_swap_noresume_reset,
+	.storage_available 	= toi_swap_storage_available,
+	.storage_allocated	= toi_swap_storage_allocated,
+	.release_storage	= toi_swap_release_storage,
+	.reserve_header_space	= toi_swap_reserve_header_space,
+	.allocate_storage	= toi_swap_allocate_storage,
+	.image_exists		= toi_swap_image_exists,
+	.mark_resume_attempted	= toi_swap_mark_resume_attempted,
+	.write_header_init	= toi_swap_write_header_init,
+	.write_header_cleanup	= toi_swap_write_header_cleanup,
+	.read_header_init	= toi_swap_read_header_init,
+	.read_header_cleanup	= toi_swap_read_header_cleanup,
+	.remove_image		= toi_swap_remove_image,
+	.parse_sig_location	= toi_swap_parse_sig_location,
+
+	.sysfs_data		= sysfs_params,
+	.num_sysfs_entries	= sizeof(sysfs_params) /
+		sizeof(struct toi_sysfs_data),
+};
+
+/* ---- Registration ---- */
+static __init int toi_swap_load(void)
+{
+	/* All actual I/O is handed to the shared toi_bio_ops routines. */
+	toi_swapops.read_page = toi_bio_ops.read_page;
+	toi_swapops.write_page = toi_bio_ops.write_page;
+	toi_swapops.rw_init = toi_bio_ops.rw_init;
+	toi_swapops.rw_cleanup = toi_bio_ops.rw_cleanup;
+	toi_swapops.io_flusher = toi_bio_ops.io_flusher;
+	toi_swapops.rw_header_chunk = toi_bio_ops.rw_header_chunk;
+	toi_swapops.rw_header_chunk_noreadahead =
+		toi_bio_ops.rw_header_chunk_noreadahead;
+	return toi_register_module(&toi_swapops);
+}
+
+#ifdef MODULE
+static __exit void toi_swap_unload(void)
+{
+	toi_unregister_module(&toi_swapops);
+}
+
+module_init(toi_swap_load);
+module_exit(toi_swap_unload);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nigel Cunningham");
+MODULE_DESCRIPTION("TuxOnIce SwapAllocator");
+#else /* !MODULE: built in, so register from an initcall */
+late_initcall(toi_swap_load);
+#endif /* MODULE */
diff -Npur linux-2.6-block/kernel/power/tuxonice_sysfs.c linux-2.6-block-custom/kernel/power/tuxonice_sysfs.c
--- linux-2.6-block/kernel/power/tuxonice_sysfs.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_sysfs.c	2008-09-26 19:48:24.027754296 +0900
@@ -0,0 +1,335 @@
+/*
+ * kernel/power/tuxonice_sysfs.c
+ *
+ * Copyright (C) 2002-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * This file contains support for sysfs entries for tuning TuxOnIce.
+ *
+ * We have a generic handler that deals with the most common cases, and
+ * hooks for special handlers to use.
+ */
+
+#include <linux/suspend.h>
+#include <linux/module.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice.h"
+#include "tuxonice_storage.h"
+#include "tuxonice_alloc.h"
+
+static int toi_sysfs_initialised;
+
+static void toi_initialise_sysfs(void);
+
+static struct toi_sysfs_data sysfs_params[];
+
+#define to_sysfs_data(_attr) container_of(_attr, struct toi_sysfs_data, attr)
+
+static void toi_main_wrapper(void)
+{
+	_toi_try_hibernate(0);
+}
+
+static ssize_t toi_attr_show(struct kobject *kobj, struct attribute *attr,
+			      char *page)
+{
+	struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
+	int len = 0;
+
+	if (toi_start_anything(0))
+		return -EBUSY;
+
+	if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
+		toi_prepare_usm();
+
+	switch (sysfs_data->type) {
+	case TOI_SYSFS_DATA_CUSTOM:
+		len = (sysfs_data->data.special.read_sysfs) ?
+			(sysfs_data->data.special.read_sysfs)(page, PAGE_SIZE)
+			: 0;
+		break;
+	case TOI_SYSFS_DATA_BIT:
+		len = sprintf(page, "%d\n",	/* normalised to 0 or 1 */
+			!!test_bit(sysfs_data->data.bit.bit,
+				sysfs_data->data.bit.bit_vector));
+		break;
+	case TOI_SYSFS_DATA_INTEGER:
+		len = sprintf(page, "%d\n",
+			*(sysfs_data->data.integer.variable));
+		break;
+	case TOI_SYSFS_DATA_LONG:
+		len = sprintf(page, "%ld\n",
+			*(sysfs_data->data.a_long.variable));
+		break;
+	case TOI_SYSFS_DATA_UL:
+		len = sprintf(page, "%lu\n",
+			*(sysfs_data->data.ul.variable));
+		break;
+	case TOI_SYSFS_DATA_STRING:
+		len = sprintf(page, "%s\n",
+			sysfs_data->data.string.variable);
+		break;
+	}
+	/* Run the entry's read hook, if it has one */
+	if (sysfs_data->read_side_effect)
+		sysfs_data->read_side_effect();
+
+	if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_READ)
+		toi_cleanup_usm();
+
+	toi_finish_anything(0);
+
+	return len;
+}
+
+#define BOUND(_variable, _type) \
+	do { \
+	if (*(_variable) < sysfs_data->data._type.minimum) \
+		*(_variable) = sysfs_data->data._type.minimum; \
+	else if (*(_variable) > sysfs_data->data._type.maximum) \
+		*(_variable) = sysfs_data->data._type.maximum; \
+	} while (0)
+
+static ssize_t toi_attr_store(struct kobject *kobj, struct attribute *attr,
+		const char *my_buf, size_t count)
+{
+	int assigned_temp_buffer = 0, result = count;
+	struct toi_sysfs_data *sysfs_data = to_sysfs_data(attr);
+
+	if (toi_start_anything((sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME)))
+		return -EBUSY;
+
+	((char *) my_buf)[count] = 0;
+
+	if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
+		toi_prepare_usm();
+
+	switch (sysfs_data->type) {
+	case TOI_SYSFS_DATA_CUSTOM:
+		if (sysfs_data->data.special.write_sysfs)
+			result = (sysfs_data->data.special.write_sysfs)(my_buf,
+					count);
+		break;
+	case TOI_SYSFS_DATA_BIT:
+		{
+		int value = simple_strtoul(my_buf, NULL, 0);
+		if (value)
+			set_bit(sysfs_data->data.bit.bit,
+				(sysfs_data->data.bit.bit_vector));
+		else
+			clear_bit(sysfs_data->data.bit.bit,
+				(sysfs_data->data.bit.bit_vector));
+		}
+		break;
+	case TOI_SYSFS_DATA_INTEGER:
+		{
+			int *variable = sysfs_data->data.integer.variable;
+			*variable = simple_strtol(my_buf, NULL, 0);
+			BOUND(variable, integer);
+			break;
+		}
+	case TOI_SYSFS_DATA_LONG:
+		{
+			long *variable = sysfs_data->data.a_long.variable;
+			*variable = simple_strtol(my_buf, NULL, 0);
+			BOUND(variable, a_long);
+			break;
+		}
+	case TOI_SYSFS_DATA_UL:
+		{
+			unsigned long *variable = sysfs_data->data.ul.variable;
+			*variable = simple_strtoul(my_buf, NULL, 0);
+			BOUND(variable, ul);
+			break;
+		}
+	case TOI_SYSFS_DATA_STRING:
+		{
+			int copy_len = count;
+			char *variable = sysfs_data->data.string.variable;
+
+			if (sysfs_data->data.string.max_length &&
+			    (copy_len > sysfs_data->data.string.max_length))
+				copy_len = sysfs_data->data.string.max_length;
+
+			if (!variable) {
+				variable = (char *) toi_get_zeroed_page(31,
+						TOI_ATOMIC_GFP);
+				if (!variable) {
+					result = -ENOMEM;
+					goto out; /* no side effect */
+				}
+				sysfs_data->data.string.variable = variable;
+				assigned_temp_buffer = 1;
+			}
+			strncpy(variable, my_buf, copy_len);
+			/* Terminate at the clamped length, not at count */
+			if (copy_len && my_buf[copy_len - 1] == '\n')
+				variable[copy_len - 1] = 0;
+			variable[copy_len] = 0;
+		}
+		break;
+	}
+
+	/* Side effect routine? */
+	if (sysfs_data->write_side_effect)
+		sysfs_data->write_side_effect();
+
+	/* Free temporary buffers */
+	if (assigned_temp_buffer) {
+		toi_free_page(31,
+			(unsigned long) sysfs_data->data.string.variable);
+		sysfs_data->data.string.variable = NULL;
+	}
+
+out:
+	if (sysfs_data->flags & SYSFS_NEEDS_SM_FOR_WRITE)
+		toi_cleanup_usm();
+	toi_finish_anything(sysfs_data->flags & SYSFS_HIBERNATE_OR_RESUME);
+
+	return result;
+}
+
+static struct sysfs_ops toi_sysfs_ops = {
+	.show	= &toi_attr_show,
+	.store	= &toi_attr_store,
+};
+
+static struct kobj_type toi_ktype = {
+	.sysfs_ops	= &toi_sysfs_ops,
+};
+
+struct kobject *tuxonice_kobj;
+
+/* Non-module sysfs entries.
+ *
+ * This array contains entries that are automatically registered at
+ * boot. Modules and the console code register their own entries separately.
+ *
+ * NB: If you move do_hibernate, change toi_write_sysfs's test so that
+ * toi_start_anything still gets a 1 when the user echos > do_hibernate!
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+	{ TOI_ATTR("do_hibernate", SYSFS_WRITEONLY),
+	  SYSFS_CUSTOM(NULL, NULL, SYSFS_HIBERNATING),
+	  .write_side_effect = toi_main_wrapper
+	},
+
+	{ TOI_ATTR("do_resume", SYSFS_WRITEONLY),
+	  SYSFS_CUSTOM(NULL, NULL, SYSFS_RESUMING),
+	  .write_side_effect = __toi_try_resume
+	},
+
+};
+
+void remove_toi_sysdir(struct kobject *kobj)
+{
+	/* Release the caller's reference on the sysfs directory kobject;
+	 * a NULL @kobj is accepted and ignored. */
+	if (kobj)
+		kobject_put(kobj);
+}
+
+struct kobject *make_toi_sysdir(char *name)
+{
+	struct kobject *kobj = kobject_create_and_add(name, tuxonice_kobj);
+
+	if (!kobj) {
+		printk(KERN_INFO "TuxOnIce: Can't allocate kobject for sysfs "
+				"dir!\n");
+		return NULL;
+	}
+	/* NOTE(review): toi_ktype has no .release - confirm freeing on put. */
+	kobj->ktype = &toi_ktype;	/* I/O goes via toi_sysfs_ops */
+
+	return kobj;
+}
+
+/* toi_register_sysfs_file
+ *
+ * Helper for registering a new /sys/power/tuxonice entry.
+ */
+
+int toi_register_sysfs_file(
+		struct kobject *kobj,
+		struct toi_sysfs_data *toi_sysfs_data)
+{
+	int result;
+
+	if (!toi_sysfs_initialised)
+		toi_initialise_sysfs();	/* creates /sys/power/tuxonice */
+
+	result = sysfs_create_file(kobj, &toi_sysfs_data->attr);
+	if (result)
+		printk(KERN_INFO "TuxOnIce: sysfs_create_file for %s "
+			"returned %d.\n",
+			toi_sysfs_data->attr.name, result);
+	kobj->ktype = &toi_ktype;	/* I/O goes via toi_sysfs_ops */
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(toi_register_sysfs_file);
+
+/* toi_unregister_sysfs_file
+ *
+ * Helper for removing unwanted /sys/power/tuxonice entries.
+ *
+ */
+void toi_unregister_sysfs_file(struct kobject *kobj,
+		struct toi_sysfs_data *toi_sysfs_data)
+{
+	sysfs_remove_file(kobj, &toi_sysfs_data->attr);
+}
+EXPORT_SYMBOL_GPL(toi_unregister_sysfs_file);
+
+void toi_cleanup_sysfs(void)
+{
+	int i;
+	const int numfiles = sizeof(sysfs_params) / sizeof(sysfs_params[0]);
+
+	if (!toi_sysfs_initialised)
+		return;
+
+	/* Remove the built-in entries, then drop the directory itself. */
+	for (i = 0; i < numfiles; i++)
+		toi_unregister_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+	kobject_put(tuxonice_kobj);
+	toi_sysfs_initialised = 0;
+}
+
+/* toi_initialise_sysfs
+ *
+ * Initialise the /sys/power/tuxonice directory.
+ */
+
+static void toi_initialise_sysfs(void)
+{
+	int i, numfiles;
+
+	if (toi_sysfs_initialised)
+		return;
+
+	/* Make our TuxOnIce directory a child of /sys/power */
+	tuxonice_kobj = kobject_create_and_add("tuxonice", power_kobj);
+	if (!tuxonice_kobj)
+		return;
+
+	/* Flag first: toi_register_sysfs_file() calls back in here. */
+	toi_sysfs_initialised = 1;
+	numfiles = sizeof(sysfs_params) / sizeof(sysfs_params[0]);
+	for (i = 0; i < numfiles; i++)
+		toi_register_sysfs_file(tuxonice_kobj, &sysfs_params[i]);
+}
+
+int toi_sysfs_init(void)
+{
+	toi_initialise_sysfs();
+	return 0;
+}
+
+void toi_sysfs_exit(void)
+{
+	toi_cleanup_sysfs();
+}
diff -Npur linux-2.6-block/kernel/power/tuxonice_sysfs.h linux-2.6-block-custom/kernel/power/tuxonice_sysfs.h
--- linux-2.6-block/kernel/power/tuxonice_sysfs.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_sysfs.h	2008-09-26 19:48:24.027754296 +0900
@@ -0,0 +1,127 @@
+/*
+ * kernel/power/tuxonice_sysfs.h
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/sysfs.h>
+#include "power.h"
+
+struct toi_sysfs_data {
+	struct attribute attr;
+	int type;
+	int flags;
+	union {
+		struct {
+			unsigned long *bit_vector;
+			int bit;
+		} bit;
+		struct {
+			int *variable;
+			int minimum;
+			int maximum;
+		} integer;
+		struct {
+			long *variable;
+			long minimum;
+			long maximum;
+		} a_long;
+		struct {
+			unsigned long *variable;
+			unsigned long minimum;
+			unsigned long maximum;
+		} ul;
+		struct {
+			char *variable;
+			int max_length;
+		} string;
+		struct {
+			int (*read_sysfs) (const char *buffer, int count);
+			int (*write_sysfs) (const char *buffer, int count);
+			void *data;
+		} special;
+	} data;
+
+	/* Side effects routines. Used, eg, for reparsing the
+	 * resume= entry when it changes */
+	void (*read_side_effect) (void);
+	void (*write_side_effect) (void);
+	struct list_head sysfs_data_list;
+};
+
+enum {
+	TOI_SYSFS_DATA_NONE = 1,
+	TOI_SYSFS_DATA_CUSTOM,
+	TOI_SYSFS_DATA_BIT,
+	TOI_SYSFS_DATA_INTEGER,
+	TOI_SYSFS_DATA_UL,
+	TOI_SYSFS_DATA_LONG,
+	TOI_SYSFS_DATA_STRING
+};
+
+#define TOI_ATTR(_name, _mode)      \
+	.attr = {.name  = _name , .mode   = _mode }
+
+#define SYSFS_BIT(_ul, _bit, _flags) \
+	.type = TOI_SYSFS_DATA_BIT, \
+	.flags = _flags, \
+	.data = { .bit = { .bit_vector = _ul, .bit = _bit } }
+
+#define SYSFS_INT(_int, _min, _max, _flags) \
+	.type = TOI_SYSFS_DATA_INTEGER, \
+	.flags = _flags, \
+	.data = { .integer = { .variable = _int, .minimum = _min, \
+			.maximum = _max } }
+
+#define SYSFS_UL(_ul, _min, _max, _flags) \
+	.type = TOI_SYSFS_DATA_UL, \
+	.flags = _flags, \
+	.data = { .ul = { .variable = _ul, .minimum = _min, \
+			.maximum = _max } }
+
+#define SYSFS_LONG(_long, _min, _max, _flags) \
+	.type = TOI_SYSFS_DATA_LONG, \
+	.flags = _flags, \
+	.data = { .a_long = { .variable = _long, .minimum = _min, \
+			.maximum = _max } }
+
+#define SYSFS_STRING(_string, _max_len, _flags) \
+	.type = TOI_SYSFS_DATA_STRING, \
+	.flags = _flags, \
+	.data = { .string = { .variable = _string, .max_length = _max_len } }
+
+#define SYSFS_CUSTOM(_read, _write, _flags) \
+	.type = TOI_SYSFS_DATA_CUSTOM, \
+	.flags = _flags, \
+	.data = { .special = { .read_sysfs = _read, .write_sysfs = _write } }
+
+#define SYSFS_WRITEONLY 0200
+#define SYSFS_READONLY 0444
+#define SYSFS_RW 0644
+
+/* Flags */
+#define SYSFS_NEEDS_SM_FOR_READ 1
+#define SYSFS_NEEDS_SM_FOR_WRITE 2
+#define SYSFS_HIBERNATE 4
+#define SYSFS_RESUME 8
+#define SYSFS_HIBERNATE_OR_RESUME (SYSFS_HIBERNATE | SYSFS_RESUME)
+#define SYSFS_HIBERNATING (SYSFS_HIBERNATE | SYSFS_NEEDS_SM_FOR_WRITE)
+#define SYSFS_RESUMING (SYSFS_RESUME | SYSFS_NEEDS_SM_FOR_WRITE)
+#define SYSFS_NEEDS_SM_FOR_BOTH \
+ (SYSFS_NEEDS_SM_FOR_READ | SYSFS_NEEDS_SM_FOR_WRITE)
+
+int toi_register_sysfs_file(struct kobject *kobj,
+		struct toi_sysfs_data *toi_sysfs_data);
+void toi_unregister_sysfs_file(struct kobject *kobj,
+		struct toi_sysfs_data *toi_sysfs_data);
+
+extern struct kobject *tuxonice_kobj;
+
+struct kobject *make_toi_sysdir(char *name);
+void remove_toi_sysdir(struct kobject *obj);
+extern void toi_cleanup_sysfs(void);
+
+extern int toi_sysfs_init(void);
+extern void toi_sysfs_exit(void);
diff -Npur linux-2.6-block/kernel/power/tuxonice_ui.c linux-2.6-block-custom/kernel/power/tuxonice_ui.c
--- linux-2.6-block/kernel/power/tuxonice_ui.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_ui.c	2008-09-26 19:48:24.031795592 +0900
@@ -0,0 +1,261 @@
+/*
+ * kernel/power/tuxonice_ui.c
+ *
+ * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
+ * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
+ * Copyright (C) 2002-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for TuxOnIce's user interface.
+ *
+ * The user interface code talks to a userspace program via a
+ * netlink socket.
+ *
+ * The kernel side:
+ * - starts the userui program;
+ * - sends text messages and progress bar status;
+ *
+ * The user space side:
+ * - passes messages regarding user requests (abort, toggle reboot etc)
+ *
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/reboot.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_power_off.h"
+#include "tuxonice_builtin.h"
+
+static char local_printf_buf[1024];	/* Same as printk - should be safe */
+struct ui_ops *toi_current_ui;
+
+/**
+ * toi_wait_for_keypress - Wait for keypress via userui or /dev/console.
+ *
+ * @timeout: Maximum time to wait.
+ *
+ * Wait for a keypress, either from userui or /dev/console if userui isn't
+ * available. The non-userui path is particularly for at boot-time, prior
+ * to userui being started, when we have an important warning to give to
+ * the user.
+ */
+static char toi_wait_for_keypress(int timeout)
+{
+	if (toi_current_ui && toi_current_ui->wait_for_key(timeout))
+		return ' ';
+
+	return toi_wait_for_keypress_dev_console(timeout);
+}
+
+/* toi_early_boot_message()
+ * Description:	Handle errors early in the process of booting.
+ * 		The user may press C to continue booting, perhaps
+ * 		invalidating the image,  or space to reboot.
+ * 		This works from either the serial console or normally
+ * 		attached keyboard.
+ *
+ * 		Note that we come in here from init, while the kernel is
+ * 		locked. If we want to get events from the serial console,
+ * 		we need to temporarily unlock the kernel.
+ *
+ * 		toi_early_boot_message may also be called post-boot.
+ * 		In this case, it simply printks the message and returns.
+ *
+ * Arguments:	int	Whether we are able to erase the image.
+ * 		int	default_answer. What to do when we timeout. This
+ * 			will normally be continue, but the user might
+ * 			provide command line options (__setup) to override
+ * 			particular cases.
+ * 		Char *. Pointer to a string explaining why we're moaning.
+ */
+
+#define say(message, a...) printk(KERN_EMERG message, ##a)
+
+void toi_early_boot_message(int message_detail, int default_answer,
+	char *warning_reason, ...)
+{
+	unsigned long orig_state = get_toi_state(), continue_req = 0;
+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
+	unsigned long orig_loglevel = console_loglevel;
+	int can_ask = 1;
+#else
+	int can_ask = 0;
+#endif
+
+	va_list args;
+	int printed_len;
+
+	if (!toi_wait) {
+		set_toi_state(TOI_CONTINUE_REQ);
+		can_ask = 0;
+	}
+
+	if (warning_reason) {
+		va_start(args, warning_reason);
+		printed_len = vsnprintf(local_printf_buf,
+				sizeof(local_printf_buf),
+				warning_reason,
+				args);
+		va_end(args);
+	}
+
+	if (!test_toi_state(TOI_BOOT_TIME)) {
+		printk("TuxOnIce: %s\n", local_printf_buf);
+		return;
+	}
+
+	if (!can_ask) {		/* cannot prompt: apply the default */
+		continue_req = !!default_answer;
+		goto post_ask;
+	}
+
+#if defined(CONFIG_VT) || defined(CONFIG_SERIAL_CONSOLE)
+	console_loglevel = 7;
+
+	say("=== TuxOnIce ===\n\n");
+	if (warning_reason) {
+		say("BIG FAT WARNING!! %s\n\n", local_printf_buf);
+		switch (message_detail) {
+		case 0:
+			say("If you continue booting, note that any image WILL"
+				" NOT BE REMOVED.\nTuxOnIce is unable to do so "
+				"because the appropriate modules aren't\n"
+				"loaded. You should manually remove the image "
+				"to avoid any\npossibility of corrupting your "
+				"filesystem(s) later.\n");
+			break;
+		case 1:
+			say("If you want to use the current TuxOnIce image, "
+				"reboot and try\nagain with the same kernel "
+				"that you hibernated from. If you want\n"
+				"to forget that image, continue and the image "
+				"will be erased.\n");
+			break;
+		}
+		say("Press SPACE to reboot or C to continue booting with "
+			"this kernel\n\n");
+		if (toi_wait > 0)
+			say("Default action if you don't select one in %d "
+				"seconds is: %s.\n",
+				toi_wait,
+				default_answer ?	/* matches the timeout path */
+				"continue booting" : "reboot");
+	} else {
+		say("BIG FAT WARNING!!\n\n"
+			"You have tried to resume from this image before.\n"
+			"If it failed once, it may well fail again.\n"
+			"Would you like to remove the image and boot "
+			"normally?\nThis will be equivalent to entering "
+			"noresume on the\nkernel command line.\n\n"
+			"Press SPACE to remove the image or C to continue "
+			"resuming.\n\n");
+		if (toi_wait > 0)
+			say("Default action if you don't select one in %d "
+				"seconds is: %s.\n", toi_wait,
+				!!default_answer ?
+				"continue resuming" : "remove the image");
+	}
+	console_loglevel = orig_loglevel;
+
+	set_toi_state(TOI_SANITY_CHECK_PROMPT);
+	clear_toi_state(TOI_CONTINUE_REQ);
+
+	if (toi_wait_for_keypress(toi_wait) == 0) /* We timed out */
+		continue_req = !!default_answer;
+	else
+		continue_req = test_toi_state(TOI_CONTINUE_REQ);
+
+#endif /* CONFIG_VT or CONFIG_SERIAL_CONSOLE */
+
+post_ask:
+	if ((warning_reason) && (!continue_req))
+		machine_restart(NULL);
+
+	restore_toi_state(orig_state);
+	if (continue_req)
+		set_toi_state(TOI_CONTINUE_REQ);
+}
+#undef say
+
+/*
+ * User interface specific /sys/power/tuxonice entries.
+ */
+
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+	{ TOI_ATTR("default_console_level", SYSFS_RW),
+	  SYSFS_INT(&toi_bkd.toi_default_console_level, 0, 7, 0)
+	},
+
+	{ TOI_ATTR("debug_sections", SYSFS_RW),
+	  SYSFS_UL(&toi_bkd.toi_debug_state, 0, 1 << 30, 0)
+	},
+
+	{ TOI_ATTR("log_everything", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_LOGALL, 0)
+	},
+#endif
+	{ TOI_ATTR("pm_prepare_console", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_PM_PREPARE_CONSOLE, 0)
+	}
+};
+
+static struct toi_module_ops userui_ops = {
+	.type				= MISC_HIDDEN_MODULE,
+	.name				= "printk ui",
+	.directory			= "user_interface",
+	.module				= THIS_MODULE,
+	.sysfs_data			= sysfs_params,
+	.num_sysfs_entries		= sizeof(sysfs_params) /
+		sizeof(struct toi_sysfs_data),
+};
+
+int toi_register_ui_ops(struct ui_ops *this_ui)
+{
+	if (toi_current_ui) {	/* a UI is already registered: refuse */
+		printk(KERN_INFO "Only one TuxOnIce user interface module can "
+				"be loaded at a time.\n");
+		return -EBUSY;
+	}
+
+	toi_current_ui = this_ui;
+
+	return 0;
+}
+
+void toi_remove_ui_ops(struct ui_ops *this_ui)
+{
+	if (toi_current_ui != this_ui)
+		return;
+
+	toi_current_ui = NULL;
+}
+
+/* toi_ui_init
+ * Description: Boot time initialisation for user interface.
+ */
+
+int toi_ui_init(void)
+{
+	return toi_register_module(&userui_ops);
+}
+
+void toi_ui_exit(void)
+{
+	toi_unregister_module(&userui_ops);
+}
+
+#ifdef CONFIG_TOI_EXPORTS
+EXPORT_SYMBOL_GPL(toi_current_ui);
+EXPORT_SYMBOL_GPL(toi_early_boot_message);
+EXPORT_SYMBOL_GPL(toi_register_ui_ops);
+EXPORT_SYMBOL_GPL(toi_remove_ui_ops);
+#endif
diff -Npur linux-2.6-block/kernel/power/tuxonice_ui.h linux-2.6-block-custom/kernel/power/tuxonice_ui.h
--- linux-2.6-block/kernel/power/tuxonice_ui.h	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_ui.h	2008-09-26 19:48:24.031795592 +0900
@@ -0,0 +1,104 @@
+/*
+ * kernel/power/tuxonice_ui.h
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham (nigel at tuxonice net)
+ */
+
+enum {
+	DONT_CLEAR_BAR,
+	CLEAR_BAR
+};
+
+enum {
+	/* Userspace -> Kernel */
+	USERUI_MSG_ABORT = 0x11,
+	USERUI_MSG_SET_STATE = 0x12,
+	USERUI_MSG_GET_STATE = 0x13,
+	USERUI_MSG_GET_DEBUG_STATE = 0x14,
+	USERUI_MSG_SET_DEBUG_STATE = 0x15,
+	USERUI_MSG_SPACE = 0x18,
+	USERUI_MSG_GET_POWERDOWN_METHOD = 0x1A,
+	USERUI_MSG_SET_POWERDOWN_METHOD = 0x1B,
+	USERUI_MSG_GET_LOGLEVEL = 0x1C,
+	USERUI_MSG_SET_LOGLEVEL = 0x1D,
+	USERUI_MSG_PRINTK = 0x1E,
+
+	/* Kernel -> Userspace */
+	USERUI_MSG_MESSAGE = 0x21,
+	USERUI_MSG_PROGRESS = 0x22,
+	USERUI_MSG_POST_ATOMIC_RESTORE = 0x25,
+
+	USERUI_MSG_MAX,
+};
+
+struct userui_msg_params {
+	unsigned long a, b, c, d;
+	char text[255];
+};
+
+struct ui_ops {
+	char (*wait_for_key) (int timeout);
+	unsigned long (*update_status) (unsigned long value,
+		unsigned long maximum, const char *fmt, ...);
+	void (*prepare_status) (int clearbar, const char *fmt, ...);
+	void (*cond_pause) (int pause, char *message);
+	void (*abort)(int result_code, const char *fmt, ...);
+	void (*prepare)(void);
+	void (*cleanup)(void);
+	void (*post_atomic_restore)(void);
+	void (*message)(unsigned long section, unsigned long level,
+		int normally_logged, const char *fmt, ...);
+};
+
+extern struct ui_ops *toi_current_ui;
+
+#define toi_update_status(val, max, fmt, args...) \
+ (toi_current_ui ? (toi_current_ui->update_status) (val, max, fmt, ##args) : \
+	max)
+
+#define toi_ui_post_atomic_restore(void) \
+	do { if (toi_current_ui) \
+		(toi_current_ui->post_atomic_restore)(); \
+	} while (0)
+
+#define toi_prepare_console(void) \
+	do { if (toi_current_ui) \
+		(toi_current_ui->prepare)(); \
+	} while (0)
+
+#define toi_cleanup_console(void) \
+	do { if (toi_current_ui) \
+		(toi_current_ui->cleanup)(); \
+	} while (0)
+
+#define abort_hibernate(result, fmt, args...) \
+	do { if (toi_current_ui) \
+		(toi_current_ui->abort)(result, fmt, ##args); \
+	     else { \
+		set_abort_result(result); \
+	     } \
+	} while (0)
+
+#define toi_cond_pause(pause, message) \
+	do { if (toi_current_ui) \
+		(toi_current_ui->cond_pause)(pause, message); \
+	} while (0)
+
+#define toi_prepare_status(clear, fmt, args...) \
+	do { if (toi_current_ui) \
+		(toi_current_ui->prepare_status)(clear, fmt, ##args); \
+	     else \
+		printk(KERN_ERR fmt "%s", ##args, "\n"); \
+	} while (0)
+
+#define toi_message(sn, lev, log, fmt, a...) \
+do { \
+	if (toi_current_ui && (!sn || test_debug_state(sn))) \
+		toi_current_ui->message(sn, lev, log, fmt, ##a); \
+} while (0)
+
+__exit void toi_ui_cleanup(void);
+extern int toi_ui_init(void);
+extern void toi_ui_exit(void);
+extern int toi_register_ui_ops(struct ui_ops *this_ui);
+extern void toi_remove_ui_ops(struct ui_ops *this_ui);
diff -Npur linux-2.6-block/kernel/power/tuxonice_userui.c linux-2.6-block-custom/kernel/power/tuxonice_userui.c
--- linux-2.6-block/kernel/power/tuxonice_userui.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/kernel/power/tuxonice_userui.c	2008-09-26 19:48:24.031795592 +0900
@@ -0,0 +1,675 @@
+/*
+ * kernel/power/tuxonice_userui.c
+ *
+ * Copyright (C) 2005-2007 Bernard Blackham
+ * Copyright (C) 2002-2007 Nigel Cunningham (nigel at tuxonice net)
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for TuxOnIce's user interface.
+ *
+ * The user interface code talks to a userspace program via a
+ * netlink socket.
+ *
+ * The kernel side:
+ * - starts the userui program;
+ * - sends text messages and progress bar status;
+ *
+ * The user space side:
+ * - passes messages regarding user requests (abort, toggle reboot etc)
+ *
+ */
+
+#define __KERNEL_SYSCALLS__
+
+#include <linux/suspend.h>
+#include <linux/freezer.h>
+#include <linux/console.h>
+#include <linux/ctype.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/kmod.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+
+#include "tuxonice_sysfs.h"
+#include "tuxonice_modules.h"
+#include "tuxonice.h"
+#include "tuxonice_ui.h"
+#include "tuxonice_netlink.h"
+#include "tuxonice_power_off.h"
+
+static char local_printf_buf[1024];	/* Same as printk - should be safe */
+
+static struct user_helper_data ui_helper_data;
+static struct toi_module_ops userui_ops;
+static int orig_kmsg;
+
+static char lastheader[512];
+static int lastheader_message_len;
+static int ui_helper_changed; /* Used at resume-time so don't overwrite value
+				set from initrd/ramfs. */
+
+/* Number of distinct progress amounts that userspace can display */
+static int progress_granularity = 30;
+
+static DECLARE_WAIT_QUEUE_HEAD(userui_wait_for_key);
+
+/**
+ * ui_nl_set_state - Update toi_action based on a message from userui.
+ *
+ * @n: New action flags; only the bits userui may change are applied.
+ */
+static void ui_nl_set_state(int n)
+{
+	/* Only let them change certain settings */
+	static const int toi_action_mask =
+		(1 << TOI_REBOOT) | (1 << TOI_PAUSE) |
+		(1 << TOI_LOGALL) |
+		(1 << TOI_SINGLESTEP) |
+		(1 << TOI_PAUSE_NEAR_PAGESET_END);
+
+	toi_bkd.toi_action = (toi_bkd.toi_action & (~toi_action_mask)) |
+		(n & toi_action_mask);
+
+	if (!test_action_state(TOI_PAUSE) &&
+			!test_action_state(TOI_SINGLESTEP))
+		wake_up_interruptible(&userui_wait_for_key);
+}
+
+/**
+ * userui_post_atomic_restore - Tell userui that atomic restore just happened.
+ *
+ * Tell userui that atomic restore just occurred, so that it can do things like
+ * redrawing the screen, re-getting settings and so on.
+ */
+static void userui_post_atomic_restore(void)
+{
+	toi_send_netlink_message(&ui_helper_data,
+			USERUI_MSG_POST_ATOMIC_RESTORE, NULL, 0);
+}
+
+/**
+ * userui_storage_needed - Report how much memory in image header is needed.
+ */
+static int userui_storage_needed(void)
+{
+	return sizeof(ui_helper_data.program) + 1 + sizeof(int);
+}
+
+/**
+ * userui_save_config_info - Fill buffer with config info for image header.
+ *
+ * @buf: Buffer into which to put the config info we want to save.
+ */
+static int userui_save_config_info(char *buf)
+{
+	*((int *) buf) = progress_granularity;
+	memcpy(buf + sizeof(int), ui_helper_data.program,
+			sizeof(ui_helper_data.program));
+	return sizeof(ui_helper_data.program) + sizeof(int) + 1;
+}
+
+/**
+ * userui_load_config_info - Restore config info from buffer.
+ *
+ * @buf: Buffer containing header info loaded.
+ * @size: Size of data loaded for this module.
+ */
+static void userui_load_config_info(char *buf, int size)
+{
+	progress_granularity = *((int *) buf);
+	size -= sizeof(int);
+
+	/* Don't load the saved path if one has already been set */
+	if (ui_helper_changed)
+		return;
+
+	if (size > sizeof(ui_helper_data.program))
+		size = sizeof(ui_helper_data.program);
+
+	memcpy(ui_helper_data.program, buf + sizeof(int), size);
+	ui_helper_data.program[sizeof(ui_helper_data.program)-1] = '\0';
+}
+
+/**
+ * set_ui_program_set: Record that userui program was changed.
+ *
+ * Side effect routine for when the userui program is set. In an initrd or
+ * ramfs, the user may set a location for the userui program. If this happens,
+ * we don't want to reload the value that was saved in the image header. This
+ * routine allows us to flag that we shouldn't restore the program name from
+ * the image header.
+ */
+static void set_ui_program_set(void)
+{
+	ui_helper_changed = 1;
+}
+
+/**
+ * userui_memory_needed - Tell core how much memory to reserve for us.
+ */
+static int userui_memory_needed(void)
+{
+	/* ball park figure of 128 pages */
+	return (128 * PAGE_SIZE);
+}
+
+/**
+ * userui_update_status - Update the progress bar and (if on) in-bar message.
+ *
+ * @value: Current progress percentage numerator.
+ * @maximum: Current progress percentage denominator.
+ * @fmt: Message to be displayed in the middle of the progress bar.
+ *
+ * Note that a NULL message does not mean that any previous message is erased!
+ * For that, you need toi_prepare_status with clearbar on.
+ *
+ * Returns an unsigned long, being the next numerator (as determined by the
+ * maximum and progress granularity) where status needs to be updated.
+ * This is to reduce unnecessary calls to update_status.
+ */
+static unsigned long userui_update_status(unsigned long value,
+		unsigned long maximum, const char *fmt, ...)
+{
+	static int last_step = -1;
+	struct userui_msg_params msg;
+	int bitshift;
+	int this_step;
+	unsigned long next_update;
+
+	if (ui_helper_data.pid == -1)
+		return 0;
+
+	if ((!maximum) || (!progress_granularity))
+		return maximum;
+
+	/* value is unsigned and so never negative: only the upper bound
+	 * needs clamping. */
+	if (value > maximum)
+		value = maximum;
+
+	/* Try to avoid math problems - we can't do 64 bit math here
+	 * (and shouldn't need it - anyone got screen resolution
+	 * of 65536 pixels or more?) */
+	bitshift = fls(maximum) - 16;
+	if (bitshift > 0) {
+		unsigned long temp_maximum = maximum >> bitshift;
+		unsigned long temp_value = value >> bitshift;
+		this_step = (int)
+			(temp_value * progress_granularity / temp_maximum);
+		next_update = (((this_step + 1) * temp_maximum /
+					progress_granularity) + 1) << bitshift;
+	} else {
+		this_step = (int) (value * progress_granularity / maximum);
+		next_update = ((this_step + 1) * maximum /
+				progress_granularity) + 1;
+	}
+
+	/* Still on the step we last reported: nothing new to send. */
+	if (this_step == last_step)
+		return next_update;
+
+	memset(&msg, 0, sizeof(msg));
+
+	msg.a = this_step;
+	msg.b = progress_granularity;
+
+	if (fmt) {
+		va_list args;
+		va_start(args, fmt);
+		vsnprintf(msg.text, sizeof(msg.text), fmt, args);
+		va_end(args);
+		msg.text[sizeof(msg.text)-1] = '\0';
+	}
+
+	toi_send_netlink_message(&ui_helper_data, USERUI_MSG_PROGRESS,
+			&msg, sizeof(msg));
+	last_step = this_step;
+
+	return next_update;
+}
+
+/**
+ * userui_message - Display a message without necessarily logging it.
+ *
+ * @section: Type of message. Messages can be filtered by type.
+ * @level: Degree of importance of the message. Lower values = higher priority.
+ * @normally_logged: Whether logged even if log_everything is off.
+ * @fmt: Message (and parameters).
+ *
+ * This function is intended to do the same job as printk, but without normally
+ * logging what is printed. The point is to be able to get debugging info on
+ * screen without filling the logs with "1/534. ^M 2/534^M. 3/534^M"
+ *
+ * It may be called from an interrupt context - can't sleep!
+ */
+static void userui_message(unsigned long section, unsigned long level,
+		int normally_logged, const char *fmt, ...)
+{
+	struct userui_msg_params msg;
+
+	if ((level) && (level > console_loglevel))
+		return;
+
+	memset(&msg, 0, sizeof(msg));
+
+	msg.a = section;
+	msg.b = level;
+	msg.c = normally_logged;
+
+	if (fmt) {
+		va_list args;
+		va_start(args, fmt);
+		vsnprintf(msg.text, sizeof(msg.text), fmt, args);
+		va_end(args);
+		msg.text[sizeof(msg.text)-1] = '\0';
+	}
+
+	if (test_action_state(TOI_LOGALL))
+		printk(KERN_INFO "%s\n", msg.text);
+
+	toi_send_netlink_message(&ui_helper_data, USERUI_MSG_MESSAGE,
+			&msg, sizeof(msg));
+}
+
+/**
+ * wait_for_key_via_userui - Wait for userui to receive a keypress.
+ */
+static void wait_for_key_via_userui(void)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(&userui_wait_for_key, &wait);
+	set_current_state(TASK_INTERRUPTIBLE);
+	/* Already queued above, so just yield until woken or signalled. */
+	schedule();
+
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(&userui_wait_for_key, &wait);
+}
+
+/**
+ * userui_prepare_status - Display high level messages.
+ *
+ * @clearbar: Whether to clear the progress bar.
+ * @fmt...: New message for the title.
+ *
+ * Prepare the 'nice display', drawing the header and version, along with the
+ * current action and perhaps also resetting the progress bar.
+ */
+static void userui_prepare_status(int clearbar, const char *fmt, ...)
+{
+	va_list args;
+
+	if (fmt) {
+		va_start(args, fmt);
+		lastheader_message_len = vsnprintf(lastheader, 512, fmt, args);
+		va_end(args);
+	}
+
+	if (clearbar)
+		toi_update_status(0, 1, NULL);
+	/* lastheader is already-formatted data: never use it as a format. */
+	if (ui_helper_data.pid == -1)
+		printk(KERN_EMERG "%s\n", lastheader);
+	else
+		toi_message(0, TOI_STATUS, 1, "%s", lastheader);
+}
+
+/**
+ * userui_wait_for_keypress - Wait for keypress via userui.
+ *
+ * @timeout: Maximum time to wait.
+ *
+ * Wait for a keypress from userui.
+ *
+ * FIXME: Implement timeout?
+ */
+static char userui_wait_for_keypress(int timeout)
+{
+	char key = '\0';
+
+	if (ui_helper_data.pid != -1) {
+		wait_for_key_via_userui();
+		key = ' ';
+	}
+
+	return key;
+}
+
+/**
+ * userui_abort_hibernate - Abort a cycle & tell user if they didn't request it.
+ *
+ * @result_code: Reason why we're aborting (1 << bit).
+ * @fmt: Message to display if telling the user what's going on.
+ *
+ * Abort a cycle. Unless the user asked for the abort, show the reason (built
+ * with bounded formatting in local_printf_buf) and, if userui runs, wait for a key.
+ */
+static void userui_abort_hibernate(int result_code, const char *fmt, ...)
+{
+	va_list args;
+	int printed_len = 0;
+
+	set_result_state(result_code);
+
+	if (test_result_state(TOI_ABORTED))
+		return;
+
+	set_result_state(TOI_ABORTED);
+
+	if (test_result_state(TOI_ABORT_REQUESTED))
+		return;
+
+	va_start(args, fmt);
+	printed_len = vscnprintf(local_printf_buf, sizeof(local_printf_buf),
+			fmt, args);
+	va_end(args);
+	if (ui_helper_data.pid != -1)
+		scnprintf(local_printf_buf + printed_len,
+			sizeof(local_printf_buf) - printed_len,
+			" (Press SPACE to continue)");
+	toi_prepare_status(CLEAR_BAR, "%s", local_printf_buf);
+
+	if (ui_helper_data.pid != -1)
+		userui_wait_for_keypress(0);
+}
+
+/**
+ * request_abort_hibernate - Abort hibernating or resuming at user request.
+ *
+ * Handle the user requesting the cancellation of a hibernation or resume by
+ * pressing escape. Does nothing if an abort has already been requested.
+ */
+static void request_abort_hibernate(void)
+{
+	if (test_result_state(TOI_ABORT_REQUESTED))
+		return;
+	/* Resuming: flag the stop, yield until I/O has stopped, power down. */
+	if (test_toi_state(TOI_NOW_RESUMING)) {
+		toi_prepare_status(CLEAR_BAR, "Escape pressed. "
+					"Powering down again.");
+		set_toi_state(TOI_STOP_RESUME);
+		while (!test_toi_state(TOI_IO_STOPPED))
+			schedule();
+		if (toiActiveAllocator->mark_resume_attempted)
+			toiActiveAllocator->mark_resume_attempted(0);
+		toi_power_down();
+	}
+	/* Record the abort and wake anyone waiting for a userui keypress. */
+	toi_prepare_status(CLEAR_BAR, "--- ESCAPE PRESSED :"
+					" ABORTING HIBERNATION ---");
+	set_abort_result(TOI_ABORT_REQUESTED);
+	wake_up_interruptible(&userui_wait_for_key);
+}
+
+/**
+ * userui_user_rcv_msg - Receive a netlink message from userui.
+ * @skb: skb received.
+ * @nlh: Netlink header received.
+ * Returns 0 if handled or ignored, 1 if not handled here, else -errno.
+ */
+static int userui_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	int type = nlh->nlmsg_type;
+	int *data;
+
+	/* A control message: ignore them */
+	if (type < NETLINK_MSG_BASE)
+		return 0;
+
+	/* Unknown message: reply with EINVAL */
+	if (type >= USERUI_MSG_MAX)
+		return -EINVAL;
+
+	/* All operations require privileges, even GET */
+	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
+
+	/* Only allow one task to receive NOFREEZE privileges */
+	if (type == NETLINK_MSG_NOFREEZE_ME && ui_helper_data.pid != -1) {
+		printk(KERN_INFO "Got NOFREEZE_ME request when "
+			"ui_helper_data.pid is %d.\n", ui_helper_data.pid);
+		return -EBUSY;
+	}
+
+	data = (int *) NLMSG_DATA(nlh);
+
+	switch (type) {
+	case USERUI_MSG_ABORT:
+		request_abort_hibernate();
+		return 0;
+	case USERUI_MSG_GET_STATE:
+		toi_send_netlink_message(&ui_helper_data,
+				USERUI_MSG_GET_STATE, &toi_bkd.toi_action,
+				sizeof(toi_bkd.toi_action));
+		return 0;
+	case USERUI_MSG_GET_DEBUG_STATE:
+		toi_send_netlink_message(&ui_helper_data,
+				USERUI_MSG_GET_DEBUG_STATE,
+				&toi_bkd.toi_debug_state,
+				sizeof(toi_bkd.toi_debug_state));
+		return 0;
+	case USERUI_MSG_SET_STATE:
+		if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+			return -EINVAL;
+		ui_nl_set_state(*data);
+		return 0;
+	case USERUI_MSG_SET_DEBUG_STATE:
+		if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+			return -EINVAL;
+		toi_bkd.toi_debug_state = (*data);
+		return 0;
+	case USERUI_MSG_SPACE:
+		wake_up_interruptible(&userui_wait_for_key);
+		return 0;
+	case USERUI_MSG_GET_POWERDOWN_METHOD:
+		toi_send_netlink_message(&ui_helper_data,
+				USERUI_MSG_GET_POWERDOWN_METHOD,
+				&toi_poweroff_method,
+				sizeof(toi_poweroff_method));
+		return 0;
+	case USERUI_MSG_SET_POWERDOWN_METHOD:
+		if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+			return -EINVAL;
+		toi_poweroff_method = (*data);
+		return 0;
+	case USERUI_MSG_GET_LOGLEVEL:
+		toi_send_netlink_message(&ui_helper_data,
+				USERUI_MSG_GET_LOGLEVEL,
+				&toi_bkd.toi_default_console_level,
+				sizeof(toi_bkd.toi_default_console_level));
+		return 0;
+	case USERUI_MSG_SET_LOGLEVEL:
+		if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(int)))
+			return -EINVAL;
+		toi_bkd.toi_default_console_level = (*data);
+		return 0;
+	case USERUI_MSG_PRINTK:
+		/* Untrusted text: bound it and never use it as the format. */
+		printk("%.*s", (int) (nlh->nlmsg_len - NLMSG_LENGTH(0)),
+				(char *) data);
+		return 0;
+	}
+
+	/* Unhandled here */
+	return 1;
+}
+
+/**
+ * userui_cond_pause - Possibly pause at user request.
+ *
+ * @pause: Whether to pause or just display the message.
+ * @message: Message to display at the start of pausing.
+ *
+ * Block until userui reports SPACE, but only while a helper is attached and
+ * either single stepping is on or @pause is set together with TOI_PAUSE. The
+ * prompt is shown once. While paused, the user can change the loglevel,
+ * toggle debugging sections and suchlike. We always yield before returning.
+ */
+static void userui_cond_pause(int pause, char *message)
+{
+	int prompted = 0, key = 0;
+
+	while (key != ' ' && ui_helper_data.pid != -1) {
+		if (!(test_action_state(TOI_PAUSE) && pause) &&
+		    !test_action_state(TOI_SINGLESTEP))
+			break;
+		if (!prompted) {
+			toi_prepare_status(DONT_CLEAR_BAR,
+			   "%s Press SPACE to continue.%s",
+			   message ? message : "",
+			   test_action_state(TOI_SINGLESTEP) ?
+			   " Single step on." : "");
+			prompted = 1;
+		}
+		key = userui_wait_for_keypress(0);
+	}
+	schedule();
+}
+
+/**
+ * userui_prepare_console - Prepare the console for use.
+ *
+ * Save kmsg_redirect and point it at fg_console + 1, then start the userui
+ * helper if userui is enabled and a helper program has been configured.
+ */
+static void userui_prepare_console(void)
+{
+	orig_kmsg = kmsg_redirect;
+	kmsg_redirect = fg_console + 1;
+
+	ui_helper_data.pid = -1;
+
+	if (!userui_ops.enabled) {
+		printk(KERN_INFO "TuxOnIce: Userui disabled.\n");
+		return;
+	}
+
+	if (*ui_helper_data.program)
+		toi_netlink_setup(&ui_helper_data);
+	else
+		printk(KERN_INFO "TuxOnIce: Userui program not configured.\n");
+}
+
+/**
+ * userui_cleanup_console - Cleanup after a cycle.
+ *
+ * Close the netlink connection to the helper, if one is attached, and put
+ * kmsg_redirect back to the value saved by userui_prepare_console().
+ */
+static void userui_cleanup_console(void)
+{
+	/* A pid of -1 means no helper is attached. */
+	if (ui_helper_data.pid >= 0)
+		toi_netlink_close(&ui_helper_data);
+	kmsg_redirect = orig_kmsg;
+}
+
+/*
+ * User interface specific /sys/power/tuxonice entries. The table is empty
+ * unless both CONFIG_NET and CONFIG_SYSFS are enabled.
+ */
+static struct toi_sysfs_data sysfs_params[] = {
+#if defined(CONFIG_NET) && defined(CONFIG_SYSFS)
+	{ TOI_ATTR("enable_escape", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_CAN_CANCEL, 0)
+	},
+
+	{ TOI_ATTR("pause_between_steps", SYSFS_RW),
+	  SYSFS_BIT(&toi_bkd.toi_action, TOI_PAUSE, 0)
+	},
+
+	{ TOI_ATTR("enabled", SYSFS_RW),
+	  SYSFS_INT(&userui_ops.enabled, 0, 1, 0)
+	},
+
+	{ TOI_ATTR("progress_granularity", SYSFS_RW),
+	  SYSFS_INT(&progress_granularity, 1, 2048, 0)
+	},
+
+	{ TOI_ATTR("program", SYSFS_RW),
+	  SYSFS_STRING(ui_helper_data.program, 255, 0),
+	  .write_side_effect = set_ui_program_set,
+	},
+#endif
+};
+
+/* Module description handed to toi_register_module(). */
+static struct toi_module_ops userui_ops = {
+	.type				= MISC_MODULE,
+	.name				= "userui",
+	.shared_directory		= "user_interface",
+	.module				= THIS_MODULE,
+	.storage_needed			= userui_storage_needed,
+	.save_config_info		= userui_save_config_info,
+	.load_config_info		= userui_load_config_info,
+	.memory_needed			= userui_memory_needed,
+	.sysfs_data			= sysfs_params,
+	.num_sysfs_entries		= ARRAY_SIZE(sysfs_params),
+};
+/* Callbacks handed to toi_register_ui_ops() by toi_user_ui_init(). */
+static struct ui_ops my_ui_ops = {
+	.post_atomic_restore		= userui_post_atomic_restore,
+	.update_status			= userui_update_status,
+	.message			= userui_message,
+	.prepare_status			= userui_prepare_status,
+	.abort				= userui_abort_hibernate,
+	.cond_pause			= userui_cond_pause,
+	.prepare			= userui_prepare_console,
+	.cleanup			= userui_cleanup_console,
+	.wait_for_key			= userui_wait_for_keypress,
+};
+
+/**
+ * toi_user_ui_init - Boot time initialisation for user interface.
+ * Registers the module, then the ui ops. The module is only unregistered
+ * again if it did register but the ui ops registration then failed.
+ */
+static __init int toi_user_ui_init(void)
+{
+	int result;
+
+	ui_helper_data.nl = NULL;
+	strncpy(ui_helper_data.program, CONFIG_TOI_USERUI_DEFAULT_PATH, 255);
+	ui_helper_data.pid = -1;
+	ui_helper_data.skb_size = sizeof(struct userui_msg_params);
+	ui_helper_data.pool_limit = 6;
+	ui_helper_data.netlink_id = NETLINK_TOI_USERUI;
+	ui_helper_data.name = "userspace ui";
+	ui_helper_data.rcv_msg = userui_user_rcv_msg;
+	ui_helper_data.interface_version = 7;
+	ui_helper_data.must_init = 0;
+	ui_helper_data.not_ready = userui_cleanup_console;
+	init_completion(&ui_helper_data.wait_for_process);
+	result = toi_register_module(&userui_ops);
+	if (result)
+		return result;
+	result = toi_register_ui_ops(&my_ui_ops);
+	if (result)
+		toi_unregister_module(&userui_ops);
+	return result;
+}
+
+#ifdef MODULE
+/**
+ * toi_user_ui_exit - Cleanup code for if the core is unloaded.
+ */
+static __exit void toi_user_ui_exit(void)
+{
+	toi_netlink_close_complete(&ui_helper_data);
+	toi_remove_ui_ops(&my_ui_ops);
+	toi_unregister_module(&userui_ops);
+}
+
+module_init(toi_user_ui_init);
+module_exit(toi_user_ui_exit);
+MODULE_AUTHOR("Nigel Cunningham");
+MODULE_DESCRIPTION("TuxOnIce Userui Support");
+MODULE_LICENSE("GPL");
+#else
+late_initcall(toi_user_ui_init);
+#endif
diff -Npur linux-2.6-block/kernel/printk.c linux-2.6-block-custom/kernel/printk.c
--- linux-2.6-block/kernel/printk.c	2008-09-27 16:12:28.998907892 +0900
+++ linux-2.6-block-custom/kernel/printk.c	2008-09-26 19:45:20.398754871 +0900
@@ -32,6 +32,8 @@
 #include <linux/security.h>
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
+#include <linux/jiffies.h>
+#include <linux/suspend.h>
 
 #include <asm/uaccess.h>
 
@@ -101,9 +103,12 @@ static DEFINE_SPINLOCK(logbuf_lock);
  * The indices into log_buf are not constrained to log_buf_len - they
  * must be masked before subscripting
  */
-static unsigned log_start;	/* Index into log_buf: next char to be read by syslog() */
-static unsigned con_start;	/* Index into log_buf: next char to be sent to consoles */
-static unsigned log_end;	/* Index into log_buf: most-recently-written-char + 1 */
+/* Index into log_buf: next char to be read by syslog() */
+static unsigned POSS_NOSAVE log_start;
+/* Index into log_buf: next char to be sent to consoles */
+static unsigned POSS_NOSAVE con_start;
+/* Index into log_buf: most-recently-written-char + 1 */
+static unsigned POSS_NOSAVE log_end;
 
 /*
  *	Array of consoles built from command line options (console=)
@@ -131,10 +136,11 @@ static int console_may_schedule;
 
 #ifdef CONFIG_PRINTK
 
-static char __log_buf[__LOG_BUF_LEN];
-static char *log_buf = __log_buf;
-static int log_buf_len = __LOG_BUF_LEN;
-static unsigned logged_chars; /* Number of chars produced since last read+clear operation */
+static POSS_NOSAVE char __log_buf[__LOG_BUF_LEN];
+static POSS_NOSAVE char *log_buf = __log_buf;
+static POSS_NOSAVE int log_buf_len = __LOG_BUF_LEN;
+/* Number of chars produced since last read+clear operation */
+static POSS_NOSAVE unsigned logged_chars;
 
 static int __init log_buf_len_setup(char *str)
 {
@@ -937,6 +943,7 @@ void suspend_console(void)
 	acquire_console_sem();
 	console_suspended = 1;
 }
+EXPORT_SYMBOL(suspend_console);
 
 void resume_console(void)
 {
@@ -945,6 +952,7 @@ void resume_console(void)
 	console_suspended = 0;
 	release_console_sem();
 }
+EXPORT_SYMBOL(resume_console);
 
 /**
  * acquire_console_sem - lock the console system for exclusive use.
diff -Npur linux-2.6-block/kernel/timer.c linux-2.6-block-custom/kernel/timer.c
--- linux-2.6-block/kernel/timer.c	2008-09-27 16:12:29.046900374 +0900
+++ linux-2.6-block-custom/kernel/timer.c	2008-09-26 19:45:20.398754871 +0900
@@ -37,6 +37,8 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#include <linux/suspend.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -1002,6 +1004,59 @@ unsigned long avenrun[3];
 
 EXPORT_SYMBOL(avenrun);
 
+#ifdef CONFIG_PM
+static unsigned long avenrun_save[3];
+
+/*
+ * save_avenrun - Snapshot the load averages before a hibernation cycle.
+ * The work done while hibernating would otherwise show up in the load
+ * average after resume. Programs that hold off until the load drops (some
+ * MTAs, for instance) would then stall for a while for no good reason.
+ */
+static void save_avenrun(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(avenrun_save); i++)
+		avenrun_save[i] = avenrun[i];
+}
+
+static void restore_avenrun(void)
+{
+	int i;
+
+	/* A zero first slot means there is no snapshot to put back. */
+	if (avenrun_save[0]) {
+		for (i = 0; i < ARRAY_SIZE(avenrun_save); i++)
+			avenrun[i] = avenrun_save[i];
+		avenrun_save[0] = 0;	/* mark the snapshot as consumed */
+	}
+}
+
+static int avenrun_pm_callback(struct notifier_block *nfb,
+					unsigned long action,
+					void *ignored)
+{
+	/* Only the hibernation prepare and post events are acted upon. */
+	if (action == PM_HIBERNATION_PREPARE) {
+		save_avenrun();
+		return NOTIFY_OK;
+	}
+	if (action == PM_POST_HIBERNATION) {
+		restore_avenrun();
+		return NOTIFY_OK;
+	}
+	return NOTIFY_DONE;
+}
+/* Register avenrun_pm_callback for PM events; a no-op without CONFIG_PM. */
+static void register_pm_notifier_callback(void)
+{
+	pm_notifier(avenrun_pm_callback, 0);
+}
+#else
+static inline void register_pm_notifier_callback(void) { }
+#endif
+
 /*
  * calc_load - given tick count, update the avenrun load estimates.
  * This is called while holding a write_lock on xtime_lock.
@@ -1495,6 +1550,7 @@ void __init init_timers(void)
 	BUG_ON(err == NOTIFY_BAD);
 	register_cpu_notifier(&timers_nb);
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
+	register_pm_notifier_callback();
 }
 
 /**
diff -Npur linux-2.6-block/lib/vsprintf.c linux-2.6-block-custom/lib/vsprintf.c
--- linux-2.6-block/lib/vsprintf.c	2008-09-27 16:12:29.166926034 +0900
+++ linux-2.6-block-custom/lib/vsprintf.c	2008-09-26 19:45:20.398754871 +0900
@@ -558,6 +558,29 @@ static char *pointer(const char *fmt, ch
 	return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags);
 }
 
+/*
+ * snprintf_used
+ *
+ * Functionality    : Print a string with parameters to a buffer of a
+ *                    limited size. Unlike snprintf, we return the number
+ *                    of characters actually stored (excluding the trailing
+ *                    NUL), not the number a big enough buffer would hold.
+ */
+int snprintf_used(char *buffer, int buffer_size, const char *fmt, ...)
+{
+	int result;
+	va_list args;
+
+	if (buffer_size <= 0)
+		return 0;
+
+	va_start(args, fmt);
+	result = vsnprintf(buffer, buffer_size, fmt, args);
+	va_end(args);
+
+	return result >= buffer_size ? buffer_size - 1 : result;
+}
+
 /**
  * vsnprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
diff -Npur linux-2.6-block/mm/Makefile linux-2.6-block-custom/mm/Makefile
--- linux-2.6-block/mm/Makefile	2008-09-27 16:12:29.182924157 +0900
+++ linux-2.6-block-custom/mm/Makefile	2008-09-26 19:45:20.402755449 +0900
@@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   maccess.o page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   page_isolation.o mm_init.o $(mmu-y)
+			   dyn_pageflags.o page_isolation.o mm_init.o $(mmu-y)
 
 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)	+= bounce.o
diff -Npur linux-2.6-block/mm/dyn_pageflags.c linux-2.6-block-custom/mm/dyn_pageflags.c
--- linux-2.6-block/mm/dyn_pageflags.c	1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6-block-custom/mm/dyn_pageflags.c	2008-09-26 20:10:05.854754305 +0900
@@ -0,0 +1,801 @@
+/*
+ * mm/dyn_pageflags.c
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham <nigel at tuxonice net>
+ *
+ * This file is released under the GPLv2.
+ *
+ * Routines for dynamically allocating and releasing bitmaps
+ * used as pseudo-pageflags.
+ *
+ * We use bitmaps, built out of order zero allocations and
+ * linked together by kzalloc'd arrays of pointers into
+ * an array that looks like...
+ *
+ * 	pageflags->bitmap[node][zone_id][page_num][ul]
+ *
+ * All of this is transparent to the caller, who just uses
+ * the allocate & free routines to create/destroy bitmaps,
+ * and get/set/clear to operate on individual flags.
+ *
+ * Bitmaps can be sparse, with the individual pages only being
+ * allocated when a bit is set in the page.
+ *
+ * Memory hotplugging support is work in progress. A zone's
+ * start_pfn may change. If it does, we need to reallocate
+ * the zone bitmap, adding additional pages to the front to
+ * cover the bitmap. For simplicity, we don't shift the
+ * contents of existing pages around. The lock is only used
+ * to avoid reentrancy when resizing zones. The replacement
+ * of old data with new is done atomically. If we try to test
+ * a bit in the new area before the update is completed, we
+ * know it's zero.
+ *
+ * TuxOnIce knows the structure of these pageflags, so that
+ * it can serialise them in the image header. TODO: Make
+ * that support more generic so that TuxOnIce doesn't need
+ * to know how dyn_pageflags are stored.
+ */
+
+/* Avoid warnings in include/linux/mm.h */
+struct page;
+struct dyn_pageflags;
+int test_dynpageflag(struct dyn_pageflags *bitmap, struct page *page);
+
+#include <linux/bootmem.h>
+#include <linux/dyn_pageflags.h>
+#include <linux/module.h>
+
+static LIST_HEAD(flags_list);
+static DEFINE_SPINLOCK(flags_list_lock);
+
+static void* (*dyn_allocator)(unsigned long size, unsigned long flags);
+
+static int dyn_pageflags_debug;
+
+#define PR_DEBUG(a, b...) \
+	do { if (dyn_pageflags_debug) printk(a, ##b); } while (0)
+#define DUMP_DEBUG(bitmap) \
+	do { if (dyn_pageflags_debug) dump_pagemap(bitmap); } while (0)
+
+#if BITS_PER_LONG == 32
+#define UL_SHIFT 5
+#else
+#if BITS_PER_LONG == 64
+#define UL_SHIFT 6
+#else
+#error Bits per long not 32 or 64?
+#endif
+#endif
+
+#define BIT_NUM_MASK ((sizeof(unsigned long) << 3) - 1)
+#define PAGE_NUM_MASK (~((1 << (PAGE_SHIFT + 3)) - 1))
+#define UL_NUM_MASK (~(BIT_NUM_MASK | PAGE_NUM_MASK))
+
+/*
+ * PAGENUMBER gives the index of the bitmap page covering a zone offset.
+ * PAGEINDEX gives the index of the unsigned long within that page.
+ * PAGEBIT gives the index of the bit within the unsigned long.
+ */
+#define PAGENUMBER(zone_offset) ((int) (zone_offset >> (PAGE_SHIFT + 3)))
+#define PAGEINDEX(zone_offset) ((int) ((zone_offset & UL_NUM_MASK) >> UL_SHIFT))
+#define PAGEBIT(zone_offset) ((int) (zone_offset & BIT_NUM_MASK))
+
+#define PAGE_UL_PTR(bitmap, node, zone_num, zone_pfn) \
+       ((bitmap[node][zone_num][PAGENUMBER(zone_pfn)])+PAGEINDEX(zone_pfn))
+
+#define pages_for_zone(zone) \
+	(DIV_ROUND_UP((zone)->spanned_pages, (PAGE_SIZE << 3)))
+
+#define pages_for_span(span) \
+	(DIV_ROUND_UP(span, PAGE_SIZE << 3))
+
+/* Locate page's bitmap word (ul, or NULL) and bit; either may go unused. */
+#define GET_BIT_AND_UL(pageflags, page) \
+	struct zone *zone = page_zone(page); \
+	unsigned long pfn = page_to_pfn(page); \
+	unsigned long zone_pfn = pfn - zone->zone_start_pfn; \
+	int node = page_to_nid(page); \
+	int zone_num = zone_idx(zone); \
+	int pagenum = PAGENUMBER(zone_pfn) + 2; \
+	int page_offset = PAGEINDEX(zone_pfn); \
+	unsigned long **zone_array = ((pageflags)->bitmap && \
+		(pageflags)->bitmap[node] && \
+		(pageflags)->bitmap[node][zone_num]) ? \
+			(pageflags)->bitmap[node][zone_num] : NULL; \
+	unsigned long __maybe_unused *ul = (zone_array && \
+		(unsigned long) zone_array[0] <= pfn && \
+		(unsigned long) zone_array[1] > (pagenum-2) && \
+		zone_array[pagenum]) ? zone_array[pagenum] + page_offset : \
+		  NULL; \
+	int bit __maybe_unused = PAGEBIT(zone_pfn);
+
+#define for_each_online_pgdat_zone(pgdat, zone_nr) \
+	for_each_online_pgdat(pgdat) \
+		for (zone_nr = 0; zone_nr < MAX_NR_ZONES; zone_nr++)
+
+/**
+ * dump_pagemap - Display the contents of a bitmap for debugging purposes.
+ * @pagemap: The array to be dumped.
+ * Prints, per online node and zone, the pointer stored at each level.
+ */
+void dump_pagemap(struct dyn_pageflags *pagemap)
+{
+	int i = 0;
+	struct pglist_data *pgdat;
+	unsigned long ****bitmap = pagemap->bitmap;
+
+	printk(KERN_INFO " --- Dump bitmap %p ---\n", pagemap);
+
+	printk(KERN_INFO "%p: Sparse flag = %d\n",
+			&pagemap->sparse, pagemap->sparse);
+	printk(KERN_INFO "%p: Bitmap      = %p\n",
+			&pagemap->bitmap, bitmap);
+
+	if (!bitmap)
+		goto out;
+
+	for_each_online_pgdat(pgdat) {
+		int node_id = pgdat->node_id, zone_nr;
+		printk(KERN_INFO "%p: Node %d => %p\n",
+				&bitmap[node_id], node_id,
+				bitmap[node_id]);
+		if (!bitmap[node_id])
+			continue;
+		for (zone_nr = 0; zone_nr < MAX_NR_ZONES; zone_nr++) {
+			printk(KERN_INFO "%p:   Zone %d => %p%s\n",
+					&bitmap[node_id][zone_nr], zone_nr,
+					bitmap[node_id][zone_nr],
+					bitmap[node_id][zone_nr] ? "" :
+						" (empty)");
+			if (!bitmap[node_id][zone_nr])
+				continue;
+			/* Slots 0 and 1 are a header, not pages. */
+			printk(KERN_INFO "%p:     Zone start pfn  = %p\n",
+					&bitmap[node_id][zone_nr][0],
+					bitmap[node_id][zone_nr][0]);
+			printk(KERN_INFO "%p:     Number of pages = %p\n",
+					&bitmap[node_id][zone_nr][1],
+					bitmap[node_id][zone_nr][1]);
+			for (i = 2; i < (unsigned long) bitmap[node_id]
+					[zone_nr][1] + 2; i++)
+				printk(KERN_INFO
+					"%p:     Page %2d         = %p\n",
+					&bitmap[node_id][zone_nr][i],
+					i - 2,
+					bitmap[node_id][zone_nr][i]);
+		}
+	}
+out:
+	printk(KERN_INFO " --- Dump of bitmap %p finishes\n", pagemap);
+}
+EXPORT_SYMBOL_GPL(dump_pagemap);
+
+/**
+ * clear_dyn_pageflags - Zero all pageflags in a bitmap.
+ * @pagemap: The array to be cleared; one with no storage is left untouched.
+ */
+void clear_dyn_pageflags(struct dyn_pageflags *pagemap)
+{
+	int i = 0, zone_idx;
+	struct pglist_data *pgdat;
+	unsigned long ****bitmap = pagemap->bitmap;
+
+	if (!bitmap)
+		return;
+
+	for_each_online_pgdat_zone(pgdat, zone_idx) {
+		int node_id = pgdat->node_id;
+		struct zone *zone = &pgdat->node_zones[zone_idx];
+
+		if (!populated_zone(zone) ||
+		   (!bitmap[node_id] || !bitmap[node_id][zone_idx]))
+			continue;
+
+		for (i = 2; i < pages_for_zone(zone) + 2; i++)
+			if (bitmap[node_id][zone_idx][i])
+				memset((bitmap[node_id][zone_idx][i]), 0,
+						PAGE_SIZE);
+	}
+}
+EXPORT_SYMBOL_GPL(clear_dyn_pageflags);
+
+/**
+ * Allocators.
+ *
+ * During boot time, we want to use alloc_bootmem_low. Afterwards, we want
+ * kzalloc. These routines let us do that without causing compile time warnings
+ * about mismatched sections, as would happen if we did a simple
+ * boot ? alloc_bootmem_low() : kzalloc() below.
+ */
+
+/**
+ * boot_time_allocator - Allocator used while booting.
+ * Hands the request straight to alloc_bootmem_low().
+ * @size: Number of bytes wanted.
+ * @flags: Allocation flags (ignored here).
+ */
+static __init void *boot_time_allocator(unsigned long size, unsigned long flags)
+{
+	return alloc_bootmem_low(size);
+}
+
+/**
+ * normal_allocator - Allocator used post-boot.
+ *
+ * @size: Number of bytes wanted.
+ * @flags: Allocation flags.
+ *
+ * Returns zeroed memory: a whole page for PAGE_SIZE requests, else kzalloc.
+ */
+static void *normal_allocator(unsigned long size, unsigned long flags)
+{
+	/* Anything other than exactly one page comes from the slab. */
+	if (size != PAGE_SIZE)
+		return kzalloc(size, flags);
+
+	return (void *) get_zeroed_page(flags);
+}
+
+/**
+ * dyn_pageflags_init - Do the earliest initialisation.
+ *
+ * Very early in the boot process, select the boot time allocator
+ * (alloc_bootmem_low). No bitmaps are allocated here.
+ */
+void __init dyn_pageflags_init(void)
+{
+	dyn_allocator = boot_time_allocator;
+}
+
+/**
+ * dyn_pageflags_use_kzalloc - Reset the allocator for normal use.
+ *
+ * Switch from the boot time allocator to normal_allocator().
+ */
+void __init dyn_pageflags_use_kzalloc(void)
+{
+	dyn_allocator = normal_allocator;
+}
+
+/**
+ * try_alloc_dyn_pageflag_part - Try to allocate a pointer array.
+ * @nr_ptrs: Number of pointers the array must hold.
+ * @ptr: Where to store the result. Returns 0, or -ENOMEM on failure.
+ */
+static int try_alloc_dyn_pageflag_part(int nr_ptrs, void **ptr)
+{
+	*ptr = (*dyn_allocator)(sizeof(void *) * nr_ptrs, GFP_ATOMIC);
+
+	if (*ptr)
+		return 0;
+
+	printk(KERN_INFO
+		"Error. Unable to allocate memory for dynamic pageflags.\n");
+	return -ENOMEM;
+}
+
+/*
+ * Allocate the bitmap page for *page_ptr unless it already exists. Locking
+ * is skipped for a NULL pageflags. Returns 0, or -ENOMEM on failure.
+ */
+static int populate_bitmap_page(struct dyn_pageflags *pageflags, int take_lock,
+			unsigned long **page_ptr)
+{
+	void *address;
+	unsigned long flags = 0;
+	int result = 0;
+
+	take_lock = take_lock && pageflags;
+	if (take_lock)
+		spin_lock_irqsave(&pageflags->struct_lock, flags);
+
+	/* The page may have been allocated while we waited. */
+	if (*page_ptr)
+		goto out;
+
+	address = (*dyn_allocator)(PAGE_SIZE, GFP_ATOMIC);
+	if (address) {
+		*page_ptr = address;
+	} else {
+		PR_DEBUG("Error. Unable to allocate memory for "
+			"dynamic pageflags page.\n");
+		result = -ENOMEM;
+	}
+out:
+	if (take_lock)
+		spin_unlock_irqrestore(&pageflags->struct_lock, flags);
+	return result;
+}
+
+/**
+ * resize_zone_bitmap - Resize the array of pages for a bitmap.
+ * Builds a new pointer array of @new_pages + 2 slots (slot 0: zone start pfn,
+ * slot 1: page count), copies the old page pointers in at @copy_offset, frees
+ * pages beyond the new end and, if not sparse, allocates the new ones.
+ */
+static int resize_zone_bitmap(struct dyn_pageflags *pagemap, struct zone *zone,
+		unsigned long old_pages, unsigned long new_pages,
+		unsigned long copy_offset, int take_lock)
+{
+	unsigned long **new_ptr = NULL, ****bitmap = pagemap->bitmap;
+	int node_id = zone_to_nid(zone), zone_idx = zone_idx(zone),
+	    to_copy = min(old_pages, new_pages), result = 0;
+	unsigned long **old_ptr = bitmap[node_id][zone_idx], i;
+
+	if (new_pages) {
+		if (try_alloc_dyn_pageflag_part(new_pages + 2,
+					(void **) &new_ptr))
+			return -ENOMEM;
+
+		if (old_pages)
+			memcpy(new_ptr + 2 + copy_offset, old_ptr + 2,
+					sizeof(unsigned long) * to_copy);
+
+		new_ptr[0] = (void *) zone->zone_start_pfn;
+		new_ptr[1] = (void *) new_pages;
+	}
+
+	/* Free/alloc bitmap pages. */
+	if (old_pages > new_pages) {
+		for (i = new_pages + 2; i < old_pages + 2; i++)
+			if (old_ptr[i])
+				free_page((unsigned long) old_ptr[i]);
+	} else if (!pagemap->sparse) {
+		for (i = old_pages + 2; i < new_pages + 2; i++)
+			if (populate_bitmap_page(NULL, take_lock,
+					(unsigned long **) &new_ptr[i])) {
+				result = -ENOMEM;
+				break;
+			}
+	}
+	/* Install the new array even if populating it failed part way. */
+	bitmap[node_id][zone_idx] = new_ptr;
+	kfree(old_ptr);
+	return result;
+}
+
+/**
+ * check_dyn_pageflag_zone - Resize a zone's section of a dyn_pageflag array.
+ *
+ * @pagemap: The array to be worked on.
+ * @zone: The zone to get in sync with reality.
+ * @force_free_all: If set, treat the zone as needing no pages at all.
+ * @take_lock: Passed through to resize_zone_bitmap().
+ *
+ * Compare what is allocated for the zone with zone_start_pfn/spanned_pages
+ * and resize if they differ. Used when allocating a new bitmap, for
+ * hot[un]plug and, with @force_free_all, when freeing a bitmap.
+ */
+int check_dyn_pageflag_zone(struct dyn_pageflags *pagemap, struct zone *zone,
+		int force_free_all, int take_lock)
+{
+	int node_id = zone_to_nid(zone), zone_idx = zone_idx(zone);
+	unsigned long copy_offset = 0, old_pages, new_pages;
+	unsigned long **old_ptr = pagemap->bitmap[node_id][zone_idx];
+
+	old_pages = old_ptr ? (unsigned long) old_ptr[1] : 0;
+	new_pages = force_free_all ? 0 : pages_for_span(zone->spanned_pages);
+
+	if (old_pages == new_pages &&
+	    (!old_pages || (unsigned long) old_ptr[0] == zone->zone_start_pfn))
+		return 0;
+
+	if (old_pages &&
+	    (unsigned long) old_ptr[0] != zone->zone_start_pfn)
+		copy_offset = pages_for_span((unsigned long) old_ptr[0] -
+							zone->zone_start_pfn);
+
+	/* New/expanded zone? */
+	return resize_zone_bitmap(pagemap, zone, old_pages, new_pages,
+			copy_offset, take_lock);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+/**
+ * dyn_pageflags_hotplug - Add pages to bitmaps for hotplugged memory.
+ *
+ * Resync every bitmap on flags_list with @zone, ignoring failures: missing
+ * pages are populated on demand when a bit is set.
+ * NOTE(review): flags_list is walked without flags_list_lock - confirm safe.
+ */
+void dyn_pageflags_hotplug(struct zone *zone)
+{
+	struct dyn_pageflags *this;
+
+	list_for_each_entry(this, &flags_list, list)
+		check_dyn_pageflag_zone(this, zone, 0, 1);
+}
+#endif
+
+/**
+ * free_dyn_pageflags - Free an array of dynamically allocated pageflags.
+ *
+ * @pagemap: The array to be freed.
+ * Release every zone array, then the per-node arrays and the top level one.
+ * A non-sparse bitmap is also taken off flags_list. No-op without storage.
+ */
+void free_dyn_pageflags(struct dyn_pageflags *pagemap)
+{
+	int zone_idx;
+	struct pglist_data *pgdat;
+	unsigned long flags;
+
+	DUMP_DEBUG(pagemap);
+
+	if (!pagemap->bitmap)
+		return;
+	/* force_free_all makes each zone drop its pages and pointer array. */
+	for_each_online_pgdat_zone(pgdat, zone_idx)
+		check_dyn_pageflag_zone(pagemap,
+				&pgdat->node_zones[zone_idx], 1, 1);
+
+	for_each_online_pgdat(pgdat) {
+		int i = pgdat->node_id;
+
+		if (pagemap->bitmap[i])
+			kfree((pagemap->bitmap)[i]);
+	}
+
+	kfree(pagemap->bitmap);
+	pagemap->bitmap = NULL;
+
+	pagemap->initialised = 0;
+
+	if (!pagemap->sparse) {
+		spin_lock_irqsave(&flags_list_lock, flags);
+		list_del_init(&pagemap->list);
+		pagemap->sparse = 1;
+		spin_unlock_irqrestore(&flags_list_lock, flags);
+	}
+}
+EXPORT_SYMBOL_GPL(free_dyn_pageflags);
+
+/**
+ * allocate_dyn_pageflags - Allocate a bitmap.
+ *
+ * @pagemap: The bitmap we want to allocate.
+ * @sparse: Whether to make the array sparse.
+ *
+ * If sparse, we don't allocate the actual pages until they're needed. If
+ * not sparse, we add the bitmap to the list so that if we're supporting
+ * memory hotplugging, we can allocate new pages on hotplug events.
+ *
+ * This routine may be called directly, or indirectly when the first bit
+ * needs to be set on a previously unused bitmap.
+ * Returns 0 on success or -ENOMEM if any level could not be allocated.
+ */
+int allocate_dyn_pageflags(struct dyn_pageflags *pagemap, int sparse)
+{
+	int zone_idx, result = -ENOMEM;
+	struct zone *zone;
+	struct pglist_data *pgdat;
+	unsigned long flags;
+
+	if (!sparse && (pagemap->sparse || !pagemap->initialised)) {
+		spin_lock_irqsave(&flags_list_lock, flags);
+		list_add(&pagemap->list, &flags_list);
+		spin_unlock_irqrestore(&flags_list_lock, flags);
+	}
+
+	spin_lock_irqsave(&pagemap->struct_lock, flags);
+
+	pagemap->initialised = 1;
+	pagemap->sparse = sparse;
+
+	if (!pagemap->bitmap && try_alloc_dyn_pageflag_part((1 << NODES_WIDTH),
+				(void **) &pagemap->bitmap))
+		goto out;
+
+	for_each_online_pgdat(pgdat) {
+		int node_id = pgdat->node_id;
+
+		if (!pagemap->bitmap[node_id] &&
+		    try_alloc_dyn_pageflag_part(MAX_NR_ZONES,
+			(void **) &(pagemap->bitmap)[node_id]))
+				goto out;
+
+		for (zone_idx = 0; zone_idx < MAX_NR_ZONES; zone_idx++) {
+			zone = &pgdat->node_zones[zone_idx];
+
+			if (populated_zone(zone) &&
+			    check_dyn_pageflag_zone(pagemap, zone, 0, 0))
+				goto out;
+		}
+	}
+
+	result = 0;
+
+out:
+	spin_unlock_irqrestore(&pagemap->struct_lock, flags);
+	return result;
+}
+EXPORT_SYMBOL_GPL(allocate_dyn_pageflags);
+
+/**
+ * test_dynpageflag - Test a page in a bitmap.
+ * @bitmap: The bitmap we're checking.
+ * @page: The page for which we want to test the matching bit.
+ * Returns zero if no storage exists for the page, else test_bit()'s result.
+ */
+int test_dynpageflag(struct dyn_pageflags *bitmap, struct page *page)
+{
+	GET_BIT_AND_UL(bitmap, page);
+
+	if (!ul)
+		return 0;
+	return test_bit(bit, ul);
+}
+EXPORT_SYMBOL_GPL(test_dynpageflag);
+
+/**
+ * set_dynpageflag - Set a bit in a bitmap.
+ *
+ * @pageflags: The bitmap we're operating on.
+ * @page: The page for which we want to set the matching bit.
+ *
+ * Set the associated bit in the array. If the array is sparse, we
+ * seek to allocate the missing page. Failure to do so is fatal (BUG).
+ */
+void set_dynpageflag(struct dyn_pageflags *pageflags, struct page *page)
+{
+	GET_BIT_AND_UL(pageflags, page);
+
+	if (!ul) {
+		/*
+		 * Sparse, hotplugged or unprepared.
+		 * Allocate / fill gaps in high levels
+		 */
+		if (allocate_dyn_pageflags(pageflags, 1) ||
+		    populate_bitmap_page(pageflags, 1, (unsigned long **)
+				&pageflags->bitmap[node][zone_num][pagenum])) {
+			printk(KERN_EMERG "Failed to allocate storage in a "
+					"sparse bitmap.\n");
+			dump_pagemap(pageflags);
+			BUG();
+		}
+		set_dynpageflag(pageflags, page);
+	} else
+		set_bit(bit, ul);
+}
+EXPORT_SYMBOL_GPL(set_dynpageflag);
+
+/**
+ * clear_dynpageflag - Turn off the bit that tracks a page.
+ * @bitmap: The bitmap being modified.
+ * @page: The page whose bit should be cleared.
+ *
+ * A page with no backing word in the bitmap is silently ignored: nothing
+ * is allocated, since an absent bit already reads as clear.
+ */
+void clear_dynpageflag(struct dyn_pageflags *bitmap, struct page *page)
+{
+	GET_BIT_AND_UL(bitmap, page);
+	if (!ul)
+		return;
+	clear_bit(bit, ul);
+}
+EXPORT_SYMBOL_GPL(clear_dynpageflag);
+
+/**
+ * get_next_bit_on - Get the next bit in a bitmap.
+ *
+ * @pageflags: The bitmap we're searching.
+ * @counter: The previous pfn, or max_pfn+1 to start from the first zone.
+ *
+ * Find the next pfn after @counter whose bit is set. When started with
+ * max_pfn+1 the first pfn is tested too. Returns max_pfn+1 if none is left.
+ */
+unsigned long get_next_bit_on(struct dyn_pageflags *pageflags,
+		unsigned long counter)
+{
+	struct page *page;
+	struct zone *zone;
+	unsigned long *ul = NULL;
+	unsigned long zone_offset;
+	int pagebit, zone_num, first = (counter == (max_pfn + 1)), node;
+
+	if (first)
+		counter = first_online_pgdat()->node_zones->zone_start_pfn;
+
+	page = pfn_to_page(counter);
+	zone = page_zone(page);
+	node = zone->zone_pgdat->node_id;
+	zone_num = zone_idx(zone);
+	zone_offset = counter - zone->zone_start_pfn;
+
+	if (first)
+		goto test;	/* test the starting pfn itself, no increment */
+
+	do {
+		zone_offset++;
+
+		if (zone_offset >= zone->spanned_pages) {
+			do {
+				zone = next_zone(zone);
+				if (!zone)
+					return max_pfn + 1;
+			} while (!zone->spanned_pages);
+
+			zone_num = zone_idx(zone);
+			node = zone->zone_pgdat->node_id;
+			zone_offset = 0;
+		}
+test:
+		pagebit = PAGEBIT(zone_offset);
+		/* (Re)load the word pointer when pagebit is 0 or ul is unset. */
+		if (!pagebit || !ul) {
+			ul = pageflags->bitmap[node][zone_num]
+				[PAGENUMBER(zone_offset)+2];
+			if (ul)
+				ul += PAGEINDEX(zone_offset);
+			else {
+				PR_DEBUG("Unallocated page. Skipping from zone"
+					" offset %lu to the start of the next "
+					"one.\n", zone_offset);
+				zone_offset = roundup(zone_offset + 1,
+						PAGE_SIZE << 3) - 1;
+				PR_DEBUG("New zone offset is %lu.\n",
+						zone_offset);
+				continue;
+			}
+		}
+		/* Skip a word with no bit >= pagebit (1UL: pagebit can be > 31) */
+		if (!ul || !(*ul & ~((1UL << pagebit) - 1))) {
+			zone_offset += BITS_PER_LONG - pagebit - 1;
+			continue;
+		}
+
+	} while (!ul || !test_bit(pagebit, ul));
+
+	return zone->zone_start_pfn + zone_offset;
+}
+EXPORT_SYMBOL_GPL(get_next_bit_on);
+
+#ifdef SELF_TEST
+#include <linux/jiffies.h>
+/* Self-test and timing comparison for dynamic pageflags (SELF_TEST only). */
+static __init int dyn_pageflags_test(void)
+{
+	struct dyn_pageflags test_map;
+	struct page *test_page1 = pfn_to_page(1);
+	unsigned long pfn = 0, start, end;
+	int i, iterations;
+
+	memset(&test_map, 0, sizeof(test_map));
+
+	printk("Dynpageflags testing...\n");
+
+	printk(KERN_INFO "Set page 1...");
+	set_dynpageflag(&test_map, test_page1);
+	if (test_dynpageflag(&test_map, test_page1))
+		printk(KERN_INFO "Ok.\n");
+	else
+		printk(KERN_INFO "FAILED.\n");
+
+	printk(KERN_INFO "Test memory hotplugging #1 ...");
+	{
+		unsigned long orig_size;
+		GET_BIT_AND_UL(&test_map, test_page1);
+		orig_size = (unsigned long) test_map.bitmap[node][zone_num][1];
+		/*
+		 * Use the code triggered when zone_start_pfn lowers,
+		 * checking that our bit is then set in the third page.
+		 */
+		resize_zone_bitmap(&test_map, zone, orig_size,
+				orig_size + 2, 2);
+		DUMP_DEBUG(&test_map);
+		if ((unsigned long) test_map.bitmap[node][zone_num]
+				[pagenum + 2] &&
+		    (unsigned long) test_map.bitmap[node][zone_num]
+				[pagenum + 2][0] == 2UL)
+			printk(KERN_INFO "Ok.\n");
+		else
+			printk(KERN_INFO "FAILED.\n");
+	}
+
+	printk(KERN_INFO "Test memory hotplugging #2 ...");
+	{
+		/*
+		 * Test expanding bitmap length.
+		 */
+		unsigned long orig_size;
+		GET_BIT_AND_UL(&test_map, test_page1);
+		orig_size = (unsigned long) test_map.bitmap[node]
+							[zone_num][1];
+		resize_zone_bitmap(&test_map, zone, orig_size,
+				orig_size + 2, 0);
+		DUMP_DEBUG(&test_map);
+		pagenum += 2; /* Offset for first test */
+		if (test_map.bitmap[node][zone_num][pagenum] &&
+		    test_map.bitmap[node][zone_num][pagenum][0] == 2UL &&
+		    (unsigned long) test_map.bitmap[node][zone_num][1] ==
+						orig_size + 2)
+			printk(KERN_INFO "Ok.\n");
+		else
+			printk(KERN_INFO "FAILED ([%d][%d][%d]: %p && %lu == "
+				"2UL  && %p == %lu).\n",
+				node, zone_num, pagenum,
+				test_map.bitmap[node][zone_num][pagenum],
+				test_map.bitmap[node][zone_num][pagenum] ?
+				test_map.bitmap[node][zone_num][pagenum][0] : 0,
+				test_map.bitmap[node][zone_num][1],
+				orig_size + 2);
+	}
+
+	free_dyn_pageflags(&test_map);
+	/* Start over with a non-sparse map (sparse == 0) for the timings. */
+	allocate_dyn_pageflags(&test_map, 0);
+
+	start = jiffies;
+
+	iterations = 25000000 / max_pfn;
+
+	for (i = 0; i < iterations; i++) {
+		for (pfn = 0; pfn < max_pfn; pfn++)
+			set_dynpageflag(&test_map, pfn_to_page(pfn));
+		for (pfn = 0; pfn < max_pfn; pfn++)
+			clear_dynpageflag(&test_map, pfn_to_page(pfn));
+	}
+
+	end = jiffies;
+
+	free_dyn_pageflags(&test_map);
+	/* NOTE(review): test_map is reused below after this free - confirm. */
+	printk(KERN_INFO "Dyn: %d iterations of setting & clearing all %lu "
+			"flags took %lu jiffies.\n",
+			iterations, max_pfn, end - start);
+
+	start = jiffies;
+	/* Baseline: the same loops on bit 7 of each page's own flags word. */
+	for (i = 0; i < iterations; i++) {
+		for (pfn = 0; pfn < max_pfn; pfn++)
+			set_bit(7, &(pfn_to_page(pfn))->flags);
+		for (pfn = 0; pfn < max_pfn; pfn++)
+			clear_bit(7, &(pfn_to_page(pfn))->flags);
+	}
+
+	end = jiffies;
+
+	printk(KERN_INFO "Real flags: %d iterations of setting & clearing "
+			"all %lu flags took %lu jiffies.\n",
+			iterations, max_pfn, end - start);
+
+	iterations = 25000000;
+
+	start = jiffies;
+
+	for (i = 0; i < iterations; i++) {
+		set_dynpageflag(&test_map, pfn_to_page(1));
+		clear_dynpageflag(&test_map, pfn_to_page(1));
+	}
+
+	end = jiffies;
+
+	printk(KERN_INFO "Dyn: %d iterations of setting & clearing all one "
+			"flag took %lu jiffies.\n", iterations, end - start);
+
+	start = jiffies;
+
+	for (i = 0; i < iterations; i++) {
+		set_bit(7, &(pfn_to_page(1))->flags);
+		clear_bit(7, &(pfn_to_page(1))->flags);
+	}
+
+	end = jiffies;
+
+	printk(KERN_INFO "Real pageflag: %d iterations of setting & clearing "
+			"all one flag took %lu jiffies.\n",
+			iterations, end - start);
+	return 0;
+}
+
+late_initcall(dyn_pageflags_test);
+#endif
+
+/* Handler for the "dyn_pageflags_debug" option: sets dyn_pageflags_debug. */
+static int __init dyn_pageflags_debug_setup(char *str)
+{
+	dyn_pageflags_debug = 1;
+	printk(KERN_INFO "Dynamic pageflags debugging enabled.\n");
+	return 1;
+}
+__setup("dyn_pageflags_debug", dyn_pageflags_debug_setup);
diff -Npur linux-2.6-block/mm/memory_hotplug.c linux-2.6-block-custom/mm/memory_hotplug.c
--- linux-2.6-block/mm/memory_hotplug.c	2008-09-27 16:12:29.202926699 +0900
+++ linux-2.6-block-custom/mm/memory_hotplug.c	2008-09-26 19:45:20.402755449 +0900
@@ -213,6 +213,8 @@ static int __add_zone(struct zone *zone,
 	pgdat_resize_unlock(zone->zone_pgdat, &flags);
 	memmap_init_zone(nr_pages, nid, zone_type,
 			 phys_start_pfn, MEMMAP_HOTPLUG);
+
+	dyn_pageflags_hotplug(zone);
 	return 0;
 }
 
diff -Npur linux-2.6-block/mm/page_alloc.c linux-2.6-block-custom/mm/page_alloc.c
--- linux-2.6-block/mm/page_alloc.c	2008-09-27 16:12:29.218922587 +0900
+++ linux-2.6-block-custom/mm/page_alloc.c	2008-09-26 19:45:20.402755449 +0900
@@ -1774,6 +1774,26 @@ static unsigned int nr_free_zone_pages(i
 	return sum;
 }
 
+static unsigned int nr_unallocated_zone_pages(int offset)
+{
+	/* Just pick one node, since fallback list is circular */
+	struct zonelist *zonelist = node_zonelist(numa_node_id(), GFP_KERNEL);
+	unsigned int total = 0;
+	struct zoneref *zref;
+	struct zone *zone;
+
+	/* Count only the free pages each zone holds above its high mark. */
+	for_each_zone_zonelist(zone, zref, zonelist, offset) {
+		unsigned long high = zone->pages_high;
+		unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
+
+		if (free > high)
+			total += free - high;
+	}
+
+	return total;
+}
+
 /*
  * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
  */
@@ -1784,6 +1804,15 @@ unsigned int nr_free_buffer_pages(void)
 EXPORT_SYMBOL_GPL(nr_free_buffer_pages);
 
 /*
+ * Free pages above the high watermarks in ZONE_DMA and ZONE_NORMAL
+ */
+unsigned int nr_unallocated_buffer_pages(void)
+{
+	return nr_unallocated_zone_pages(gfp_zone(GFP_USER));
+}
+EXPORT_SYMBOL_GPL(nr_unallocated_buffer_pages);
+
+/*
  * Amount of free RAM allocatable within all zones
  */
 unsigned int nr_free_pagecache_pages(void)
diff -Npur linux-2.6-block/mm/vmscan.c linux-2.6-block-custom/mm/vmscan.c
--- linux-2.6-block/mm/vmscan.c	2008-09-27 16:12:29.246940324 +0900
+++ linux-2.6-block-custom/mm/vmscan.c	2008-09-26 19:45:20.406755538 +0900
@@ -811,6 +811,28 @@ static unsigned long isolate_lru_pages(u
 	return nr_taken;
 }
 
+/* Put every page on @page_list back on @zone's LRU; lru_lock must be held. */
+static void return_lru_pages(struct list_head *page_list, struct zone *zone,
+		struct pagevec *pvec)
+{
+	while (!list_empty(page_list)) {
+		struct page *page = lru_to_page(page_list);
+		VM_BUG_ON(PageLRU(page));
+		SetPageLRU(page);
+		list_del(&page->lru);
+		if (!PageActive(page))
+			add_page_to_inactive_list(zone, page);
+		else
+			add_page_to_active_list(zone, page);
+		if (pagevec_add(pvec, page))
+			continue;
+		/* pagevec is full: release it without holding the lock */
+		spin_unlock_irq(&zone->lru_lock);
+		__pagevec_release(pvec);
+		spin_lock_irq(&zone->lru_lock);
+	}
+}
+
 static unsigned long isolate_pages_global(unsigned long nr,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
@@ -861,7 +883,6 @@ static unsigned long shrink_inactive_lis
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 	do {
-		struct page *page;
 		unsigned long nr_taken;
 		unsigned long nr_scan;
 		unsigned long nr_freed;
@@ -923,21 +944,7 @@ static unsigned long shrink_inactive_lis
 		/*
 		 * Put back any unfreeable pages.
 		 */
-		while (!list_empty(&page_list)) {
-			page = lru_to_page(&page_list);
-			VM_BUG_ON(PageLRU(page));
-			SetPageLRU(page);
-			list_del(&page->lru);
-			if (PageActive(page))
-				add_page_to_active_list(zone, page);
-			else
-				add_page_to_inactive_list(zone, page);
-			if (!pagevec_add(&pvec, page)) {
-				spin_unlock_irq(&zone->lru_lock);
-				__pagevec_release(&pvec);
-				spin_lock_irq(&zone->lru_lock);
-			}
-		}
+		return_lru_pages(&page_list, zone, &pvec);
   	} while (nr_scanned < max_scan);
 	spin_unlock(&zone->lru_lock);
 done:
@@ -1665,6 +1672,72 @@ out:
 	return nr_reclaimed;
 }
 
+struct lru_save {
+	struct zone 		*zone;		/* zone the pages belong to */
+	struct list_head	active_list;	/* pages isolated from its active list */
+	struct list_head	inactive_list;	/* pages isolated from its inactive list */
+	struct lru_save		*next;		/* next record in lru_save_list */
+};
+
+struct lru_save *lru_save_list;	/* head of the chain of saved per-zone records */
+
+void unlink_lru_lists(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		unsigned long nr_isolated, nr_scanned;
+		struct lru_save *save;
+
+		if (zone->spanned_pages == 0)
+			continue;
+
+		save = kzalloc(sizeof(*save), GFP_ATOMIC);
+		BUG_ON(save == NULL);
+
+		/* Push the new record onto the head of lru_save_list. */
+		save->zone = zone;
+		save->next = lru_save_list;
+		lru_save_list = save;
+
+		spin_lock_irq(&zone->lru_lock);
+		INIT_LIST_HEAD(&save->active_list);
+		INIT_LIST_HEAD(&save->inactive_list);
+		nr_isolated = isolate_lru_pages(
+				zone_page_state(zone, NR_ACTIVE),
+				&zone->active_list, &save->active_list,
+				&nr_scanned, 0, ISOLATE_BOTH);
+		__mod_zone_page_state(zone, NR_ACTIVE, -nr_isolated);
+		nr_isolated = isolate_lru_pages(
+				zone_page_state(zone, NR_INACTIVE),
+				&zone->inactive_list, &save->inactive_list,
+				&nr_scanned, 0, ISOLATE_BOTH);
+		__mod_zone_page_state(zone, NR_INACTIVE, -nr_isolated);
+		spin_unlock_irq(&zone->lru_lock);
+	}
+}
+
+void relink_lru_lists(void)
+{
+	struct lru_save *save;
+
+	/* Pop one saved record at a time until lru_save_list is empty. */
+	while ((save = lru_save_list) != NULL) {
+		struct zone *zone = save->zone;
+		struct pagevec pvec;
+
+		lru_save_list = save->next;
+		pagevec_init(&pvec, 1);
+
+		spin_lock_irq(&zone->lru_lock);
+		return_lru_pages(&save->active_list, zone, &pvec);
+		return_lru_pages(&save->inactive_list, zone, &pvec);
+		spin_unlock_irq(&zone->lru_lock);
+		pagevec_release(&pvec);
+		kfree(save);
+	}
+}
+
 /*
  * The background pageout daemon, started as a kernel thread
  * from the init process. 
@@ -1749,6 +1822,9 @@ void wakeup_kswapd(struct zone *zone, in
 	if (!populated_zone(zone))
 		return;
 
+	if (freezer_is_on())
+		return;
+
 	pgdat = zone->zone_pgdat;
 	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
 		return;
@@ -1762,6 +1838,109 @@ void wakeup_kswapd(struct zone *zone, in
 }
 
 #ifdef CONFIG_PM
+static unsigned long shrink_ps1_zone(struct zone *zone,
+	unsigned long total_to_free, struct scan_control sc)
+{
+	struct reclaim_state reclaim_state, *saved = current->reclaim_state;
+	unsigned long freed = 0;
+
+	/* Slab frees are only counted in current->reclaim_state: install ours. */
+	current->reclaim_state = &reclaim_state;
+	while (total_to_free > freed) {
+		unsigned long nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
+
+		if (nr_slab > total_to_free)
+			nr_slab = total_to_free;
+		reclaim_state.reclaimed_slab = 0;
+		shrink_slab(nr_slab, sc.gfp_mask, nr_slab);
+		if (!reclaim_state.reclaimed_slab)
+			break;	/* a pass that frees nothing ends the attempt */
+		freed += reclaim_state.reclaimed_slab;
+	}
+	current->reclaim_state = saved;
+	return freed;
+}
+
+unsigned long shrink_ps2_zone(struct zone *zone, unsigned long total_to_free,
+		struct scan_control sc)
+{
+	int prio;
+	unsigned long freed = 0;
+	if (!populated_zone(zone) || zone_is_all_unreclaimable(zone))
+		return 0;
+	/* Stop once the target is met: freed may overshoot total_to_free. */
+	for (prio = DEF_PRIORITY; prio >= 0 && freed < total_to_free; prio--) {
+		unsigned long to_free, just_freed, orig_size;
+		unsigned long old_nr_active;
+
+		to_free = min(zone_page_state(zone, NR_ACTIVE) +
+				zone_page_state(zone, NR_INACTIVE),
+				total_to_free - freed);
+
+		if (to_free <= 0)	/* unsigned: true only when it is 0 */
+			return freed;
+
+		sc.swap_cluster_max = to_free -
+			zone_page_state(zone, NR_INACTIVE);
+		/* NOTE(review): runs once even if swap_cluster_max <= 0. */
+		do {
+			old_nr_active = zone_page_state(zone, NR_ACTIVE);
+			zone->nr_scan_active = sc.swap_cluster_max - 1;
+			shrink_active_list(sc.swap_cluster_max, zone, &sc,
+					prio);
+			zone->nr_scan_active = 0;
+
+			sc.swap_cluster_max = to_free - zone_page_state(zone,
+					NR_INACTIVE);
+
+		} while (sc.swap_cluster_max > 0 &&
+			 zone_page_state(zone, NR_ACTIVE) > old_nr_active);
+
+		to_free = min(zone_page_state(zone, NR_ACTIVE) +
+				zone_page_state(zone, NR_INACTIVE),
+				total_to_free - freed);
+		/* Shrink the inactive list until a pass frees nothing more. */
+		do {
+			orig_size = zone_page_state(zone, NR_ACTIVE) +
+				zone_page_state(zone, NR_INACTIVE);
+			zone->nr_scan_inactive = to_free;
+			sc.swap_cluster_max = to_free;
+			shrink_inactive_list(to_free, zone, &sc);
+			just_freed = (orig_size -
+				(zone_page_state(zone, NR_ACTIVE) +
+				 zone_page_state(zone, NR_INACTIVE)));
+			zone->nr_scan_inactive = 0;
+			freed += just_freed;
+		} while (just_freed > 0 && freed < total_to_free);
+	}
+
+	return freed;
+}
+
+/* Free up to @total_to_free pages: LRU if @ps_wanted & 2, slab if & 1. */
+void shrink_one_zone(struct zone *zone, unsigned long total_to_free,
+		int ps_wanted)
+{
+	unsigned long freed = 0;
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.may_swap = 0,
+		.may_writepage = 1,
+		.swappiness = vm_swappiness,
+		.isolate_pages = isolate_pages_global,
+	};
+
+	if (!total_to_free)
+		return;
+	if (is_highmem(zone))
+		sc.gfp_mask |= __GFP_HIGHMEM;
+	if (ps_wanted & 2)
+		freed = shrink_ps2_zone(zone, total_to_free, sc);
+	/* ps2 may overshoot; never pass an underflowed target to ps1 */
+	if ((ps_wanted & 1) && freed < total_to_free)
+		shrink_ps1_zone(zone, total_to_free - freed, sc);
+}
+
 /*
  * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
  * from LRU lists system-wide, for given pass and priority, and returns the
