Location: PHPKode > projects > Disk Usage Report > diskusagereports_v1.0.2/scripts/process.php
<?php

/* 
 * Copyright (c) 2011 André Mekkawi <hide@address.com>
 * Version: v1.0.2
 * 
 * LICENSE
 * 
 * This source file is subject to the MIT license in the file LICENSE.txt.
 * The license is also available at http://diskusagereports.com/license.html
 */

// ======================================================
// Customize the following two arrays to change the
// grouping for the "Last Modified" and "File Sizes" tabs.

// Buckets for the "File Sizes" tab, listed from largest to smallest.
// Each entry pairs a display label with a byte count; the list is handed to
// the processor through setSizeGroups() further down.
// NOTE(review): 'size' looks like the inclusive lower bound of each bucket --
// confirm against the Process class.
$sizeGroups = array(
	array(
		'label' => '1 GB or More',
		'size' => 1 * 1024 * 1024 * 1024
	),
	array(
		'label' => '500 MB - 1 GB',
		'size' => 500 * 1024 * 1024
	),
	array(
		'label' => '250 MB - 500 MB',
		'size' => 250 * 1024 * 1024
	),
	array(
		'label' => '125 MB - 250 MB',
		'size' => 125 * 1024 * 1024
	),
	array(
		'label' => '75 MB - 125 MB',
		'size' => 75 * 1024 * 1024
	),
	array(
		'label' => '25 MB - 75 MB',
		'size' => 25 * 1024 * 1024
	),
	array(
		'label' => '10 MB - 25 MB',
		'size' => 10 * 1024 * 1024
	),
	array(
		'label' => '5 MB - 10 MB',
		'size' => 5 * 1024 * 1024
	),
	array(
		'label' => '1 MB - 5 MB',
		'size' => 1 * 1024 * 1024
	),
	array(
		'label' => '500 KB - 1 MB',
		'size' => 500 * 1024
	),
	array(
		'label' => '250 KB - 500 KB',
		'size' => 250 * 1024
	),
	array(
		'label' => '100 KB - 250 KB',
		'size' => 100 * 1024
	),
	array(
		'label' => '50 KB - 100 KB',
		'size' => 50 * 1024
	),
	array(
		'label' => '25 KB - 50 KB',
		'size' => 25 * 1024
	),
	array(
		'label' => '10 KB - 25 KB',
		'size' => 10 * 1024
	),
	array(
		'label' => '5 KB - 10 KB',
		'size' => 5 * 1024
	),
	array(
		'label' => '1 KB - 5 KB',
		'size' => 1 * 1024
	),
	array(
		'label' => 'Less than 1 KB',
		'size' => 0
	)
);

// Buckets for the "Last Modified" tab, listed from oldest to newest.
// Every 'date' string is later run through FormatDate() together with
// $dateFormat, and the resulting list is handed to the processor through
// setModifiedGroups().
$dateFormat = 'Y-m-d';
$modifiedGroups = array(
	array(
		'label' => '10 Years or More',
		'date' => '-10 year'
	),
	array(
		'label' => '5 - 10 Years',
		'date' => '-5 year'
	),
	array(
		'label' => '2 - 5 Years',
		'date' => '-2 year'
	),
	array(
		'label' => '1 - 2 Years',
		'date' => '-1 year'
	),
	array(
		'label' => '270 - 365 Days',
		'date' => '-270 day'
	),
	array(
		'label' => '180 - 270 Days',
		'date' => '-180 day'
	),
	array(
		'label' => '90 - 180 Days',
		'date' => '-90 day'
	),
	array(
		'label' => '60 - 90 Days',
		'date' => '-60 day'
	),
	array(
		'label' => '30 - 60 Days',
		'date' => '-30 day'
	),
	array(
		'label' => '15 - 30 Days',
		'date' => '-15 day'
	),
	array(
		'label' => '7 - 15 Days',
		'date' => '-7 day'
	),
	array(
		'label' => '1 - 7 Days',
		'date' => '-1 day'
	),
	array(
		'label' => 'Today',
		'date' => 'today'
	),
	// NOTE(review): '9999-99-99' is not a real calendar date; it appears to
	// act as a catch-all upper bound -- confirm how FormatDate() treats it.
	array(
		'label' => 'Future',
		'date' => '9999-99-99'
	)
);

// ======================================================

// export TZ=UTC; find "DIRECTORYNAME" -type d -or -type f -printf '%y %TY-%Tm-%Td %TT %s %d %h %f\n' > "OUTFILENAME"; unset TZ
// cat diskusage-gs.txt | sed -En -e 's/^d/&/p' -e 's/^f.+\.(jpg)$/&/p' | php scripts/process.php ../diskusage-data/test2
// php scripts/find.php `pwd` | sed -E -e 's/^.*\.svn.*$//' -e 's/^.*diskusage-[a-z0-9]+\.txt.*$//' -e 's/^.*\.settings.*$//' -e 's/^.*\$dev.*$//' -e 's/^.*\.DS_Store.*$//' -e 's/^.*\.tmp_.*$//' -e '/^$/d' | php scripts/process.php -n "Disk Usage Reports Code" ../diskusage-data/test2

// Refuse to run under anything other than the command-line SAPI.
if (php_sapi_name() !== 'cli') {
	echo "Must be run from the command line.\n";
	exit(1);
}

// Switch for the progress messages echoed during start-up below.
define('DEBUG', FALSE);

if (DEBUG) {
	echo "Includes...\n";
}

require_once 'inc/functions.inc.php';
require_once 'inc/process.class.php';

// Fallback implementations for PHP builds that lack these functions.
if (!function_exists('json_encode')) {
	require_once 'inc/json_encode.php';
}
if (!function_exists('file_put_contents')) {
	require_once 'inc/file_put_contents.php';
}

if (DEBUG) {
	echo "Creating processor...\n";
}

$processor = new Process();

if (DEBUG) {
	echo "Setting timezone...\n";
}

// Default arguments (most arguments are stored within $processor).
// The timezone defaults to whatever PHP reports, or to America/New_York when
// date_default_timezone_get() is not available.
$args = array();
if (function_exists('date_default_timezone_get')) {
	$args['timezone'] = @date_default_timezone_get();
}
else {
	$args['timezone'] = 'America/New_York';
}

if (DEBUG) {
	echo "Processing command line arguments...\n";
}

// Everything after the script name.
$cliargs = $_SERVER['argv'];
array_shift($cliargs);

// Short usage text appended to argument error messages.
$syntax = "Syntax: php process.php [OPTIONS] <report-directory> [<filelist>]\n"
	. "Use -h for full help or visit diskusagereports.com/docs.\n";

// Full help text, echoed when -h, --help, -? or /? is given.
// The heredoc body is printed verbatim, so every character inside it
// (including the blank-looking lines) is part of the program's output.
$syntax_long = <<<EOT
Syntax: php process.php [OPTIONS] <report-directory> [<filelist>]

<report-directory>
The directory where the report files will be saved. This should point to a
directory under the 'data' directory.
Examples: /var/www/html/diskusage/data/myreport
          C:\Inetpub\wwwroot\diskusage\data\myreport

<filelist>
The file that was created using one of the 'find' scripts (e.g. find.php).
If you omit this, process.php will attempt to read the file list from STDIN.

The OPTIONS are:

      -d <delim>
      The field delimiter that each line of the filelist will be split using.
      The default is the NULL character.

      -ds <directoryseparator>
      Specify the directory separator used in the file list. This is useful
      if the list from step 1 was generated on a different operating system
      which uses a different directory separator. For example, Windows uses
      a backslash (\) while Linux/BSD/Mac/etc systems use a forward slash (/).
      The default is the directory separator for the operating system
      processing the report.

      -fp
      Display the full path of the directories in the report. This is off by
      default since it could potentially pose a security risk.

      -l <num>
      Lines in the report that are longer than <num> will not be processed.
      This is just a failsafe to prevent the script from processing a list
      file that is not formatted properly. The default is 1024.

      -mt <bytes>
      The maximum number of bytes that the 'directory tree' file can be.
      The default is 819200. If the 'directory tree' file gets larger than
      this number, then the script will act as if -nt had been specified.

      -n <reportname>
      This text will display in the header of the report.

      -nt
      Disable the directory tree that appears on the left side of the report.
      
      -su <suffix>
      Set the suffix of report files. This is '.txt' by default. You must
      also edit the 'suffix' variable in index.html to include any suffix
      besides the default or an empty suffix.

      -q
      Do not output any text to STDOUT. The script will return a non-zero
      if it fails.
      
      -t <num>
      Limit the "File Sizes", "Modified", and "File Types" totals to only
      <num> directories deep in the report. This is useful if the directory
      being reported on has many files, which can cause the report to take a
      long time to generate. For example, if this is set to 3 the directory
      ./a, ./a/b and ./a/b/c will have these totals available, but ./a/b/c/d
      will not. The default is 6.

      -td <num>
      Similar to -t but instead limits the "Top 100" list to only <num>
      directories deep in the report. This is useful if the directory being
      reported on has many files, which can cause the report to take a long
      time to generate. The default is 3.

      -tz <timezone>
      Set the report timezone. These are the same timezones as
      http://php.net/manual/en/timezones.php. The default is the system's
      timezone (if it can be determined).
      
      -v
      Output additional information as the script executes.
      
      -vv
      Output more information than -v.
      
Notes:

      o All OPTIONS must be before <report-directory>.
      
      o You should set the -tz option as trying to determine the system's
        timezone is unreliable.
        
      o You may execute process.php on a separate server than the 'find'
        script if you are worried about it using CPU time.
        
      o The directory separator used in <filelist> must be a forward slash
        if this script is executed on a *nix system.

See also: diskusagereports.com/docs


EOT;

// Process command line arguments.
//
// Arguments are taken off the front of $cliargs one at a time. The first
// argument that is not a recognized option is used as <report-directory>, the
// argument after it (if any) as <filelist>, and parsing then stops.
while (!is_null($cliarg = array_shift($cliargs))) {
	// Stays TRUE for options that take no value. Options that take a value
	// overwrite it with that value, so NULL after the switch means the value
	// was missing from the command line.
	$shifted = TRUE;
	
	switch ($cliarg) {
		case '/?':
		case '-?':
		case '-h':
		case '--help':
			echo $syntax_long;
			exit(1);
		case '-tz':
			$args['timezone'] = $shifted = array_shift($cliargs);
			break;
		case '-d':
			$processor->setDelim($shifted = array_shift($cliargs));
			break;
		case '-ds':
			$processor->setDS($shifted = array_shift($cliargs));
			break;
		case '-n':
			$processor->setName($shifted = array_shift($cliargs));
			break;
		case '-su':
			$processor->setSuffix($shifted = array_shift($cliargs));
			// Without this break the option fell through to 'default', which
			// treated "-su" itself as the report directory and ended parsing.
			break;
		
		// Options that must be followed by a non-negative whole number.
		case '-t':
		case '-mt':
		case '-td':
		case '-l':
			$shifted = array_shift($cliargs);
			
			// The braces matter: without them exit(1) ran unconditionally
			// and every use of these options aborted the script.
			if (is_null($shifted) || !preg_match('/^[0-9]+$/', $shifted)) {
				echo "$cliarg must be followed by a number.\n";
				exit(1);
			}
			
			$number = intval($shifted);
			if ($cliarg === '-t') {
				$processor->setTotalsDepth($number);
			}
			elseif ($cliarg === '-mt') {
				$processor->setMaxTreeSize($number);
			}
			elseif ($cliarg === '-td') {
				$processor->setTop100Depth($number);
			}
			else {
				$processor->setMaxLineLength($number);
			}
			break;
		
		case '-nt':
			$processor->setNoTree(TRUE);
			break;
		case '-q':
			$processor->setVerboseLevel(PROCESS_VERBOSE_QUIET);
			break;
		case '-v':
			$processor->setVerboseLevel(PROCESS_VERBOSE_HIGHER);
			break;
		case '-vv':
			$processor->setVerboseLevel(PROCESS_VERBOSE_HIGHEST);
			break;
		case '-fp':
			$processor->setIncludeFullPath(true);
			break;
		default:
			// Not an option: this is <report-directory>, optionally followed
			// by <filelist>. Emptying $cliargs ends the loop.
			$processor->setReportDir($cliarg);
			$processor->setFileList(array_shift($cliargs));
			$cliargs = array();
			break;
	}
	
	// If we shifted and found nothing, output an error.
	if (is_null($shifted)) {
		echo "Missing value after argument $cliarg\n".$syntax;
		exit(1);
	}
}

// A report directory is mandatory; bail out with the short usage text if the
// argument loop never set one.
if (is_null($processor->getReportDir())) {
	if ($processor->getVerboseLevel() != PROCESS_VERBOSE_QUIET) {
		echo "<reportdir> argument is missing\n".$syntax;
	}
	exit(1);
}

// When a <filelist> was given it must be an existing regular file;
// when none was given, the list is read from STDIN instead.
if (!is_null($processor->getFileList())) {
	if (!is_file($processor->getFileList())) {
		if ($processor->getVerboseLevel() != PROCESS_VERBOSE_QUIET) {
			echo "The <filelist> '" . $processor->getFileList() . "' does not exist or is not a file.\n";
		}
		exit(1);
	}
}
else {
	$processor->setFileList('php://stdin');
}

// Apply the timezone, falling back to the TZ environment variable when
// date_default_timezone_set() is unavailable. Warnings are suppressed because
// failure is reported through the return value.
if (function_exists("date_default_timezone_set")) {
	$timezoneApplied = @(date_default_timezone_set($args['timezone']));
}
else {
	$timezoneApplied = @(putenv("TZ=".$args['timezone']));
}
if (!$timezoneApplied) {
	if ($processor->getVerboseLevel() != PROCESS_VERBOSE_QUIET) {
		echo "'timezone' config was set to an invalid identifier.\n";
	}
	exit(1);
}

// Replace each 'date' string in $modifiedGroups with its FormatDate() result.
foreach (array_keys($modifiedGroups) as $groupIndex) {
	$modifiedGroups[$groupIndex]['date'] = FormatDate($modifiedGroups[$groupIndex]['date'], $dateFormat);
}

// Warning callback registered with the processor via setWarningCallback().
//
// Takes a variable number of arguments. The first one is discarded; the
// second is compared against PROCESS_WARN_WRITEFAIL and, on a match, a
// "Failed to write" message built from the following one or two arguments
// is echoed unless the processor is in quiet mode.
//
// NOTE(review): the warning code is read from the second argument, not the
// first -- confirm this matches how the Process class invokes the callback.
function WarningHandler() {
	// The handler needs the script's processor to check the verbosity level.
	// (It must not import the global $args: that array holds the script's
	// settings and would be overwritten by the callback's own arguments.)
	global $processor;
	
	$params = func_get_args();
	
	// Drop the first callback argument; it is not inspected here.
	array_shift($params);
	
	if (count($params) > 0 && $params[0] == PROCESS_WARN_WRITEFAIL) {
		if ($processor->getVerboseLevel() != PROCESS_VERBOSE_QUIET) echo 'Failed to write: ' . $params[1] . (isset($params[2]) ? ' for ' . $params[2] : '') . "\n";
	}
}

// Hand the grouping tables and the warning callback to the processor.
$processor->setSizeGroups($sizeGroups);
$processor->setModifiedGroups($modifiedGroups);
$processor->setWarningCallback('WarningHandler');

// Run the report. Unless -q was given, translate the known failure codes
// into a message; any other return value produces no output here.
$ret = $processor->run();
if ($processor->getVerboseLevel() != PROCESS_VERBOSE_QUIET) {
	switch ($ret) {
		case PROCESS_FAILED_OPEN_FILELIST:
			echo "The <filelist> could not be opened.\n";
			break;
		case PROCESS_INVALID_REPORTDIR:
			echo "The <reportdir> already exists and is not a directory.\n";
			break;
		case PROCESS_INVALID_HEADER:
			echo "The header line in the <filelist> is invalid.\n";
			break;
		case PROCESS_FAILED_REPORTDIR_MKDIR:
			echo "The <reportdir> could not be created.\n";
			break;
		case PROCESS_INVALID_CHARACTERS:
			echo "<filelist> contains characters that are not UTF-8, Windows-1252 or ISO-8859-1.\n";
			break;
		case PROCESS_UNEXPECTED_HEADER:
			// Spelling corrected and newline added to match the other messages.
			echo "<filelist> contains a header line in an unexpected location. It must always be the first non-error line in the file.\n";
			break;
	}
}

// The value returned by run() is passed straight to exit().
// NOTE(review): presumably an integer status code -- confirm in the Process class.
exit($ret);
?>
Return current item: Disk Usage Report