MidCOM svn: r14197 - branches/MidCOM_2_8/fi.hut.staticdumps/bin

rambo midcom-commits at lists.midgard-project.org
Thu Jan 3 13:21:53 CET 2008


Author: rambo
Date: Thu Jan  3 13:21:53 2008
New Revision: 14197
URL: http://trac.midgard-project.org/software/changeset/14197

Log:
I guess these test scripts could be useful to someone

Added:
   branches/MidCOM_2_8/fi.hut.staticdumps/bin/dump_redirects.php
   branches/MidCOM_2_8/fi.hut.staticdumps/bin/rename_gets.php

Added: branches/MidCOM_2_8/fi.hut.staticdumps/bin/dump_redirects.php
==============================================================================
--- (empty file)
+++ branches/MidCOM_2_8/fi.hut.staticdumps/bin/dump_redirects.php	Thu Jan  3 13:21:53 2008
@@ -0,0 +1,155 @@
+#!/usr/bin/php
+<?php
+ini_set('error_reporting', E_ALL); // set the error_reporting ini value to E_ALL for this run
+
+$wget_options = "-erobots=off -q -m -nH"; // NOTE(review): not referenced anywhere else in this script
+$rsync_options = '-a'; // NOTE(review): not referenced anywhere else in this script
+
+/** Print the trimmed $msg plus a newline and exit with status 1; die($msg) would exit with 0. */
+function better_die($msg)
+{
+    print trim($msg) . "\n";
+    exit(1);
+}
+
+/**
+ * Run $cmd through exec() and report whether it exited with status 0.
+ *
+ * The output lines collected by exec() are discarded.
+ */
+function test_command($cmd)
+{
+    $exit_status = 0;
+    exec($cmd, $discarded_lines, $exit_status);
+    return $exit_status === 0;
+}
+
+// Abort early unless both external tools answer "--version" successfully.
+foreach (array('rsync', 'wget') as $tool)
+{
+    if (!test_command("{$tool} --version"))
+    {
+        better_die("{$tool} not executable in path");
+    }
+}
+require_once('midcom/lib/org/openpsa/httplib/nonmidcom.php'); // presumably defines org_openpsa_httplib used further down — confirm
+// NOTE: This way of forcing config only works with the nonmidcom mode of httplib, do not think you could use this elsewhere !
+$GLOBALS['midcom_component_data']['org.openpsa.httplib']['config']->options['http_timeout'] = 300; // NOTE(review): unit presumably seconds — confirm
+
+// Locate the config file named by the first CLI argument and load it.
+if (empty($argv[1]))
+{
+    better_die("First argument must be path to config file\n");
+}
+$conffile = $argv[1];
+// Unreadable, non-absolute paths are retried relative to this script's
+// directory. Note that strpos() takes the haystack first, then the needle.
+if (!is_readable($conffile) && strpos($conffile, '/') !== 0)
+{
+    $conffile = dirname($argv[0]) . "/{$argv[1]}";
+}
+if (!is_readable($conffile))
+{
+    better_die("File {$conffile} not readable\n");
+}
+// SECURITY: the file body is eval()ed as PHP array contents; only use trusted files.
+eval('$sites_config = array(' . file_get_contents($conffile) . ');');
+foreach ($sites_config as $k => $site_config)
+{
+    // TODO: site locking so we can do multiple dumps in parallel
+    if (!isset($site_config['url']))
+    {
+        better_die("'url' not set for site {$k}");
+    }
+    if (!isset($site_config['dump_path']))
+    {
+        better_die("'dump_path' not set for site {$k}");
+    }
+    if (!is_writable($site_config['dump_path']))
+    {
+        better_die("{$site_config['dump_path']} is not writable");
+    }
+
+    /**
+     * Redirection folders
+     *
+     * Fetch the path list from the site's redirect_folders.php, send a HEAD
+     * request for each path and, when a Location header comes back, write a
+     * .htaccess file carrying the matching RewriteRule below dump_path.
+     *
+     * Skipped unless 'redirect_htaccess_suffix' exists as a key (its value
+     * may be null) and both 'username' and 'password' are set.
+     */
+    if (   !array_key_exists('redirect_htaccess_suffix', $site_config)
+        || !isset($site_config['username'])
+        || !isset($site_config['password']))
+    {
+        continue;
+    }
+    $suffix = isset($site_config['redirect_htaccess_suffix']) ? $site_config['redirect_htaccess_suffix'] : '';
+    $client = new org_openpsa_httplib();
+    // One initial attempt plus up to five repeats while the response stays empty
+    $retries = 5;
+    do
+    {
+        echo "Fetching, {$retries} tries left\n";
+        $result = $client->get("{$site_config['url']}midcom-exec-fi.hut.staticdumps/redirect_folders.php", null, $site_config['username'], $site_config['password']);
+    }
+    while (   empty($result)
+           && $retries--);
+    if (empty($result))
+    {
+        // Nothing usable came back, so there is nothing to write for this site
+        continue;
+    }
+    require_once('HTTP/Request.php');
+    foreach (explode("\n", $result) as $path)
+    {
+        $path = trim($path);
+        if (empty($path))
+        {
+            continue;
+        }
+        // Plain "= new": "=& new" is deprecated as of PHP 5.3 and removed in PHP 7
+        $request = new HTTP_Request("{$site_config['url']}/{$path}");
+        $request->setMethod(HTTP_REQUEST_METHOD_HEAD);
+        $request->addHeader('User-Agent', org_openpsa_httplib::_user_agent());
+        $request->setBasicAuth($site_config['username'], $site_config['password']);
+        if (PEAR::isError($request->sendRequest()))
+        {
+            continue;
+        }
+        $headers = $request->getResponseHeader();
+        if (empty($headers['location']))
+        {
+            // Could not get valid redirection info
+            continue;
+        }
+        // Clean double-slashes from the location (except for proto:// -part)
+        $location = trim(preg_replace('%(?<!:)/{2,}%', '/', $headers['location']));
+        // Swap the site URL for "/" so the target becomes site-relative; a
+        // literal str_replace() needs no regex escaping of the URL.
+        $redirect_to = str_replace($site_config['url'], '/', $location);
+        $file_content = <<<EOD
+RewriteEngine On
+#This would work in global config file
+#RewriteRule ^/{$path}$ {$redirect_to} [R]
+#We use this in directory local one
+RewriteRule ^$ {$redirect_to} [R]
+EOD;
+        $file_path = "{$site_config['dump_path']}/{$path}.htaccess{$suffix}";
+        $target_dir = dirname($file_path);
+        if (!file_exists($target_dir))
+        {
+            // $path is remote-supplied, so create the folder natively, not via a shell
+            echo "Creating directory {$target_dir}\n";
+            mkdir($target_dir, 0777, true);
+        }
+        echo "Writing {$file_path}\n";
+        file_put_contents($file_path, $file_content);
+        unset($file_path, $file_content);
+    }
+
+}
+
+?>
\ No newline at end of file

Added: branches/MidCOM_2_8/fi.hut.staticdumps/bin/rename_gets.php
==============================================================================
--- (empty file)
+++ branches/MidCOM_2_8/fi.hut.staticdumps/bin/rename_gets.php	Thu Jan  3 13:21:53 2008
@@ -0,0 +1,91 @@
+#!/usr/bin/php
+<?php
+ini_set('error_reporting', E_ALL); // set the error_reporting ini value to E_ALL for this run
+
+$wget_options = "-erobots=off -q -m -nH"; // NOTE(review): not referenced anywhere else in this script
+$rsync_options = '-a'; // NOTE(review): not referenced anywhere else in this script
+
+/** Print the trimmed $msg plus a newline and exit with status 1; die($msg) would exit with 0. */
+function better_die($msg)
+{
+    print trim($msg) . "\n";
+    exit(1);
+}
+
+/**
+ * Run $cmd through exec() and report whether it exited with status 0.
+ *
+ * The output lines collected by exec() are discarded.
+ */
+function test_command($cmd)
+{
+    $exit_status = 0;
+    exec($cmd, $discarded_lines, $exit_status);
+    return $exit_status === 0;
+}
+// The renaming step further down shells out to find, so refuse to run without it.
+// NOTE(review): "--version" looks GNU-specific; other find variants may reject it — confirm.
+test_command('find --version')
+    or better_die('find not executable in path');
+
+// Locate the config file named by the first CLI argument and load it.
+if (empty($argv[1]))
+{
+    better_die("First argument must be path to config file\n");
+}
+$conffile = $argv[1];
+// Unreadable, non-absolute paths are retried relative to this script's
+// directory. Note that strpos() takes the haystack first, then the needle.
+if (!is_readable($conffile) && strpos($conffile, '/') !== 0)
+{
+    $conffile = dirname($argv[0]) . "/{$argv[1]}";
+}
+if (!is_readable($conffile))
+{
+    better_die("File {$conffile} not readable\n");
+}
+// SECURITY: the file body is eval()ed as PHP array contents; only use trusted files.
+eval('$sites_config = array(' . file_get_contents($conffile) . ');');
+foreach ($sites_config as $k => $site_config)
+{
+    // TODO: site locking so we can do multiple dumps in parallel
+    if (!isset($site_config['url']))
+    {
+        better_die("'url' not set for site {$k}");
+    }
+    if (!isset($site_config['dump_path']))
+    {
+        better_die("'dump_path' not set for site {$k}");
+    }
+    if (!is_writable($site_config['dump_path']))
+    {
+        better_die("{$site_config['dump_path']} is not writable");
+    }
+
+    /**
+     * Rename files with GET parameters in the name
+     *
+     * So we can actually serve them, see documentation/USAGE
+     * on how to configure mod_rewrite.
+     * A match "dir/file?query" is moved to "dir/query_file".
+     */
+    $cmd = "find {$site_config['dump_path']} -name '*\?*'";
+    $output = array();
+    $ret = 0;
+    exec($cmd, $output, $ret);
+    if ($ret !== 0)
+    {
+        continue;
+    }
+    foreach ($output as $filepath)
+    {
+        list($filepart, $querypart) = explode('?', $filepath);
+        $newpath = dirname($filepart) . "/{$querypart}_" . basename($filepart);
+        // File names may hold quotes or other shell metacharacters, so rename
+        // natively instead of interpolating them into a shell "mv" command.
+        echo "Renaming {$filepath} to {$newpath}\n";
+        rename($filepath, $newpath);
+    }
+}
+
+?>
\ No newline at end of file


More information about the midcom-commits mailing list