//Originally made by sv:Användare:GameOn in 2011
using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Collections;
using System.Xml;
using DotNetWikiBot;
using System.Threading;
using System.Net;
using System.Web;
/// <summary>
/// Wiki bot that goes through the articles of a maintenance category, looks for
/// web reference templates that lack an archive URL and either adds a
/// webcitation.org archive link to the template or appends a dead-link template
/// when the referenced URL no longer responds.
/// </summary>
class MyBot : Bot
{
    // Text that precedes the archive address in the webcitation.org reply page.
    private const string ArchiveMarker = "An archive of this page should shortly be available at </p><br /><p>";

    // Distance from the start of ArchiveMarker to the start of the archive address,
    // and the length of that address. Both values are kept unchanged from the
    // original hard-coded numbers.
    // NOTE(review): they presumably match the current layout of the webcitation.org
    // reply (marker + link markup, then host/id) - verify if the site changes.
    private const int ArchiveAddressOffset = 83;
    private const int ArchiveAddressLength = 29;

    /// <summary>
    /// Sends <paramref name="parameters"/> as a form-encoded POST body to
    /// <paramref name="uri"/> and returns the trimmed response text.
    /// </summary>
    /// <param name="uri">Address to post to.</param>
    /// <param name="parameters">Already URL-encoded form body ("a=1&amp;b=2").</param>
    /// <returns>The trimmed response body, or null when a WebException occurred
    /// while sending the request or reading the response.</returns>
    private static string getURLPost(string uri, string parameters)
    {
        WebRequest webRequest = WebRequest.Create(uri);
        // UTF-8 is identical to ASCII for a properly encoded body, but does not
        // silently turn unexpected non-ASCII characters into '?'.
        byte[] bytes = Encoding.UTF8.GetBytes(parameters);
        webRequest.Method = "POST";
        webRequest.ContentType = "application/x-www-form-urlencoded";
        webRequest.ContentLength = bytes.Length;
        try
        {
            using (Stream outputStream = webRequest.GetRequestStream())
            {
                outputStream.Write(bytes, 0, bytes.Length);
            }
        }
        catch (WebException ex)
        {
            Console.WriteLine(ex.Message + "Error with request!");
            // The body was never sent, so asking for a response is pointless.
            return null;
        }

        try
        {
            // Dispose the response and reader so the underlying connection is released.
            using (WebResponse webResponse = webRequest.GetResponse())
            {
                if (webResponse == null)
                {
                    return null;
                }
                using (StreamReader reader = new StreamReader(webResponse.GetResponseStream()))
                {
                    return reader.ReadToEnd().Trim();
                }
            }
        }
        catch (WebException ex)
        {
            Console.WriteLine(ex.Message + "Error with response!");
        }
        return null;
    }

    /// <summary>
    /// Checks whether <paramref name="url"/> can be fetched within 10 seconds.
    /// </summary>
    /// <param name="url">Absolute URL to test.</param>
    /// <returns>
    /// false when the URL is not a valid absolute URI, when the request fails for
    /// any reason, or when the request was redirected to an address containing
    /// "404.php", "404.htm", "500.php" or "500.htm"; true otherwise.
    /// </returns>
    private static bool isUrlWorking(string url)
    {
        Uri urlToCheck;
        // A malformed URL in an article must not crash the whole bot run.
        if (!Uri.TryCreate(url, UriKind.Absolute, out urlToCheck))
        {
            return false;
        }

        String responseUrl;
        try
        {
            WebRequest request = WebRequest.Create(urlToCheck);
            request.Timeout = 10000;
            // Close the response: unclosed responses keep their connection busy
            // and later requests to the same host can then time out.
            using (WebResponse response = request.GetResponse())
            {
                responseUrl = response.ResponseUri.ToString();
            }
        }
        catch (Exception)
        {
            // Best effort: any failure is treated as "not working".
            return false;
        }

        // Not redirected: the page answered at the requested address.
        if (String.Equals(responseUrl, urlToCheck.ToString(), StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }

        // Redirected: treat a redirect to a typical error page as a dead link.
        bool redirectedToErrorPage =
            responseUrl.IndexOf("404.php", StringComparison.Ordinal) > -1 ||
            responseUrl.IndexOf("404.htm", StringComparison.Ordinal) > -1 ||
            responseUrl.IndexOf("500.php", StringComparison.Ordinal) > -1 ||
            responseUrl.IndexOf("500.htm", StringComparison.Ordinal) > -1;
        return !redirectedToErrorPage;
    }

    /// <summary>
    /// Tells whether a template string starts with one of the supported web
    /// reference template names (webbref, webref, citeweb, cite web), ignoring case.
    /// </summary>
    private static bool isWebReferenceTemplate(string template)
    {
        //handle several types, webref common on svnews, webbref on other sv-projects
        return template.StartsWith("webbref", StringComparison.OrdinalIgnoreCase)
            || template.StartsWith("webref", StringComparison.OrdinalIgnoreCase)
            || template.StartsWith("citeweb", StringComparison.OrdinalIgnoreCase)
            || template.StartsWith("cite web", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Cuts the archive address out of the reply returned by the archive request.
    /// </summary>
    /// <param name="response">Reply text, may be null.</param>
    /// <returns>The address (without scheme), or null when the reply is null,
    /// does not contain the expected marker, or is too short.</returns>
    private static string extractArchiveAddress(string response)
    {
        if (response == null)
        {
            return null;
        }
        int markerPos = response.IndexOf(ArchiveMarker, StringComparison.Ordinal);
        if (markerPos < 0)
        {
            return null;
        }
        int start = markerPos + ArchiveAddressOffset;
        if (start + ArchiveAddressLength > response.Length)
        {
            return null;
        }
        return response.Substring(start, ArchiveAddressLength);
    }

    /// <summary>
    /// Entry point: processes every main-namespace article of the category and
    /// saves each page whose text was actually changed.
    /// </summary>
    public static void Main()
    {
        // NOTE(review): user, password and email are not declared in the visible
        // source; they have to be supplied before this compiles.
        Site site = new Site("http://sv.wikipedia.org", user, password);
        String emailStr = email;
        // NOTE(review): the two date values below are fixed strings and have to be
        // updated by hand before each run.
        String todaysDateStr = "26 maj 2011";
        String deadlinkTemplateStr = "{{död länk|datum=2011-05}}";
        String editCommentStr = "Lägger in arkiveringsurl i webbref alt. dödmarkerar länk";
        String archiveUrlStr = "arkivurl";
        String archiveDateStr = "arkivdatum";
        String categoryStr = "Artiklar med Webbref som saknar arkiveringsurl";
        PageList pl = new PageList(site);
        // Running count of archived links, printed after each save.
        // NOTE(review): starts at 18 in the original - presumably a resumed run.
        long antal = 18;
        //Find articles among the recentchanges
        // pl.FillFromRecentChanges(false, false, false, false, true, 1000, 7);
        //Find articles from a category
        pl.FillFromCategory(categoryStr);
        //Find articles from all the links to a template, mostly useful on very small wikis
        // pl.FillFromLinksToPage("Mall:Webbref");
        //Remove namespaces, changes required (or uncomment) for some sisterprojects, like wikisource
        pl.FilterNamespaces(new int[] {0});
        foreach (Page myPage in pl)
        {
            bool changed = false;
            myPage.Load();
            // Identical template texts are replaced page-wide in one go, so each
            // distinct text must only be handled once per page.
            Hashtable handledTemplates = new Hashtable();
            // NOTE(review): assumes the entries start with the template name (no
            // leading "{{"), as the name checks below rely on it - confirm.
            foreach (string template in myPage.GetTemplatesWithParams())
            {
                if (!isWebReferenceTemplate(template))
                {
                    continue;
                }
                //If we already have an archive added skip to next template
                if (template.IndexOf(archiveUrlStr, StringComparison.Ordinal) >= 0)
                {
                    continue;
                }
                if (handledTemplates.ContainsKey(template))
                {
                    continue;
                }
                handledTemplates[template] = true;

                int urlParamPos = template.IndexOf("|url=", StringComparison.Ordinal);
                if (urlParamPos < 0)
                {
                    // No url parameter in the expected form: nothing to archive.
                    continue;
                }
                int urlStart = urlParamPos + 5;
                //Cut off the rest, unless it's the last argument
                int urlEnd = template.IndexOf("|", urlStart, StringComparison.Ordinal);
                if (urlEnd < 0)
                {
                    urlEnd = template.Length;
                }
                // Keep the raw text: it is what appears in the page, while the
                // decoded/trimmed form is what gets requested.
                string rawUrl = template.Substring(urlStart, urlEnd - urlStart);
                Console.WriteLine(rawUrl);
                //Fix some specialcases regarding URLs
                string url = HttpUtility.UrlDecode(rawUrl).Trim();
                Console.WriteLine(url);
                if (url.Length == 0)
                {
                    continue;
                }

                string newStr;
                //verify that the page isn't dead first, if it is mark it as such
                if (isUrlWorking(url))
                {
                    //Todo: Improve this later on so it's more robust for changes on webcitation.org
                    // Encode the values so '&', '=' or '+' inside the URL cannot
                    // break the form body.
                    string reply = getURLPost(
                        "http://www.webcitation.org/archive.php",
                        "url=" + HttpUtility.UrlEncode(url) + "&email=" + HttpUtility.UrlEncode(emailStr));
                    string archiveAddress = extractArchiveAddress(reply);
                    if (archiveAddress == null)
                    {
                        Console.WriteLine("Could not get an archive address for " + url);
                        continue;
                    }
                    // Insert right after the URL text (before any trailing
                    // whitespace), by position rather than by searching for the
                    // decoded URL, which may differ from the raw template text.
                    int insertPos = urlStart + rawUrl.TrimEnd().Length;
                    newStr = template.Insert(
                        insertPos,
                        "|" + archiveUrlStr + "=http://" + archiveAddress + "|" + archiveDateStr + "=" + todaysDateStr);
                    antal++;
                }
                else
                {
                    //Add {{dead link}}
                    Console.WriteLine("Dead link found!");
                    newStr = template + deadlinkTemplateStr;
                    // Already marked (e.g. by an earlier run): do not mark it twice.
                    if (myPage.text.IndexOf(newStr, StringComparison.Ordinal) >= 0)
                    {
                        continue;
                    }
                }

                string updatedText = myPage.text.Replace(template, newStr);
                // Only flag the page when the text really changed, so no empty
                // edits are saved.
                if (updatedText != myPage.text)
                {
                    myPage.text = updatedText;
                    changed = true;
                }
            }
            if (changed)
            {
                myPage.Save(myPage.text, editCommentStr, true);
                Console.WriteLine("Nytt antal = " + antal.ToString());
            }
        }
    }
}