User:PeerInfinity/Scripts/SyncArticleLinks.php

<?php
// SyncArticleLinks.php // synchronize the list of article links on the concept pages with the list of concepts on the "All Articles" pages
// for the latest version of this script's output, see:
///stderr output - the stderr output of this script
///SyncArticleLinksOutput.txt - the output written to the text file, containing the Sync results
//

// progress and diagnostic messages are written to this handle
$stderr = fopen( "php://stderr", "wt" );

//*

// name of the XML dump file (presumably the input handed to the XML parser -- the code that opens it is not in this part of the file)
$XMLfile = "daily_XML_dump.xml";


// ---- data read from the "All Articles" pages ----

// a struct per article would be cleaner; for now each field lives in its own parallel array

// number of articles stored so far, i.e. the next free slot in the parallel arrays below
$NextAllArticleIndex = 0;

// every array in this group is indexed by $NextAllArticleIndex, except where noted

$ArrayAllArticleTitle              = [];
$ArrayAllArticleLink               = [];
$ArrayAllArticleIndexedConcepts    = []; // array of arrays
$ArrayAllArticleFoundConcepts      = []; // array of arrays
$ArrayAllArticleNotIndexedConcepts = []; // array of arrays
$ArrayAllArticleAuthor             = [];
$ArrayAllArticleDate               = []; // currently unused; filling it in would mean following the link to the article, which might be worth implementing eventually
$ArrayAllArticleOfficialSummaries  = []; // array of arrays -- NOTE: keyed by article name, not by $NextAllArticleIndex
$ArrayAllArticleUsedSummaries      = []; // array of arrays


// ---- data about the concepts ----

// every array in this group is keyed by concept title

$ConceptFound            = [];
$ConceptNotInIndex       = [];
$ConceptThatAreRedirects = [];
$ConceptSeeAlso          = []; // array of arrays


// ---- pages flagged for some formatting or content issue ----

// every array in this group is keyed by page title; a page is flagged by storing true under its title

$PagesWithOvercomingBiasLinks          = [];
$PagesWithComments                     = [];
$PagesWithOvercomingBiasArticlesHeader = [];
$PagesWithExternalReferences           = [];
$PagesWithSeeAlso                      = [];
$PagesWithExternalAuthorLinks          = [];
$PagesWithNewlineAfterWikiLink         = [];
$PagesWithSeeAlsoBeforeBlogPosts       = [];

// ugh... this is ugly:

// problems found in the article links on the concept pages.
// each kind of problem gets a pair of parallel arrays plus a counter holding the next free slot.

// the entry for each problem link, indexed by the matching ...NextIndex counter below

$ArticleLinksWithoutEndingSlash = array();
$ArticleLinksWithWrongTitle = array();
$ArticleLinksWithoutAuthor = array();
$ArticleLinksWithAvailableSummary = array();

// presumably the concept page on which each entry above was found -- TODO confirm against the code that fills these

$ArticleLinksWithoutEndingSlashConcept = array();
$ArticleLinksWithWrongTitleConcept = array();
$ArticleLinksWithoutAuthorConcept = array();
$ArticleLinksWithAvailableSummaryConcept = array();

// next free slot in each pair of arrays above.
// these names must match the `global` declarations in characterData() and endElement() exactly;
// a counter that is declared under a different name is null inside those functions, so the first
// entry gets stored under the key "" instead of 0.

$ArticleLinksWithoutEndingSlashNextIndex = 0;
$ArticleLinksWithWrongTitleNextIndex = 0;
$ArticleLinksWithoutAuthorNextIndex = 0;
$ArticleLinksWithAvailableSummaryNextIndex = 0;

// the two counters above used to be declared under these misspelled names.
// they are still initialized in case other code refers to them.

$ArticleLinksWithoutAuthorConceptNextIndex = 0;
$ArticleLinksWithAvailableSummaryIndex = 0;


// title and accumulated text of the page currently being parsed
$CurrentTitle = $CurrentBody = "";

// which XML element the parser is currently inside (set in startElement())
$IsReadingTitle = $IsReadingText = false;

// which kind of page the current pass over the dump is looking for
$IsReadingAllArticlesPages = false;
$IsReadingSummariesPages   = false;
$IsReadingConceptPages     = false;

// whether the page currently being parsed is one of the pages we want (set in characterData())
$IsReadingOneAllArticlesPage = false;
$IsReadingOneSummariesPage   = false;
$IsReadingOneConceptPage     = false;

// range of years for which "All Articles" pages are read
$FirstYearToRead = 2006;
$FinalYearToRead = 2010; //todo - update this in 2011!!!

// both year trackers start at the first year
$NextYearToRead = $YearCurrentlyBeingRead = $FirstYearToRead;

// result flags for the All Articles / summaries pages
$SuccessfullyReadOneAllArticlesPage = false;
$SuccessfullyReadOneSummariesPage   = false;
$FailedToReadAllArticlesPage        = false;

// result flags for the concept pages
$SuccessfullyReadOneConceptPage = false;
$FailedToReadConceptPage        = false;

// number of concept pages seen so far (used for the progress messages)
$PagesRead = 0;

$Debug = true;


// for doxygen
$fontsize = 12;

// column widths used to pad each section of the output, so that everything lines up and is easier to read
$PaddingValue1 = 100;
$PaddingValue2 = 50;




// open the text file that receives the results of this script
// (progress and diagnostic messages go to the $stderr handle instead)

$fp = fopen('SyncArticleLinksOutput.txt', 'w');





// now for some code that I still haven't figured out a good way to untangle
// this processing currently needs to be done in these functions, and can't be moved to a more sensible place


// Start-of-element callback (presumably registered with the XML parser; the registration is not in this part of the file).
//
// Records which element has just been opened, so that characterData() knows what to do with the text that follows.
// Opening any element other than TITLE / TEXT clears both flags.
//
// $parser  - the parser invoking the callback (unused)
// $name    - name of the element being opened; compared against the upper-case names "TITLE" and "TEXT"
// $attribs - attributes of the element (unused)
function startElement($parser, $name, $attribs)
{
	global $IsReadingTitle, $IsReadingText;

	$IsReadingTitle = ( $name == "TITLE" );
	$IsReadingText  = ( $name == "TEXT" );
}

// Character-data callback: receives the text found inside the element most recently opened by startElement().
//
// While $IsReadingTitle is set, $data is taken as the title of a new page: the body buffer is cleared,
// the per-page flags are reset, and the title decides whether the page's text should be collected
// (depending on which kind of page the current pass is looking for).
//
// While $IsReadingText is set and the current page was selected, $data is appended to $CurrentBody.
// The collected body is processed later, in endElement().
//
// $parser - the parser invoking the callback (unused)
// $data   - the chunk of character data
//
// NOTE(review): only the first chunk delivered for a title is kept ($IsReadingTitle is cleared right away).
// PHP's XML parser may deliver one text node in several chunks, so a title containing an entity could be
// truncated -- confirm against the real dump before relying on exact title matches.
//
// NOTE(review): the All Articles check is a substring match, so a ".../Summaries" page for the same year
// would also match it -- this presumably relies on the order of the pages in the dump; confirm.
function characterData($parser, $data)
{
	global $stderr;

	global $CurrentTitle, $CurrentBody;

	global $IsReadingTitle, $IsReadingText;

	global $IsReadingAllArticlesPages, $IsReadingSummariesPages, $IsReadingConceptPages;

	global $IsReadingOneAllArticlesPage, $IsReadingOneSummariesPage, $IsReadingOneConceptPage;

	global $FirstYearToRead, $FinalYearToRead, $NextYearToRead, $YearCurrentlyBeingRead;

	global $PagesRead;

	// the output file is deliberately NOT opened here: this callback never writes to it, and opening it
	// in 'w' mode on every call would truncate it again and again

	// if we're reading the title, then remember the title, and check if we want to read the page content

	if( $IsReadingTitle )
	{
		$CurrentBody = "";

		$CurrentTitle = $data;
		$IsReadingTitle = false;

		$IsReadingOneAllArticlesPage = false;
		$IsReadingOneSummariesPage   = false;
		$IsReadingOneConceptPage     = false;

		// skip category pages, template pages, etc.
		// (these markers are matched anywhere in the title, not only at the start)

		$SkippedTitleMarkers = array( "Category:", "Template:", "Talk:", "Category talk:" );

		// individual pages that are never processed

		$SkippedTitles = array(
			"Catch Phrases",
			"Categories",
			"Chat Logs/2009-04-11",
			"Disagreements on Less Wrong",
			"Series",
			"Using the wiki",
			"Acronyms used on Less Wrong",
			"Less Wrong/Errors from moving Eliezer's posts from OB to LW",
		);

		$SkipThisPage = in_array( $CurrentTitle, $SkippedTitles, true );

		foreach( $SkippedTitleMarkers as $Marker )
		{
			if( strpos( $CurrentTitle, $Marker ) !== FALSE )
			{
				$SkipThisPage = true;
				break;
			}
		}

		if( !$SkipThisPage )
		{
			if( $IsReadingAllArticlesPages && $NextYearToRead <= $FinalYearToRead )
			{
				// check if the page title is the next year we want to read
				// (built from $NextYearToRead, so raising $FinalYearToRead is all that is needed for a new year)
				if( strpos( $CurrentTitle, "Less Wrong/" . $NextYearToRead . " Articles" ) !== FALSE )
				{
					$YearCurrentlyBeingRead = $NextYearToRead;
					$NextYearToRead++;
					$IsReadingOneAllArticlesPage = true;

					fwrite( $stderr, "Processing the wikipage for $YearCurrentlyBeingRead \n\n" );
				}
			}
			else if( $IsReadingSummariesPages && $NextYearToRead <= $FinalYearToRead )
			{
				// check if the page title is the next year we want to read
				if( strpos( $CurrentTitle, "Less Wrong/" . $NextYearToRead . " Articles/Summaries" ) !== FALSE )
				{
					$YearCurrentlyBeingRead = $NextYearToRead;
					$NextYearToRead++;
					$IsReadingOneSummariesPage = true;

					fwrite( $stderr, "Processing the summaries page for $YearCurrentlyBeingRead \n\n" );
				}
			}
			else if( $IsReadingConceptPages )
			{
				// don't process the All Articles pages!
				// (this substring check also excludes their subpages, e.g. the Summaries pages)

				$IsAnAllArticlesPage = false;

				for( $Year = $FirstYearToRead; $Year <= $FinalYearToRead; $Year++ )
				{
					if( strpos( $CurrentTitle, "Less Wrong/" . $Year . " Articles" ) !== FALSE )
					{
						$IsAnAllArticlesPage = true;
						break;
					}
				}

				if( !$IsAnAllArticlesPage )
				{
					// no special processing here, just remember the title

					$IsReadingOneConceptPage = true;

					// progress message every 100 pages
					if( $PagesRead % 100 == 0 )
					{
						fwrite( $stderr, "Pages read: $PagesRead\n" );
					}

					$PagesRead++;
				}
			}
			else if( $IsReadingAllArticlesPages )
			{
				// reading All Articles pages, past the final year: nothing to do
			}
			else if( $IsReadingSummariesPages )
			{
				// reading summary pages, past the final year: nothing to do
			}
			else
			{
				fwrite( $stderr, "Error: not reading all articles, summaries, or concept pages\n\n" );
			}
		}
	}

	// if we're reading the text of a page we selected, then store the content of the article
	// we'll process the data in endElement()

	if( $IsReadingText && ( $IsReadingOneAllArticlesPage || $IsReadingOneSummariesPage || $IsReadingOneConceptPage ) )
	{
		$CurrentBody .= $data;
	}
}

function endElement($parser, $name)
{
	global $stderr;

	global $XMLfile;
	
	global $NextAllArticleIndex;

	global $ArrayAllArticleTitle             ;
	global $ArrayAllArticleLink              ;
	global $ArrayAllArticleIndexedConcepts   ;
	global $ArrayAllArticleFoundConcepts     ;
	global $ArrayAllArticleNotIndexedConcepts;
	global $ArrayAllArticleAuthor            ;
	global $ArrayAllArticleDate              ;
	global $ArrayAllArticleOfficialSummaries ;
	global $ArrayAllArticleUsedSummaries     ;

	global $ConceptFound;
	global $ConceptNotInIndex;
	global $ConceptThatAreRedirects;
	global $ConceptSeeAlso;
	
	global $PagesWithOvercomingBiasLinks;
	global $PagesWithComments;
	global $PagesWithOvercomingBiasArticlesHeader;
	global $PagesWithExternalReferences;
	global $PagesWithSeeAlso;
	global $PagesWithExternalAuthorLinks;
	global $PagesWithNewlineAfterWikiLink;
	global $PagesWithSeeAlsoBeforeBlogPosts;

	global $ArticleLinksWithoutEndingSlash;
	global $ArticleLinksWithWrongTitle;
	global $ArticleLinksWithoutAuthor;
	global $ArticleLinksWithAvailableSummary;

	global $ArticleLinksWithoutEndingSlashConcept;
	global $ArticleLinksWithWrongTitleConcept;
	global $ArticleLinksWithoutAuthorConcept;
	global $ArticleLinksWithAvailableSummaryConcept;

	global $ArticleLinksWithoutEndingSlashNextIndex;
	global $ArticleLinksWithWrongTitleNextIndex;
	global $ArticleLinksWithoutAuthorNextIndex;
	global $ArticleLinksWithAvailableSummaryNextIndex;

	global $CurrentTitle;
	global $CurrentBody  ;

	global $IsReadingTitle;
	global $IsReadingText ;

	global $IsReadingAllArticlesPages;
	global $IsReadingSummariesPages  ;
	global $IsReadingConceptPages    ;

	global $IsReadingOneAllArticlesPage;
	global $IsReadingOneSummariesPage  ;
	global $IsReadingOneConceptPage    ;

	global $FirstYearToRead;
	global $FinalYearToRead;

	global $NextYearToRead;

	global $YearCurrentlyBeingRead;

	global $SuccessfullyReadOneAllArticlesPage;
	global $SuccessfullyReadOneSummariesPage ;
	global $FailedToReadAllArticlesPage      ;

	global $SuccessfullyReadOneConceptPage;
	global $FailedToReadConceptPage;
	
	global $PagesRead;
	
	global $Debug;
	
	
	// now process all the text that we read
	
	if( $name == "TEXT" )
	{
		// this check is probably unnecessary
		if( $IsReadingText )
		{
			if( $IsReadingOneAllArticlesPage )
			{
				//fwrite( $stderr, "-------------------------------------------reading body of year $YearCurrentlyBeingRead , $CurrentTitle \n\n" );

				//fwrite( $stderr, $CurrentBody );
				
				// now parse the data from this page and store it into the arrays
				
				$CurrentGroupStartPos = 0;
				
				// skip to the first '|-'
				$NextGroupStartPos = strpos( $CurrentBody, "|-", $CurrentGroupStartPos+1 );
				
				// keep going until there are no more groups
				while( $NextGroupStartPos !== FALSE && $CurrentGroupStartPos != $NextGroupStartPos )
				{
					// find the start and end of the current group
					$CurrentGroupStartPos = $NextGroupStartPos;
					
					$NextGroupStartPos = strpos( $CurrentBody, "|-", $CurrentGroupStartPos+1 );
					
					// if there is no next group, then set the end of this group to the end of the whole string
					if( $NextGroupStartPos === FALSE )
					{
						$NextGroupStartPos = strlen( $CurrentBody ) - 1;
					}
					
					// now find and store the parts
					
					// there is probably a much simpler way to do all this using regexes
					
					$DataIsValid = true;
					
					$CurrentArticleConceptArray = array();
					
					$TokenStartPos = strpos($CurrentBody, "[", $CurrentGroupStartPos) + 1;
					$TokenEndPos = strpos($CurrentBody, " ", $TokenStartPos);
					
					$CurrentArticleLink = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos );
					
					if( substr_count( $CurrentArticleLink, "http://" ) <= 0 )
					{
						$DataIsValid = false;
					}


					$TokenStartPos = $TokenEndPos + 1;
					$TokenEndPos = strpos($CurrentBody, "]", $TokenStartPos);
					
					$CurrentArticleTitle = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos );
					
					$CurrentArticleTitle = str_replace(""", "\"", $CurrentArticleTitle);

					
					$NextLineStartPos = strpos($CurrentBody, "|", $TokenEndPos);
					$NextLineEndPos = strpos($CurrentBody, "|", $NextLineStartPos+1);
					
					$ConceptLine = substr( $CurrentBody, $NextLineStartPos, $NextLineEndPos-$NextLineStartPos );

					//fwrite( $stderr, "concept line start: $NextLineStartPos \n concept line end: $NextLineEndPos \n concept line: $ConceptLine\n\n" );
					
					$FinishedFindingConcepts = false;
					
					$ConceptIndex = 0;

					$TokenStartPos = 0;
					$TokenEndPos = 0;
					
					while( !$FinishedFindingConcepts )
					{
						$TokenStartPos = strpos($ConceptLine, "[[", $TokenEndPos);
						$TokenEndPos = strpos($ConceptLine, "]]", $TokenStartPos);
						
						$CurrentConcept = substr( $ConceptLine, $TokenStartPos+2, $TokenEndPos-$TokenStartPos-2 );

						if
							(
								$TokenEndPos > $TokenStartPos &&
								substr_count($CurrentConcept, "[[") <= 0 &&
								substr_count($CurrentConcept, "]]") <= 0
								)
						{
							//fwrite( $stderr, "found concept: $CurrentConcept\n\n" );
							
							$CurrentArticleConceptArray[$ConceptIndex] = $CurrentConcept;
							
							$ConceptIndex++;
							
							$FinishedFindingConcepts = false;
						}
						else
						{
							$FinishedFindingConcepts = true;
						}

						//fwrite( $stderr, "concept start: $TokenStartPos \n concept end: $TokenEndPos \n concept index: $ConceptIndex \n concept line: $ConceptLine\n\n" );
					}

					$NextLineStartPos = strpos($CurrentBody, "|", $NextLineEndPos);
					$NextLineEndPos = strpos($CurrentBody, "|", $NextLineStartPos+1);

					$TokenStartPos = strpos($CurrentBody, "[", $NextLineStartPos);
					$TokenEndPos = strpos($CurrentBody, "]", $TokenStartPos);

					$CurrentArticleAuthor = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos+1 );
					
					if( $DataIsValid )
					{
						$ArrayAllArticleTitle              [$NextAllArticleIndex] = $CurrentArticleTitle;

						$ArrayAllArticleLink               [$NextAllArticleIndex] = $CurrentArticleLink;

						//$CurrentArticleConceptArray[] = "concept1";

						$ArrayAllArticleIndexedConcepts    [$NextAllArticleIndex] = $CurrentArticleConceptArray;

						$ArrayAllArticleFoundConcepts      [$NextAllArticleIndex] = array();

						$ArrayAllArticleNotIndexedConcepts [$NextAllArticleIndex] = array();

						$ArrayAllArticleAuthor             [$NextAllArticleIndex] = $CurrentArticleAuthor;

						$ArrayAllArticleDate               [$NextAllArticleIndex] = "date";

						$ArrayAllArticleUsedSummaries      [$NextAllArticleIndex] = array();

						//$TempString = print_r($ArrayAllArticleIndexedConcepts[$NextAllArticleIndex]);

						//fwrite( $stderr, "start: $CurrentGroupStartPos \n end: $NextGroupStartPos \n index: $NextAllArticleIndex \n $ArrayAllArticleTitle[$NextAllArticleIndex] \n $ArrayAllArticleLink[$NextAllArticleIndex] \n $TempString \n $ArrayAllArticleAuthor[$NextAllArticleIndex] \n $ArrayAllArticleDate[$NextAllArticleIndex] \n\n" );

						$NextAllArticleIndex++;
					}
				}

				$SuccessfullyReadOneAllArticlesPage = true;
			}
			
			if( $IsReadingOneSummariesPage )
			{
				//fwrite( $stderr, "reading summaries page for $YearCurrentlyBeingRead \n\n" );

				$NextSummaryTitleStart = 0;
				$NextSummaryTitleEnd = 0;
				$NextSummaryTextBlockStart = 0;
				$NextSummaryTextBlockEnd = 0;
				$NextSummaryTextStart = 0;
				$NextSummaryTextEnd = 0;
				
				$AlternateSummaryCount = 0;
				
				$FinishedReadingSummaries = false;

				while( !$FinishedReadingSummaries )
				{
					$NextSummaryTitleStart = strpos($CurrentBody, "=====[", $NextSummaryTitleEnd);

					//fwrite( $stderr, "CurrentBody: $CurrentBody \n\n" );
					//fwrite( $stderr, "NextSummaryTitleStart: $NextSummaryTitleStart \n\n" );
				
					if( $NextSummaryTitleStart === FALSE )
					{
						$FinishedReadingSummaries = true;
					}
					else
					{
						$NextSummaryTitleStart += 6;
						
						$NextSummaryTitleEnd = strpos($CurrentBody, "]=====", $NextSummaryTitleStart);
						
						$FirstSpacePos = strpos($CurrentBody, " ", $NextSummaryTitleStart);
						
						$CurrentSummaryLink = substr( $CurrentBody, $NextSummaryTitleStart, $FirstSpacePos-$NextSummaryTitleStart );
						
						$CurrentSummaryTitle = substr( $CurrentBody, $FirstSpacePos+1, $NextSummaryTitleEnd-$FirstSpacePos-1 );

						$CurrentSummaryTitle = str_replace(""", "\"", $CurrentSummaryTitle);

						//fwrite( $stderr, "reading summary of $CurrentSummaryTitle \n\n" );
						
						$NextSummaryTextBlockStart = $NextSummaryTitleEnd + 8;
						
						$NextSummaryTextBlockEnd = strpos($CurrentBody, "=====[", $NextSummaryTitleEnd);
						
						if( $NextSummaryTextBlockEnd === FALSE )
						{
							$NextSummaryTextBlockEnd = strlen($CurrentBody);
						}
						
						$NextSummaryTextBlock = substr( $CurrentBody, $NextSummaryTextBlockStart, $NextSummaryTextBlockEnd-$NextSummaryTextBlockStart );

						$AlternateSummaryCount = substr_count( $NextSummaryTextBlock, "(alternate summary:)" );

						$NextSummaryTextEnd = 0;
						
						$ArrayAllArticleOfficialSummaries[$CurrentSummaryTitle] = array();
						
						for( $AlternateSummaryNum = 0; $AlternateSummaryNum < $AlternateSummaryCount; $AlternateSummaryNum++ )
						{
							$NextSummaryTextStart = $NextSummaryTextEnd;
							$NextSummaryTextEnd = strpos( $NextSummaryTextBlock, "\n\n(alternate summary:)\n\n", $NextSummaryTextStart );
							
							$SummaryToAdd = substr( $NextSummaryTextBlock, $NextSummaryTextStart, $NextSummaryTextEnd-$NextSummaryTextStart );
							
							$ArrayAllArticleOfficialSummaries[$CurrentSummaryTitle][$AlternateSummaryNum] = $SummaryToAdd;
							
							$NextSummaryTextEnd += strlen( "\n\n(alternate summary:)\n\n" );
						}

						$NextSummaryTextStart = $NextSummaryTextEnd;
						
						$SummaryToAdd = substr( $NextSummaryTextBlock, $NextSummaryTextStart );
						
						$SummaryToAdd = trim( $SummaryToAdd, "\n" );
						
						if( strlen( $SummaryToAdd ) > 0 )
						{
							if( strpos( $SummaryToAdd, "__NOTOC__" ) !== FALSE )
							{
								//todo - decide how to deal with this case!
							}
							else
							{
								$ArrayAllArticleOfficialSummaries[$CurrentSummaryTitle][$AlternateSummaryCount] = $SummaryToAdd;
							}
						}
					}
				}



				$SuccessfullyReadOneSummariesPage = true;
			}

			if( $IsReadingOneConceptPage )
			{
				//fwrite( $stderr, "------reading body of $CurrentTitle \n\n" );
				
				// don't bother processing the page unless it has a "Blog posts" section

				$TokenStartPos = 0;
				$TokenEndPos = 0;

	
				// keep track of which pages have links to OvercomingBias.com articles
				
				if( substr_count( $CurrentBody, "http://www.overcomingbias.com/20" ) > 0 )
				{
					// the following pages are "allowed" to have OvercomingBias.com articles - these pages were already checked manually for links that should point to lesswrong.com instead
					if
						(
							$CurrentTitle == "Aumann's agreement theorem" ||
							$CurrentTitle == "Bias" ||
							$CurrentTitle == "Bite the bullet" ||
							$CurrentTitle == "Black swan" ||
							$CurrentTitle == "Catch Phrases" ||
							$CurrentTitle == "Chat Logs/2009-04-11" ||
							$CurrentTitle == "Cognitive style" ||
							$CurrentTitle == "Coherence" ||
							$CurrentTitle == "Connotation" ||
							$CurrentTitle == "Consistency" ||
							$CurrentTitle == "Cryonics" ||
							$CurrentTitle == "Dark arts" ||
							$CurrentTitle == "Disagreement" ||
							$CurrentTitle == "Disagreements on Less Wrong" ||
							$CurrentTitle == "Emotion" ||
							$CurrentTitle == "Extraordinary evidence" ||
							$CurrentTitle == "Forecast" ||
							$CurrentTitle == "Hypocrisy" ||
							$CurrentTitle == "Impossible world" ||
							$CurrentTitle == "Intellectual roles" ||
							$CurrentTitle == "Likelihood ratio" ||
							$CurrentTitle == "Meme lineage" ||
							$CurrentTitle == "Modesty argument" ||
							$CurrentTitle == "Near/far thinking" ||
							$CurrentTitle == "Overcoming Bias" ||
							$CurrentTitle == "Overconfidence" ||
							$CurrentTitle == "Prediction market" ||
							$CurrentTitle == "Scales of justice fallacy" ||
							$CurrentTitle == "Series" ||
							$CurrentTitle == "Signaling" ||
							$CurrentTitle == "Signalling" ||
							$CurrentTitle == "Status" ||
							$CurrentTitle == "Stereotype" ||
							false
						)
					{
						// do nothing
					}
					else
					{
						$PagesWithOvercomingBiasLinks[$CurrentTitle] = true;
					}
				}

				if( substr_count( $CurrentBody, "<!--" ) > 0 )
				{
					// the following pages are "allowed" to have comments
					if
						(
							$CurrentTitle == "LessWrong Wiki"
						)
					{
						// do nothing
					}
					else
					{
						$PagesWithComments[$CurrentTitle] = true;
					}
				}

				if( substr_count( $CurrentBody, "==Overcoming Bias Articles==" ) > 0 )
				{
					$PagesWithOvercomingBiasArticlesHeader[$CurrentTitle] = true;
				}

				if( substr_count( $CurrentBody, "External references" ) > 0 )
				{
					$PagesWithExternalReferences[$CurrentTitle] = true;
				}

				if( substr_count( $CurrentBody, "See Also" ) > 0 )
				{
					$PagesWithSeeAlso[$CurrentTitle] = true;
				}
				
				if( substr_count( $CurrentBody, "by [http" ) > 0 )
				{
					$PagesWithExternalAuthorLinks[$CurrentTitle] = true;
				}
				
				if( substr_count( $CurrentBody, "wikilink}}\n\n" ) > 0 )
				{
					$PagesWithNewlineAfterWikiLink[$CurrentTitle] = true;
				}
				
				$SeeAlsoPos = strpos($CurrentBody, "==See also==");
				$BlogPostsPos = strpos($CurrentBody, "==Blog posts==");
				
				if( $SeeAlsoPos !== FALSE && $BlogPostsPos !== FALSE )
				{
					if( $SeeAlsoPos < $BlogPostsPos )
					{
						$PagesWithSeeAlsoBeforeBlogPosts[$CurrentTitle] = true;
					}
				}
				

				if( substr_count( $CurrentBody, "#REDIRECT" ) > 0 )
				{
					if( array_key_exists( $CurrentTitle, $ConceptFound ) )
					{
						$ConceptThatAreRedirects[$CurrentTitle] = true;
					}
				}


				//todo - reconsider this!!!
				//if( substr_count( $CurrentArticleLink, "==Blog posts==" ) > 0 )
				if( substr_count( $CurrentBody, "http://lesswrong.com/lw/" ) > 0 )
				{
					// first check if the concept exists in the index

					$ConceptIsInIndex = false;

					if( array_key_exists( $CurrentTitle, $ConceptFound ) )
					{
						$ConceptIsInIndex = true;
					}

					if( $ConceptIsInIndex )
					{
						// if the concept exists in the index, then mark the concept as found
						
						$ConceptFound[$CurrentTitle] = true;
						
						//fwrite( $stderr, "++++++found: $CurrentTitle\n\n" );
					}
					else
					{
						// if the concept doesn't exist in the index, then mark the concept as not found
						
						$ConceptNotInIndex[$CurrentTitle] = true;
					}
					
					
					// now keep track of the "See also" links
					if( array_key_exists($CurrentTitle, $ConceptFound ) )
					{
						if( $ConceptFound[$CurrentTitle] == true )
						{
							$SeeAlsoStartPos = strpos($CurrentBody, "==See also==", 0);

							if( $SeeAlsoStartPos !== FALSE )
							{
								$SeeAlsoStartPos += 12;
							}
							else
							{
								$SeeAlsoStartPos = strpos($CurrentBody, "==Related concepts==", 0);

								if( $SeeAlsoStartPos !== FALSE )
								{
									$SeeAlsoStartPos += 20;
								}
								else
								{
								}
							}

							if( $SeeAlsoStartPos !== FALSE )
							{
								$SeeAlsoEndPos = strpos($CurrentBody, "==", $SeeAlsoStartPos);
								
								if( $SeeAlsoEndPos === FALSE )
								{
									$SeeAlsoEndPos = strlen($CurrentBody);
								}

								$TokenStartPos = $SeeAlsoStartPos;
								$TokenEndPos = $SeeAlsoStartPos;

								$DoneSeeAlso = false;

								$EntriesFound = 0;

								$ConceptSeeAlso[$CurrentTitle] = array();

								//fwrite( $stderr, "Concept: " . $CurrentTitle . "\n" );

								while( !$DoneSeeAlso )
								{
									$TokenStartPos = strpos($CurrentBody, "[[", $TokenEndPos);
									$TokenEndPos = strpos($CurrentBody, "]]", $TokenStartPos);

									if
										(
											$TokenStartPos < $SeeAlsoEndPos &&
											$TokenStartPos >= $SeeAlsoStartPos &&
											$TokenStartPos !== FALSE &&
											$TokenEndPos !== FALSE
										)
									{
										$CurrentSeeAlso = substr( $CurrentBody, $TokenStartPos+2, $TokenEndPos-$TokenStartPos-2 );
										
										if( substr_count( $CurrentSeeAlso, "Category:") <= 0 )
										{
											$ConceptSeeAlso[$CurrentTitle][$EntriesFound] = $CurrentSeeAlso;

											//fwrite( $stderr, $ConceptSeeAlso[$CurrentTitle][$EntriesFound] . "\n" );

											$EntriesFound++;
										}
									}
									else
									{
										$DoneSeeAlso = true;
									}

									//fwrite( $stderr, "SeeAlsoStartPos: $SeeAlsoStartPos SeeAlsoEndPos: $SeeAlsoEndPos TokenStartPos: $TokenStartPos TokenEndPos $TokenEndPos" . "\n" );
								}
							}
						}
					}
					


					$TokenStartPos = strpos($CurrentBody, "http://lesswrong.com/lw/", $TokenEndPos);
					$TokenEndPos = strpos($CurrentBody, " ", $TokenStartPos);

					$NextTokenStartPos = strpos($CurrentBody, "http://lesswrong.com/lw/", $TokenEndPos);
					
					if( $NextTokenStartPos === FALSE )
					{
						$NextTokenStartPos = strlen($CurrentBody);
					}

					$CurrentLink = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos );
					
					$FinishedFindingLinks = false;

					while( !$FinishedFindingLinks )
					{
						// check if the link appears before or after the first header
						
						$FirstHeaderPos = strpos($CurrentBody, "==", 0);

						$LinkIsAfterHeader = false;

						if( $FirstHeaderPos === FALSE )
						{
							$LinkIsAfterHeader = false;
						}
						else
						{
							if( $FirstHeaderPos < $TokenStartPos )
							{
								$LinkIsAfterHeader = true;
							}
							else
							{
								$LinkIsAfterHeader = false;
							}
						}



						// ignore the following links:
						
						if
							(
								substr_count( $CurrentLink, "http://lesswrong.com/lw/b1/persuasiveness_vs_soundness/789") <= 0 &&
								substr_count( $CurrentLink, "http://lesswrong.com/lw/14v/the_usefulness_of_correlations/11iu") <= 0
							)

						{
							// check if the link ends with a /

							$LastCharInLink = substr( $CurrentLink, -1 );
							
							$SlashCountBeforeAdd = substr_count( $CurrentLink, "/");

							if
								(
									strcmp( $LastCharInLink, "/" ) != 0
								)
							{
								$CurrentLink .= "/";
							}



							// check if the link's title is correct

							$TitleStartPos = $TokenEndPos+1;
							$TitleEndPos = strpos($CurrentBody, "]", $TitleStartPos);

							$CurrentArticleTitle = substr( $CurrentBody, $TitleStartPos, $TitleEndPos-$TitleStartPos );

							$CorrectArticleTitle = "(not found)";
							$CorrectArticleAuthor = "(not found)";

							$SearchResult = array_search( $CurrentLink, $ArrayAllArticleLink );

							if( $SearchResult !== FALSE )
							{
								$CorrectArticleTitle  = $ArrayAllArticleTitle [$SearchResult];
								$CorrectArticleAuthor = $ArrayAllArticleAuthor[$SearchResult];
								
								
								
								//don't do any processing for summaries unless the link we're looking at is after the first header
								if( $LinkIsAfterHeader )
								{
									$LinkEndPos = strpos($CurrentBody, "]", $TokenStartPos) + 1;

									$NewLinePos = strpos($CurrentBody, "\n", $LinkEndPos);

									$NextArticleSummary = "";

									if( $NewLinePos === FALSE )
									{
										$NextArticleSummary = substr( $CurrentBody, $LinkEndPos );
									}
									else
									{
										//$NextArticleSummary .= "\nsummary:\n" + substr( $CurrentBody, $TokenStartPos, $NextTokenStartPos-$TokenStartPos );
										$NextArticleSummary = substr( $CurrentBody, $LinkEndPos, $NewLinePos-$LinkEndPos );
									}

									// search through the array of official summaries for this article, to see if any of them are found in the current article
									// if a match is found, then don't add the summary to the array of used summaries

									$SummaryExistsForThisArticle = false;
									$FoundAMatchingSummary = false;
									$FirstFoundSummary = "";

									if( array_key_exists( $CorrectArticleTitle, $ArrayAllArticleOfficialSummaries ) )
									{
										$SummaryExistsForThisArticle = true;

										foreach( $ArrayAllArticleOfficialSummaries[$CorrectArticleTitle] as $key => $val )
										{
											if( substr_count( $CurrentBody, $val ) > 0 )
											{
												if( !$FoundAMatchingSummary )
												{
													$FirstFoundSummary = $val;
												}

												$FoundAMatchingSummary = true;
											}
										}
									}

									$AddThisSummaryToTheArray = false;

									if( strpos( $NextArticleSummary, "by [[" ) > 0 )
									{
										//todo - consider adding a check for improperly formatted author links

										//todo - consider adding a check for an author link followed by a summary

										//todo - consider adding a check for a summary that doesn't appear until after the newline
									}
									else if( strlen( $NextArticleSummary ) > 0 )
									{
										if( $FoundAMatchingSummary )
										{
											// if we found a matching summary, then we don't need to do anything here
										}
										else
										{
											$AddThisSummaryToTheArray = true;
										}
									}
									else if( $SummaryExistsForThisArticle )
									{
										// a summary is available for this article, but the summary isn't on this page.
										// report this to 

										//$ArticleLinksWithAvailableSummary[$ArticleLinksWithAvailableSummaryNextIndex] = "*[$CurrentLink $CorrectArticleTitle] - " . $ArrayAllArticleOfficialSummaries[$CorrectArticleTitle][0];
										//$ArticleLinksWithAvailableSummary[$ArticleLinksWithAvailableSummaryNextIndex] = "*[$CurrentLink $CorrectArticleTitle] - " . $FirstFoundSummary;

										// just accessing the array directly doesn't work, and neither did that $FirstFoundSummary trick.  for some reason, I need to use a foreach

										//fwrite( $stderr, "$FirstFoundSummary\n" );
										//fwrite( $stderr, "$ArrayAllArticleOfficialSummaries[$CorrectArticleTitle][0]\n" );

										$IsFirstIteration = true;

										foreach( $ArrayAllArticleOfficialSummaries[$CorrectArticleTitle] as $key => $val )
										{
											if( $IsFirstIteration )
											{
												$ArticleLinksWithAvailableSummary[$ArticleLinksWithAvailableSummaryNextIndex] = "*[$CurrentLink $CorrectArticleTitle] - " . $val;

												$IsFirstIteration = false;
											}
											//fwrite( $stderr, "$val\n" );
										}

										$ArticleLinksWithAvailableSummaryConcept[$ArticleLinksWithAvailableSummaryNextIndex] = $CurrentTitle;

										$ArticleLinksWithAvailableSummaryNextIndex++;
									}

									if( $AddThisSummaryToTheArray )
									{
										// ignore known invalid summaries
										if( strlen( $NextArticleSummary ) > 10 )
										{
											$NextArticleSummary = trim( $NextArticleSummary, " ,-—'" );

											if
												(
													substr_count( $NextArticleSummary, "'' and ''[http://lesswrong.com/lw/hm/new_improved_lottery/ New Improved Lottery]''" ) > 0 ||
													substr_count( $NextArticleSummary, "(and [http://lesswrong.com/lw/ht/beware_the_unsurprised/ Beware the Unsurprised])" ) > 0 ||
													substr_count( $NextArticleSummary, "In particular, the [[Litany of Tarski]]." ) > 0 ||
													substr_count( $NextArticleSummary, "(but first read: [http://lesswrong.com/lw/m2/the_litany_against_gurus/ The Litany Against Gurus])" ) > 0 ||
													substr_count( $NextArticleSummary, ", [http://lesswrong.com/lw/" ) > 0 ||
													substr_count( $NextArticleSummary, "and [http://lesswrong.com/lw/" ) > 0 ||
													substr_count( $NextArticleSummary, "'' (prerequisite: [http://lesswrong.com/lw" ) > 0 ||
													substr_count( $NextArticleSummary, "'' and ''[http://lesswrong.com/lw" ) > 0 ||
													strcmp( $NextArticleSummary, "setting up the problem." ) == 0 ||
													strcmp( $NextArticleSummary, "[[Eliezer Yudkowsky]]" ) == 0 ||
													strcmp( $NextArticleSummary, "by Salamon and Rayhawk." ) == 0 ||
													strcmp( $NextArticleSummary, "by  [[Eliezer Yudkowsky]]" ) == 0 ||
													strcmp( $NextArticleSummary, "by talisman" ) == 0 ||
													strcmp( $NextArticleSummary, "(short story)" ) == 0 ||
													strcmp( $NextArticleSummary, "of a particular study design. Debiasing [http://lesswrong.com/lw/jk/burdensome_details/ won't be as simple] as practicing specific questions, it requires certain general habits of thought." ) == 0 ||
													strcmp( $NextArticleSummary, "as practicing specific questions, it requires certain general habits of thought." ) == 0 ||
													strcmp( $NextArticleSummary, "'' their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 ||
													strcmp( $NextArticleSummary, "all of their complicated ''other'' preferences into their choice of ''exactly'' which acts they try to ''[http://lesswrong.com/lw/kq/fake_justification/ justify using]'' their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 ||
													strcmp( $NextArticleSummary, "to this post tries to explain the cognitive twists whereby people [http://lesswrong.com/lw/ld/the_hidden_complexity_of_wishes/ smuggle] all of their complicated ''other'' preferences into their choice of ''exactly'' which acts they try to ''[http://lesswrong.com/lw/kq/fake_justification/ justify using]'' their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 ||
													strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/n1/allais_malaise/ followups]) - Offered choices between gambles, people make decision-theoretically inconsistent decisions." ) == 0 ||
													strcmp( $NextArticleSummary, ") - Offered choices between gambles, people make decision-theoretically inconsistent decisions." ) == 0 ||
													strcmp( $NextArticleSummary, "and ''[http://lesswrong.com/lw/oo/explaining_vs_explaining_away/ Explaining vs. Explaining Away]'' - elementary [[reductionism]]." ) == 0 ||
													strcmp( $NextArticleSummary, "\" which essentially answered \"Not on the present state of the Art\"" ) == 0 ||
													strcmp( $NextArticleSummary, "(and its [[Privileging the hypothesis | requisites]], like [[Locating the hypothesis]])" ) == 0 ||
													strcmp( $NextArticleSummary, "and ''[http://lesswrong.com/lw/hm/new_improved_lottery/ New Improved Lottery]" ) == 0 ||
													strcmp( $NextArticleSummary, "their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 ||
													strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/w6/recursion_magic/ ...Recursion, Magic]" ) == 0 ||
													strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/wf/hard_takeoff/ Hard Takeoff]" ) == 0 ||
													strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/wg/permitted_possibilities_locality/ Permitted Possibilities, & Locality]" ) == 0 ||
													strcmp( $NextArticleSummary, "(in the martial arts)" ) == 0 ||
													strcmp( $NextArticleSummary, "(in both psychotherapy and martial arts)" ) == 0 ||
													strcmp( $NextArticleSummary, "Description and account of the game." ) == 0 ||
													false
												)
											{
												// don't add the invalid summary
											}
											else
											{
												$ArrayAllArticleUsedSummaries[$SearchResult][] = $NextArticleSummary;
											}
										}
									}
								}
							}

							// for authors who have their own wikipages, link to the wikipage instead of the LW user page
							$CorrectArticleAuthor = str_replace("[http://lesswrong.com/user/Eliezer_Yudkowsky Eliezer_Yudkowsky]", "[[Eliezer Yudkowsky]]", $CorrectArticleAuthor);



							// check if the link ends with a /

							if
								(
									strcmp( $LastCharInLink, "/" ) != 0 &&
									$SlashCountBeforeAdd < 6
								)
							{
								$ArticleLinksWithoutEndingSlash[$ArticleLinksWithoutEndingSlashNextIndex] = "*[$CurrentLink $CorrectArticleTitle] by $CorrectArticleAuthor";

								$ArticleLinksWithoutEndingSlashConcept[$ArticleLinksWithoutEndingSlashNextIndex] = $CurrentTitle;

								$ArticleLinksWithoutEndingSlashNextIndex++;
							}



							if( strcmp($CurrentArticleTitle, $CorrectArticleTitle) != 0 && $LinkIsAfterHeader )
							{
								// don't report an error for the following links:
								if
									(
										substr_count( $CurrentArticleTitle, "ranges over anything, not just internal subjective experiences") <= 0 &&
										substr_count( $CurrentArticleTitle, "sequence leading up") <= 0 &&
										substr_count( $CurrentArticleTitle, "smuggle") <= 0 &&
										substr_count( $CurrentArticleTitle, "justify using") <= 0 &&
										substr_count( $CurrentArticleTitle, "choose other acts to justify") <= 0 &&
										substr_count( $CurrentArticleTitle, "Timeless decision theory") <= 0 &&
										substr_count( $CurrentArticleTitle, "philosophical majoritarianism") <= 0 &&
										substr_count( $CurrentArticleTitle, "critical comments") <= 0 &&
										substr_count( $CurrentArticleTitle, "Positive Bias") <= 0 &&
										substr_count( $CurrentArticleTitle, "Hindsight Bias") <= 0 &&
										substr_count( $CurrentArticleTitle, "not an isolated artifact") <= 0 &&
										substr_count( $CurrentArticleTitle, "won't be as simple") <= 0 &&
										substr_count( $CurrentArticleTitle, "Illusion of Transparency") <= 0 &&
										substr_count( $CurrentArticleTitle, "Affect Heuristic") <= 0 &&
										substr_count( $CurrentArticleTitle, "Evaluability") <= 0 &&
										substr_count( $CurrentArticleTitle, "Unbounded Scales, Huge Jury Awards, and Futurism") <= 0 &&
										substr_count( $CurrentArticleTitle, "subsequent") <= 0 &&
										substr_count( $CurrentArticleTitle, "followups") <= 0 &&
										substr_count( $CurrentArticleTitle, "Do We Believe <i>Everything</i> We're Told?") <= 0 &&
										substr_count( $CurrentArticleTitle, "Quantum Physics") <= 0 &&
										substr_count( $CurrentArticleTitle, "Shut Up and Do the Impossible") <= 0 &&
										substr_count( $CurrentArticleTitle, "You ''Can'' Face Reality") <= 0 &&
										substr_count( $CurrentArticleTitle, "Absence of Evidence ''Is'' Evidence of Absence") <= 0 &&
										substr_count( $CurrentArticleTitle, "Doublethink: Choosing to be Biased") <= 0 &&
										substr_count( $CurrentArticleTitle, "Anti-Epistemology") <= 0 &&
										substr_count( $CurrentArticleTitle, "Is Humanism a Religion-Substitute?") <= 0 &&
										substr_count( $CurrentArticleTitle, "Your Strength As A Rationalist") <= 0 &&
										substr_count( $CurrentArticleTitle, "Absence of Evidence '''is''' Evidence of Absence") <= 0 &&
										substr_count( $CurrentArticleTitle, "Reversed Stupidity is Not Intelligence") <= 0 &&
										substr_count( $CurrentArticleTitle, "A Human's Guide to Words") <= 0 &&
										substr_count( $CurrentArticleTitle, "here") <= 0 &&
										true
									)
								{
									$ArticleLinksWithWrongTitle[$ArticleLinksWithWrongTitleNextIndex] = "*[$CurrentLink $CorrectArticleTitle] by $CorrectArticleAuthor\n**(title was [$CurrentLink $CurrentArticleTitle])";

									$ArticleLinksWithWrongTitleConcept[$ArticleLinksWithWrongTitleNextIndex] = $CurrentTitle;

									$ArticleLinksWithWrongTitleNextIndex++;

									if( $Debug )
									{
										//fwrite( $stderr, "article title: $CurrentArticleTitle\ncorrect title: $CorrectArticleTitle\n\n" );
									}
								}
							}

							//fwrite( $stderr, "article title: $CurrentArticleTitle\n\n" );

							$ByCharacters = substr( $CurrentBody, $TitleEndPos, 6 );

							$FirstHeaderPos = strpos($CurrentBody, "==", 0);





							// check if the link has an author

							// ignore missing authors if the link appears before any headers
							if( strcmp($ByCharacters, "] by [") != 0 && $LinkIsAfterHeader )
							{
								$LineUpToAuthor = substr( $CurrentBody, $TokenStartPos-1, ($TitleEndPos+6)-($TokenStartPos-1) );
								$LineUpToAuthor = str_replace("\n", "\\n", $LineUpToAuthor);

								$ArticleLinksWithoutAuthor[$ArticleLinksWithoutAuthorNextIndex] = "*[$CurrentLink $CorrectArticleTitle] by $CorrectArticleAuthor\n**(was $LineUpToAuthor)";

								$ArticleLinksWithoutAuthorConcept[$ArticleLinksWithoutAuthorNextIndex] = $CurrentTitle;

								$ArticleLinksWithoutAuthorNextIndex++;

								if( $Debug )
								{
									//fwrite( $stderr, "Concept: $CurrentTitle, by characters: <$ByCharacters>\n\n" );
								}
							}

							//todo - change this to check if the author is correct!
						}
					
					
					
					
					
						//fwrite( $stderr, "link start: $TokenStartPos \n link end: $TokenEndPos \n current link: $CurrentLink\n\n" );
					
						// find the current link in the array, if it exists

						$SearchResult = array_search( $CurrentLink, $ArrayAllArticleLink );

						if( $SearchResult !== FALSE )
						{
							// find if this concept exists in the array
							if( in_array( $CurrentTitle, $ArrayAllArticleIndexedConcepts[$SearchResult] ) )
							{
								// mark the link as found
								$ArrayAllArticleFoundConcepts     [$SearchResult][$CurrentTitle] = true;
							}
							else
							{
								// add the concept to the array of unindexed concepts
								$ArrayAllArticleNotIndexedConcepts[$SearchResult][] = $CurrentTitle;
							}
						}
						
						$TokenStartPos = strpos($CurrentBody, "http://lesswrong.com/lw/", $TokenEndPos);
						$TokenEndPos = strpos($CurrentBody, " ", $TokenStartPos);

						$CurrentLink = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos );
						
						if
							(
								$TokenEndPos <= $TokenStartPos ||
								$TokenStartPos === FALSE ||
								substr_count( $CurrentLink, "http://lesswrong.com/lw/" ) <= 0
							)
						{
							$FinishedFindingLinks = true;
						}
					}
				}
				
				
				//$IsReadingConceptPages = false;

				//fwrite( $stderr, $CurrentBody );

				//unfinished!!!

				//for each page:

				// first check if the page is in the $ConceptFound list

				// read the list of articles in the "Blog posts" section.
				// report any links that appear in the All Articles page, for that concept, but don't appear in the wiki page
				// report any links that appear in the wiki page, but don't appear in the All Articles page, for that concept


				$SuccessfullyReadOneConceptPage = true;
			}
		}
	}
}

// Default handler for the XML parser; new_xml_parser() registers it with xml_set_default_handler().
// The body is intentionally empty, so whatever the parser passes in $data is ignored.
function defaultHandler($parser, $data)
{
}

// Opens $file for reading and creates an XML parser that is wired to this script's handler functions
// (startElement, endElement, characterData and defaultHandler).
//
// $file - path of the XML file to read
//
// Returns array($xml_parser, $fp) on success, or false when the file cannot be opened.
// On success the file name is also recorded in the global $parser_file array, under an integer id of the parser.
function new_xml_parser($file)
{
    global $parser_file;

    // open the file before creating the parser, so that no parser is created (and leaked) for a missing file.
    // the @ is deliberate: the caller reports the failure itself, based on the false return value
    if( !($fp = @fopen($file, "r")) )
    {
        return false;
    }

    $xml_parser = xml_parser_create();
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");
    xml_set_default_handler($xml_parser, "defaultHandler");

    if( !is_array($parser_file) )
    {
        $parser_file = (array) $parser_file;
    }

    // the parser is a resource before PHP 8 and an XMLParser object from PHP 8 on.
    // a resource used directly as an array key raises a warning, and an object used as a key is an error,
    // so derive an integer id instead (for a resource this is the same number PHP used to cast it to)
    $parser_id = is_object($xml_parser) ? spl_object_id($xml_parser) : (int) $xml_parser;

    $parser_file[$parser_id] = $file;

    return array($xml_parser, $fp);
}



// Parses the whole of $XMLfile once, feeding it in 4096-byte chunks to a parser created by new_xml_parser().
//
// Takes no parameters and returns nothing.
// The script is terminated with die() when the file cannot be opened, or when the parser reports an XML error.
function ReadOnceThroughTheWholeXMLFile()
{
	global $XMLfile;

	// create the XML parser

	$ParserAndFile = new_xml_parser($XMLfile);

	if( !is_array($ParserAndFile) )
	{
		die("could not open XML input");
	}

	list($xml_parser, $fp) = $ParserAndFile;

	// read the XML file

	$ReadAnyData     = false;
	$ParserFinalized = false;

	while( !$ParserFinalized )
	{
		$data = fread($fp, 4096);

		// compare with === so that a chunk consisting of just "0" is not mistaken for the end of the file
		if( $data === false || $data === '' )
		{
			if( !$ReadAnyData )
			{
				// nothing was ever given to the parser, so there is nothing to finish
				break;
			}

			// feof() is still false after a read that stops exactly at the end of the file.
			// in that case the parser has not been told yet that the document is complete,
			// so tell it now with one last, empty call
			$data = '';
			$ParserFinalized = true;
		}
		else
		{
			$ReadAnyData     = true;
			$ParserFinalized = feof($fp);
		}

		if( !xml_parse($xml_parser, $data, $ParserFinalized) )
		{
			die( sprintf("XML error: %s at line %d\n",
				xml_error_string(xml_get_error_code($xml_parser)),
				xml_get_current_line_number($xml_parser)));
		}
	}

	// we're done with the XML file now, so close it

	fclose($fp);

	// this function is called once per pass, so release the parser as well.
	// only a resource (PHP 7 and older) needs this; an XMLParser object is released automatically
	if( is_resource($xml_parser) )
	{
		xml_parser_free($xml_parser);
	}
}


fwrite( $stderr, "Reading through the All Articles pages\n\n" );

// Pass 1: the All Articles pages.
//
// The whole XML file is read again on every turn of the loop, until $NextYearToRead has gone past
// $FinalYearToRead, or until a complete read of the file did not produce the next All Articles page.
//
// Nothing in this loop advances $NextYearToRead or sets $SuccessfullyReadOneAllArticlesPage to true.
// NOTE(review): presumably the XML handlers do both when they find the page for the year - confirm.
//
// Reading the file once should be enough, because the All Articles pages should be in order in the XML file.
// The repeated reading is kept as it is for now; a more efficient way would probably tangle the code further.

$IsReadingAllArticlesPages   = true;
$NextYearToRead              = $FirstYearToRead;
$FailedToReadAllArticlesPage = false;

while( $NextYearToRead <= $FinalYearToRead )
{
	fwrite( $stderr, "processing the XML file, year " . $NextYearToRead . " \n\n" );

	$SuccessfullyReadOneAllArticlesPage = false;

	ReadOnceThroughTheWholeXMLFile();

	if( $SuccessfullyReadOneAllArticlesPage )
	{
		continue;
	}

	// a complete read of the file did not produce the page for this year, so give up
	$FailedToReadAllArticlesPage = true;

	fwrite( $stderr, "error: failed to read the next All Articles page, year " . $NextYearToRead . " \n\n" );

	break;
}

$IsReadingAllArticlesPages = false;



fwrite( $stderr, "Reading through the Summaries pages\n\n" );

// Pass 2: the Summaries pages.
//
// The whole XML file is read again on every turn of the loop, until $NextYearToRead has gone past
// $FinalYearToRead, or until a complete read of the file did not produce the next Summaries page.
//
// Nothing in this loop advances $NextYearToRead or sets $SuccessfullyReadOneSummariesPage to true.
// NOTE(review): presumably the XML handlers do both when they find the page for the year - confirm.
//
// Reading the file once should be enough, because the Summaries pages should be in order in the XML file.
// The repeated reading is kept as it is for now; a more efficient way would probably tangle the code further.

$IsReadingSummariesPages   = true;
$NextYearToRead            = $FirstYearToRead;
$FailedToReadSummariesPage = false;

while( $NextYearToRead <= $FinalYearToRead )
{
	fwrite( $stderr, "processing the XML file for summaries, year " . $NextYearToRead . " \n\n" );

	$SuccessfullyReadOneSummariesPage = false;

	ReadOnceThroughTheWholeXMLFile();

	if( $SuccessfullyReadOneSummariesPage )
	{
		continue;
	}

	// a complete read of the file did not produce the page for this year, so give up
	$FailedToReadSummariesPage = true;

	fwrite( $stderr, "error: failed to read the next summaries page, year " . $NextYearToRead . " \n\n" );

	break;
}

$IsReadingSummariesPages = false;



fwrite( $stderr, "marking all concepts as not found yet\n\n" );

// give every concept that is listed for any article an entry in $ConceptFound, with the value false

foreach( $ArrayAllArticleIndexedConcepts as $ConceptsOfOneArticle )
{
	foreach( $ConceptsOfOneArticle as $ConceptTitle )
	{
		$ConceptFound[$ConceptTitle] = false;
	}
}



fwrite( $stderr, "Reading through the Concept pages\n\n" );

// Pass 3: the Concept pages.
//
// One read of the whole XML file is enough here, because the concept pages
// don't need to be read in any particular order.

$FailedToReadConceptPage = false;
$IsReadingConceptPages   = true;

ReadOnceThroughTheWholeXMLFile();

fwrite( $stderr, "Pages read: " . $PagesRead . "\n" );

if( $SuccessfullyReadOneConceptPage )
{
	// at least one concept page was read, so there is nothing to report
}
else
{
	$FailedToReadConceptPage = true;

	fwrite( $stderr, "error: failed to read any Concept page \n\n" );
}



fwrite( $stderr, "Doing final processing, step 1 of 3...\n\n" );


// The per-page reports: each one is a section header, followed by one wikilink per page title.
// The page titles are the keys of the array that belongs to the report; the values are not used.
//
// Each row below is: array( is the report enabled?, section header, array keyed by page title )

$ConceptPageReports = array
(
	// this report was added just to clean up the pages that use the old OB link for Eliezer's post instead of the new LW link
	// disabled now because it gives too many false positives
	array( false, "\n\n==The following concept pages link to OvercomingBias.com articles:==\n\n", $PagesWithOvercomingBiasLinks ),

	array( true, "\n\n==The following concept pages have comments:==\n\n", $PagesWithComments ),
	array( true, "\n\n==The following concept pages have the \"Overcoming Bias Articles\" header:==\n\n", $PagesWithOvercomingBiasArticlesHeader ),
	array( true, "\n\n==The following concept pages have \"External references\" instead of \"References\":==\n\n", $PagesWithExternalReferences ),
	array( true, "\n\n==The following concept pages have a miscapitalized \"See Also\" header:==\n\n", $PagesWithSeeAlso ),
	array( true, "\n\n==The following concept pages have an author link that links to an external site:==\n\n", $PagesWithExternalAuthorLinks ),
	array( true, "\n\n==The following concept pages have an extra newline after the wikilink template:==\n\n", $PagesWithNewlineAfterWikiLink ),
	array( true, "\n\n==The following concept pages have the See Also section before the Blog Posts section:==\n\n", $PagesWithSeeAlsoBeforeBlogPosts ),
);

foreach( $ConceptPageReports as $Report )
{
	list( $IsEnabled, $SectionHeader, $Pages ) = $Report;

	if( !$IsEnabled )
	{
		continue;
	}

	fwrite( $fp, $SectionHeader );

	foreach( $Pages as $PageTitle => $Unused )
	{
		fwrite( $fp, "*[[" . $PageTitle . "]]\n" );
	}
}


// disabled because of a weird bug that was introduced when I added the "see also" checking
/*
fwrite( $fp, "\n\n==The following article links are missing the / at the end, or aren't in the index:==\n\n" );

$PreviousConcept = "";

foreach( $ArticleLinksWithoutEndingSlash as $key => $val )
{
	if( strcmp( $PreviousConcept, $ArticleLinksWithoutEndingSlashConcept[$key] ) != 0 )
	{
		fwrite( $fp, "\n*[[$ArticleLinksWithoutEndingSlashConcept[$key]]]\n" );
	}
	fwrite( $fp, "*$val\n" );
	
	$PreviousConcept = $ArticleLinksWithoutEndingSlashConcept[$key];
}
*/


// The article link reports.  The lines of each report are grouped by concept:
// a "*[[concept]]" line is written whenever the concept differs from the concept of the previous line.
//
// Each row below is: array( is the report enabled?, section header, report lines, concept of each report line )
// The last two arrays use the same keys.

$GroupedLinkReports = array
(
	array
	(
		true,
		"\n\n==The following article links have a wrong or improperly formatted title:==\n\n",
		$ArticleLinksWithWrongTitle,
		$ArticleLinksWithWrongTitleConcept
	),
	array
	(
		true,
		"\n\n==The following article links have a summary available that was not added to the page:==\n\n",
		$ArticleLinksWithAvailableSummary,
		$ArticleLinksWithAvailableSummaryConcept
	),
	// this report was disabled because there were way too many false positives
	array
	(
		false,
		"\n\n==The following article links have a missing or improperly formatted author:==\n\n",
		$ArticleLinksWithoutAuthor,
		$ArticleLinksWithoutAuthorConcept
	),
);

foreach( $GroupedLinkReports as $Report )
{
	list( $IsEnabled, $SectionHeader, $ReportLines, $ReportConcepts ) = $Report;

	if( !$IsEnabled )
	{
		continue;
	}

	fwrite( $fp, $SectionHeader );

	$PreviousConcept = "";

	foreach( $ReportLines as $LineIndex => $ReportLine )
	{
		$ConceptOfThisLine = $ReportConcepts[$LineIndex];

		// start a new group when the concept changes
		if( strcmp( $PreviousConcept, $ConceptOfThisLine ) != 0 )
		{
			fwrite( $fp, "\n*[[" . $ConceptOfThisLine . "]]\n" );
		}

		fwrite( $fp, "*" . $ReportLine . "\n" );

		$PreviousConcept = $ConceptOfThisLine;
	}
}



// list every concept whose entry in $ConceptFound is not true

fwrite( $fp, "\n\n==The following concepts don't have wikipages with links to LessWrong.com articles yet:==\n\n" );

foreach( $ConceptFound as $ConceptTitle => $WasFound )
{
	if( $WasFound )
	{
		continue;
	}

	fwrite( $fp, "*[[" . $ConceptTitle . "]]\n" );
}


// the next two reports list all of the keys of their array
// each row is: array( section header, array keyed by concept title )

$ConceptListReports = array
(
	array( "\n\n==The following concepts are not in the All Articles pages:==\n\n", $ConceptNotInIndex ),
	array( "\n\n==The following concepts are in the All Articles page, but are redirects:==\n\n", $ConceptThatAreRedirects ),
);

foreach( $ConceptListReports as $Report )
{
	list( $SectionHeader, $Concepts ) = $Report;

	fwrite( $fp, $SectionHeader );

	foreach( $Concepts as $ConceptTitle => $Unused )
	{
		fwrite( $fp, "*[[" . $ConceptTitle . "]]\n" );
	}
}




fwrite( $stderr, "Doing final processing, step 2 of 3...\n\n" );


// for each article that has at least one entry in $ArrayAllArticleNotIndexedConcepts,
// output the article's link and title, followed by one line per concept

fwrite( $fp, "\n\n==The following articles in the [[Less Wrong/All Articles|All Articles]] index are missing an entry:==\n" );

foreach( $ArrayAllArticleNotIndexedConcepts as $ArticleIndex => $ConceptsToAdd )
{
	if( count( $ConceptsToAdd ) <= 0 )
	{
		continue;
	}

	fwrite( $fp, "\n*[" . $ArrayAllArticleLink[$ArticleIndex] . " " . $ArrayAllArticleTitle[$ArticleIndex] . "] is missing the following concepts:\n" );

	foreach( $ConceptsToAdd as $ConceptTitle )
	{
		fwrite( $fp, "**[[" . $ConceptTitle . "]]\n" );
	}
}



fwrite( $stderr, "Doing final processing, step 3 of 3...\n\n" );


// now output the list of which article links need to be added to the concept pages:
// an article is reported for a concept when the article is indexed under the concept,
// but $ArrayAllArticleFoundConcepts has no entry for that concept and article

fwrite( $fp, "\n\n==The following article links need to be added to the concept pages:==\n" );

// for each concept
foreach( $ConceptFound as $key => $val )
{
	// PHP turns numeric-looking array keys into integers, so get the title back as a string before comparing
	$ConceptTitle = (string) $key;

	// make an array to store the missing links
	$MissingLinks = array();

	// for each article
	foreach( $ArrayAllArticleIndexedConcepts as $key2 => $val2 )
	{
		// check if the article contains the concept.
		// this has to be in_array(): array_search() returns the position of the match, and position 0 counts as false,
		// so an article that lists the concept first would never be reported
		if( !in_array( $ConceptTitle, $val2, true ) )
		{
			continue;
		}

		// an article might have no entry at all in the array of found concepts
		$FoundConcepts = isset( $ArrayAllArticleFoundConcepts[$key2] ) ? $ArrayAllArticleFoundConcepts[$key2] : array();

		// check if the concept's link was not found
		if( !array_key_exists( $key, $FoundConcepts ) )
		{
			// add it to the list of article links that were not found
			$MissingLinks[] = "**[$ArrayAllArticleLink[$key2] $ArrayAllArticleTitle[$key2]] by $ArrayAllArticleAuthor[$key2]";
		}
	}

	// if there are any not found article links, then output the list
	if( count($MissingLinks) > 0 )
	{
		fwrite( $fp, "\n*[[$key]] is missing the following article links:\n" );

		foreach( $MissingLinks as $val3 )
		{
			fwrite( $fp, "$val3\n" );
		}
	}
}


// now output the list of See Also links that only go one way:
// [[A]] -> [[B]] is reported when the See Also list of A names B, but the See Also list of B does not name A

fwrite( $fp, "\n\n==The following See Also links only go one way:==\n" );

// for each concept
foreach( $ConceptSeeAlso as $key => $val )
{
	// PHP turns numeric-looking array keys into integers, so get the title back as a string before comparing
	$Concept1 = (string) $key;

	foreach( $val as $val2 )
	{
		$Concept2 = $val2;

		// look up the See Also list of the other concept directly,
		// instead of going through every See Also link of every concept again for each link
		$MatchFound =
			isset( $ConceptSeeAlso[$Concept2] ) &&
			in_array( $Concept1, $ConceptSeeAlso[$Concept2], true );

		if( !$MatchFound )
		{
			fwrite( $fp, "\n*[[$Concept1]] -> [[$Concept2]]" );
		}
	}
}



// list, sorted by title, every concept whose entry in $ConceptFound is true

fwrite( $fp, "\n\n==The following is a list of all concept pages:==\n\n" );

$SortedConcepts = $ConceptFound;

ksort( $SortedConcepts );

// array_filter() without a callback keeps only the entries that count as true, and keeps their order
foreach( array_keys( array_filter( $SortedConcepts ) ) as $ConceptTitle )
{
	fwrite( $fp, "*[[" . $ConceptTitle . "]]\n" );
}


// the report ends with two fixed lists of links: one overview page, plus one page for each year from 2006 to 2010

$YearsWithPages = range( 2006, 2010 );


fwrite( $fp, "\n\n==Links to the All Articles pages:==\n" );
fwrite( $fp, "*[[Less Wrong/All Articles]]\n" );

foreach( $YearsWithPages as $Year )
{
	fwrite( $fp, "*[[Less Wrong/" . $Year . " Articles]]\n" );
}


fwrite( $fp, "\n\n==Links to the Summaries pages:==\n" );
fwrite( $fp, "*[[Less Wrong/Article summaries]]\n" );

foreach( $YearsWithPages as $Year )
{
	fwrite( $fp, "*[[Less Wrong/" . $Year . " Articles/Summaries]]\n" );
}



// this was the last thing to write to this file
fclose($fp);


// write the See Also links as a directed graph, in the Graphviz dot format

$fp = fopen('ConceptGraph.dot.txt', 'w');

//{_COPYBLOCK1
fwrite($fp, "digraph G {\n\nnode [fontsize=\"" . $fontsize . "\"]\n\n");

// output one node for each concept that has a See Also list.
// the node id is the title with the spaces replaced by underscores, in quotes, padded to $PaddingValue1.
// the label is the title itself, padded (together with its closing quote) to $PaddingValue2.
// the URL is the wiki.lesswrong.com address built from the title with underscores.
// no color, shape, style or fillcolor attributes are written for the nodes.
foreach( $ConceptSeeAlso as $ConceptTitle => $SeeAlsoTitles )
{
	$NodeId = str_replace(" ", "_", $ConceptTitle);

	$curLine  = str_pad( "\"" . $NodeId . "\"", $PaddingValue1 );
	$curLine .= "[label=\"" . str_pad( $ConceptTitle . "\"", $PaddingValue2 );
	$curLine .= ", URL=\"http://wiki.lesswrong.com/wiki/" . $NodeId . "\"";
	$curLine .= "];\n";

	fwrite($fp, $curLine);
}

fwrite($fp, "\n\n");

// output one edge for each See Also link, from the concept to the concept that it names
foreach( $ConceptSeeAlso as $ConceptTitle => $SeeAlsoTitles )
{
	// the left side of the edge is the same for every link of this concept
	$PaddedSourceId = str_pad( "\"" . str_replace(" ", "_", $ConceptTitle) . "\"", $PaddingValue1 );

	foreach( $SeeAlsoTitles as $TargetTitle )
	{
		$curLine = $PaddedSourceId . "-> " . "\"" . str_replace(" ", "_", $TargetTitle) . "\"" . "\n";

		fwrite($fp, $curLine);
	}
}

fwrite($fp, "\n\n}");

//}_COPYBLOCK1


fclose($fp);



// write one wiki table row per article: the link with the title, the list of concepts, and the author

$fp = fopen('AllArticles.txt', 'w');


fwrite( $stderr, "Outputting new All Articles page...\n\n" );


$ProgressCounter = 0;

foreach( $ArrayAllArticleTitle as $ArticleIndex => $ArticleTitle )
{
	// collect the concepts that are in the index first, then the ones that are not, skipping any repeats

	$TempConceptArray = array();

	$ConceptLists = array
	(
		$ArrayAllArticleIndexedConcepts[$ArticleIndex],
		$ArrayAllArticleNotIndexedConcepts[$ArticleIndex],
	);

	foreach( $ConceptLists as $ConceptList )
	{
		foreach( $ConceptList as $ConceptTitle )
		{
			if( in_array($ConceptTitle, $TempConceptArray) )
			{
				continue;
			}

			$TempConceptArray[] = $ConceptTitle;
		}
	}

	// turn the concepts into wikilinks, separated by commas

	$ConceptLinks = array();

	foreach( $TempConceptArray as $ConceptTitle )
	{
		$ConceptLinks[] = "[[" . $ConceptTitle . "]]";
	}

	$ConceptString = implode( ", ", $ConceptLinks );

	fwrite( $fp, "|-valign=\"top\"\n" );
	fwrite( $fp, "| [" . $ArrayAllArticleLink[$ArticleIndex] . " " . $ArticleTitle . "]\n" );
	fwrite( $fp, "| " . $ConceptString . "\n" );
	fwrite( $fp, "| " . $ArrayAllArticleAuthor[$ArticleIndex] . "\n" );

	$ProgressCounter++;
}

// the counter is only reported once, after the last article
fwrite( $stderr, "Progress: " . $ProgressCounter . "\n" );



fclose($fp);




// write the summaries in $ArrayAllArticleUsedSummaries, under a heading for each article.
// an article without any of these summaries gets no heading at all.

$fp = fopen('ArticleSummaries.txt', 'w');


fwrite( $stderr, "Outputting article summaries...\n\n" );


$ProgressCounter = 0;

foreach( $ArrayAllArticleTitle as $ArticleIndex => $ArticleTitle )
{
	$SummariesOfThisArticle = array();

	foreach( $ArrayAllArticleUsedSummaries[$ArticleIndex] as $UsedSummary )
	{
		$SummariesOfThisArticle[] = $UsedSummary;
	}

	if( count( $SummariesOfThisArticle ) > 0 )
	{
		// the heading comes before the first summary, and a separator comes before each of the others
		fwrite( $fp, "\n\n=====[" . $ArrayAllArticleLink[$ArticleIndex] . " " . $ArticleTitle . "]=====\n\n" );
		fwrite( $fp, implode( "\n\n(alternate summary:)\n\n", $SummariesOfThisArticle ) );
	}

	$ProgressCounter++;
}

// the counter is only reported once, after the last article
fwrite( $stderr, "Progress: " . $ProgressCounter . "\n" );



fclose($fp);




// write a heading for every article, followed by all of the summaries for that article:
// first the ones in $ArrayAllArticleOfficialSummaries (which is indexed by article title),
// then the ones in $ArrayAllArticleUsedSummaries (which is indexed like $ArrayAllArticleTitle)

$fp = fopen('ArticleSummaries2.txt', 'w');


fwrite( $stderr, "Outputting article summaries...\n\n" );


$ProgressCounter = 0;

foreach( $ArrayAllArticleTitle as $ArticleIndex => $ArticleTitle )
{
	$SummariesOfThisArticle = array();

	if( array_key_exists( $ArticleTitle, $ArrayAllArticleOfficialSummaries ) )
	{
		foreach( $ArrayAllArticleOfficialSummaries[$ArticleTitle] as $OfficialSummary )
		{
			$SummariesOfThisArticle[] = $OfficialSummary;
		}
	}

	foreach( $ArrayAllArticleUsedSummaries[$ArticleIndex] as $UsedSummary )
	{
		$SummariesOfThisArticle[] = $UsedSummary;
	}

	// the heading is written even when there is no summary; a separator comes before every summary but the first
	fwrite( $fp, "\n\n=====[" . $ArrayAllArticleLink[$ArticleIndex] . " " . $ArticleTitle . "]=====\n\n" );
	fwrite( $fp, implode( "\n\n(alternate summary:)\n\n", $SummariesOfThisArticle ) );

	$ProgressCounter++;
}

// the counter is only reported once, after the last article
fwrite( $stderr, "Progress: " . $ProgressCounter . "\n" );



fclose($fp);


//*/


fwrite( $stderr, "Processing Recent Post List...\n\n" );


// Download the raw HTML of the "recent posts" page into $RecentPostRawData.
// Any failure is reported on stderr and ends the script with a non-zero exit
// status. (A top-level "return <string>" also ends the script, but the string
// is discarded, so the failure would go unreported.)

$RecentPostsUrl = 'http://lesswrong.com/recentposts';

$handle = fopen( $RecentPostsUrl, 'r' );

if( $handle === false )
{
	fwrite( $stderr, "error opening $RecentPostsUrl\n" );
	exit( 1 );
}

$RecentPostRawData = "";

// read in 8 KiB chunks until fread() returns an empty string
while( true )
{
	$buf = fread( $handle, 8192 );

	if( $buf === false )
	{
		fclose( $handle );
		fwrite( $stderr, "error reading file\n" );
		exit( 1 );
	}

	if( $buf === '' )
	{
		break;
	}

	$RecentPostRawData .= $buf;
}

fclose($handle);



$fp = fopen('RecentPosts.txt', 'w');

// Scrape the post list out of $RecentPostRawData and write one wiki table row
// per post to RecentPosts.txt, in the reverse of the order they appear on the
// page.
//
// Each post is read as four consecutive fields, scanning forward:
//   1. article link  - value of the next href="..." attribute
//   2. article title - text between the following ">" and "<"
//   3. author link   - value of the next href="..." attribute
//   4. author name   - text between the following ">" and "<"
//
// Scanning stops when a field can no longer be found, when the author-name
// field starts beyond the "View more:" pager marker, or after 1000 posts.

$ArrayRecentPostLines = array();


// Scanning starts at the first "<h3>". If there is none, start at offset 0
// (this is what the implicit false -> 0 conversion amounted to).
$PostLineEndPos = strpos( $RecentPostRawData, "<h3>", 0);

if( $PostLineEndPos === false )
{
	$PostLineEndPos = 0;
}

$PostLineStartPos  = $PostLineEndPos;

$EndPos = strpos( $RecentPostRawData, "<p class=\"nextprev\">View more:", 0);


$IterationCount = 0;
$PastTheEnd = false;

if( $EndPos === false )
{
	// Without the end marker there is no way to tell post links from the
	// rest of the page, so extract nothing.
	fwrite( $stderr, "Warning: end-of-list marker not found on the recent posts page; no posts extracted\n" );

	$PastTheEnd = true;
}

// opening / closing delimiter of each field, in scan order
$FieldDelimiters = array(
	array( "<a href=\"", "\"" ), // article link
	array( ">",          "<"  ), // article title
	array( "<a href=\"", "\"" ), // author link
	array( ">",          "<"  ), // author name
);


while( $IterationCount < 1000 && ! $PastTheEnd )
{
	$Fields = array();

	foreach( $FieldDelimiters as $Delimiter )
	{
		// strpos() returns false when the delimiter is missing; doing
		// arithmetic on that would restart the scan near the top of the page.
		$OpenPos = strpos( $RecentPostRawData, $Delimiter[0], $PostLineEndPos );

		if( $OpenPos === false )
		{
			break;
		}

		$PostLineStartPos = $OpenPos + strlen( $Delimiter[0] );

		$ClosePos = strpos( $RecentPostRawData, $Delimiter[1], $PostLineStartPos );

		if( $ClosePos === false )
		{
			break;
		}

		$PostLineEndPos = $ClosePos;

		$Fields[] = substr( $RecentPostRawData, $PostLineStartPos, $PostLineEndPos-$PostLineStartPos );
	}

	// After a complete record $PostLineStartPos is the start of the author name.
	if( count( $Fields ) < count( $FieldDelimiters ) || $PostLineStartPos > $EndPos )
	{
		$PastTheEnd = true;
	}
	else
	{
		list( $NextArticleLink, $NextArticleTitle, $NextArticleAuthorLink, $NextArticleAuthorName ) = $Fields;

		// titles arrive HTML-escaped; turn the quote entity back into a literal quote
		$NextArticleTitle = str_replace("&quot;", "\"", $NextArticleTitle);

		$CurrentLineString  = "|-valign=\"top\"\n";
		$CurrentLineString .= "| [http://lesswrong.com$NextArticleLink $NextArticleTitle]\n";
		$CurrentLineString .= "| \n";
		$CurrentLineString .= "| [$NextArticleAuthorLink $NextArticleAuthorName]\n";

		$ArrayRecentPostLines[$IterationCount] = $CurrentLineString;

		$IterationCount++;
	}
}


// the rows were collected in page order; write them out last-to-first
foreach( array_reverse( $ArrayRecentPostLines ) as $RecentPostLine )
{
	fwrite( $fp, $RecentPostLine );
}



fwrite( $stderr, "Done" );


fclose($fp);


// Optional debug dump: when $Debug is truthy (it is set outside this part of
// the script), write the main working arrays to Debug.txt. Each array gets a
// banner line padded with blank lines, followed by one "label\nprint_r(value)"
// record per element.
if( $Debug )
{
	$fp = fopen('Debug.txt', 'w');

	// banner, array to dump, text appended after each record
	// (only the ConceptFound records are followed by an extra newline)
	$DebugSections = array(
		array( "\n\n\n\n\n\n\n\n\n\n ConceptFound \n\n\n\n\n\n\n\n\n\n", $ConceptFound, "\n" ),
		array( "\n\n\n\n\n\n\n\n\n\n ArrayAllArticleIndexedConcepts \n\n\n\n\n\n\n\n\n\n", $ArrayAllArticleIndexedConcepts, "" ),
		array( "\n\n\n\n\n\n\n\n\n\n ArrayAllArticleFoundConcepts \n\n\n\n\n\n\n\n\n\n", $ArrayAllArticleFoundConcepts, "" ),
		array( "\n\n\n\n\n\n\n\n\n\n ArrayAllArticleNotIndexedConcepts \n\n\n\n\n\n\n\n\n\n", $ArrayAllArticleNotIndexedConcepts, "" ),
		array( "\n\n\n\n\n\n\n\n\n\n ConceptSeeAlso \n\n\n\n\n\n\n\n\n\n", $ConceptSeeAlso, "" ),
		array( "\n\n\n\n\n\n\n\n\n\n ArrayAllArticleOfficialSummaries \n\n\n\n\n\n\n\n\n\n", $ArrayAllArticleOfficialSummaries, "" ),
	);

	foreach( $DebugSections as $Section )
	{
		list( $Banner, $Entries, $RecordSuffix ) = $Section;

		fwrite( $fp, $Banner );

		foreach( $Entries as $Label => $Value )
		{
			fwrite( $fp, $Label . "\n" . print_r( $Value, true ) . $RecordSuffix );
		}
	}

	// The used-summaries array is keyed by article index, so each record is
	// labelled with the article's title rather than with its raw key.
	fwrite( $fp, "\n\n\n\n\n\n\n\n\n\n ArrayAllArticleUsedSummaries \n\n\n\n\n\n\n\n\n\n" );

	foreach( $ArrayAllArticleUsedSummaries as $ArticleIndex => $Summaries )
	{
		fwrite( $fp, $ArrayAllArticleTitle[$ArticleIndex] . "\n" . print_r( $Summaries, true ) );
	}

	fclose($fp);
}



// last action of the script: release the stderr handle
fclose($stderr);



//
?>
User:PeerInfinity/Scripts/SyncArticleLinks.php

Navigation menu

Search