User:PeerInfinity/Scripts/SyncArticleLinks.php
< User:PeerInfinity | Scripts
Jump to navigation
Jump to search
<?php
// SyncArticleLinks.php // synchronize the list of article links on the concept pages with the list of concepts on the "All Articles" pages
// for the latest version of this script's output, see:
///stderr output - the stderr output of this script
///SyncArticleLinksOutput.txt - the output written to the text file, containing the Sync results
//
// stream used for progress and diagnostic messages
$stderr = fopen( "php://stderr", "wt" );
//*
// name of the XML dump file
$XMLfile = "daily_XML_dump.xml";

// the arrays for the data read from the All Articles pages:
// it would be more proper to create a struct for this, but for now I'll just do it the quick and dirty way
$NextAllArticleIndex = 0;
// these arrays are indexed by $NextAllArticleIndex
$ArrayAllArticleTitle = array();
$ArrayAllArticleLink = array();
$ArrayAllArticleIndexedConcepts = array(); // this is an array of arrays!
$ArrayAllArticleFoundConcepts = array(); // this is an array of arrays!
$ArrayAllArticleNotIndexedConcepts = array(); // this is an array of arrays!
$ArrayAllArticleAuthor = array();
$ArrayAllArticleDate = array(); // currently unused, This would need to be read by following the link to the article, which might be a good idea to implement eventually
$ArrayAllArticleOfficialSummaries = array(); // this is an array of arrays! //this one is indexed by article name!
$ArrayAllArticleUsedSummaries = array(); // this is an array of arrays!

// the array for all Concepts found
// these arrays are indexed by concept title
$ConceptFound = array();
$ConceptNotInIndex = array();
$ConceptThatAreRedirects = array();
$ConceptSeeAlso = array(); // this is an array of arrays!

// these arrays are indexed by concept title
$PagesWithOvercomingBiasLinks = array();
$PagesWithComments = array();
$PagesWithOvercomingBiasArticlesHeader = array();
$PagesWithExternalReferences = array();
$PagesWithSeeAlso = array();
$PagesWithExternalAuthorLinks = array();
$PagesWithNewlineAfterWikiLink = array();
$PagesWithSeeAlsoBeforeBlogPosts = array();

// ugh... this is ugly:
// these arrays are indexed by the index variables below
$ArticleLinksWithoutEndingSlash = array();
$ArticleLinksWithWrongTitle = array();
$ArticleLinksWithoutAuthor = array();
$ArticleLinksWithAvailableSummary = array();
$ArticleLinksWithoutEndingSlashConcept = array();
$ArticleLinksWithWrongTitleConcept = array();
$ArticleLinksWithoutAuthorConcept = array();
$ArticleLinksWithAvailableSummaryConcept = array();

// next free index for each of the report arrays above
// these names must match the "global" declarations in characterData() and endElement(),
// otherwise the handlers start from an uninitialized (null) counter and the first entry is stored under the key "" instead of 0
$ArticleLinksWithoutEndingSlashNextIndex = 0;
$ArticleLinksWithWrongTitleNextIndex = 0;
$ArticleLinksWithoutAuthorNextIndex = 0;
$ArticleLinksWithAvailableSummaryNextIndex = 0;
// older spellings of the last two counters, kept only in case code elsewhere in the script still refers to them
$ArticleLinksWithoutAuthorConceptNextIndex = 0;
$ArticleLinksWithAvailableSummaryIndex = 0;

// title and accumulated text of the page currently being parsed
$CurrentTitle = "";
$CurrentBody = "";

// which XML element the parser is currently inside (set in startElement())
$IsReadingTitle = false;
$IsReadingText = false;

// which kind of page the current pass over the dump is looking for
$IsReadingAllArticlesPages = false;
$IsReadingSummariesPages = false;
$IsReadingConceptPages = false;

// whether the page currently being parsed is one of the kind we are looking for
$IsReadingOneAllArticlesPage = false;
$IsReadingOneSummariesPage = false;
$IsReadingOneConceptPage = false;

// range of years of the "Less Wrong/<year> Articles" pages
$FirstYearToRead = 2006;
$FinalYearToRead = 2010; //todo - update this in 2011!!!
$NextYearToRead = $FirstYearToRead;
$YearCurrentlyBeingRead = $FirstYearToRead;

$SuccessfullyReadOneAllArticlesPage = false;
$SuccessfullyReadOneSummariesPage = false;
$FailedToReadAllArticlesPage = false;
$SuccessfullyReadOneConceptPage = false;
$FailedToReadConceptPage = false;

// number of concept pages seen so far, used for the progress messages
$PagesRead = 0;
$Debug = true;

// for doxygen
$fontsize = 12;

// to make the script easier to read, pad each section to a specific number of characters, to make everything line up
$PaddingValue1 = 100;
$PaddingValue2 = 50;

//open a text file for the output of this script
//the script will send its output both to this text file and to stdout (or was it stderr?)
$fp = fopen('SyncArticleLinksOutput.txt', 'w');
// now for some code that I still haven't figured out a good way to untangle
// this processing currently needs to be done in these functions, and can't be moved to a more sensible place
/**
 * Notes which element has just been opened.
 * Presumably registered as the XML parser's start-element handler - confirm against the parser setup code.
 *
 * @param mixed  $parser  the parser invoking the callback (not used here)
 * @param string $name    the name of the element that was opened
 * @param array  $attribs the attributes of the element (not used here)
 */
function startElement($parser, $name, $attribs)
{
    global $CurrentTitle, $CurrentBody, $IsReadingTitle, $IsReadingText, $stderr;

    // characterData() looks at these two flags to decide what to do with the text that follows;
    // opening any element other than TITLE or TEXT clears both of them
    $IsReadingTitle = ( $name == "TITLE" );
    $IsReadingText = ( $name == "TEXT" );
}
/**
 * Handles one chunk of character data from the XML dump.
 *
 * When the chunk belongs to a TITLE element, the title is remembered, the per-page flags are reset,
 * and the title is checked against the kind of page the current pass is looking for
 * (All Articles pages, Summaries pages, or concept pages).
 * When the chunk belongs to a TEXT element of a page that was selected, it is appended to $CurrentBody;
 * the accumulated text is processed in endElement().
 *
 * All state is exchanged through global variables.
 *
 * NOTE(review): an XML parser may deliver the text of one element in several calls. Only the first
 * chunk of a title is kept here, because $IsReadingTitle is cleared after the first call - confirm
 * whether titles containing entities are affected.
 *
 * @param mixed  $parser the parser invoking the callback (not used here)
 * @param string $data   the chunk of character data
 */
function characterData($parser, $data)
{
    global $stderr;
    global $CurrentTitle, $CurrentBody;
    global $IsReadingTitle, $IsReadingText;
    global $IsReadingAllArticlesPages, $IsReadingSummariesPages, $IsReadingConceptPages;
    global $IsReadingOneAllArticlesPage, $IsReadingOneSummariesPage, $IsReadingOneConceptPage;
    global $FirstYearToRead, $FinalYearToRead, $NextYearToRead, $YearCurrentlyBeingRead;
    global $PagesRead;

    // note: the output text file is deliberately NOT opened here.
    // opening it in 'w' mode on every callback would truncate the report file again and again.

    // if we're reading the title, then remember the title, and check if we want to read the page content
    if( $IsReadingTitle )
    {
        $CurrentBody = "";
        $CurrentTitle = $data;
        $IsReadingTitle = false;
        $IsReadingOneAllArticlesPage = false;
        $IsReadingOneSummariesPage = false;
        $IsReadingOneConceptPage = false;

        // skip category pages, template pages, etc.
        $SkippedTitleFragments = array( "Category:", "Template:", "Talk:", "Category talk:" );
        $SkippedTitles = array
        (
            "Catch Phrases",
            "Categories",
            "Chat Logs/2009-04-11",
            "Disagreements on Less Wrong",
            "Series",
            "Using the wiki",
            "Acronyms used on Less Wrong",
            "Less Wrong/Errors from moving Eliezer's posts from OB to LW",
        );
        $SkipThisPage = in_array( $CurrentTitle, $SkippedTitles, true );
        foreach( $SkippedTitleFragments as $Fragment )
        {
            if( strpos( $CurrentTitle, $Fragment ) !== FALSE )
            {
                $SkipThisPage = true;
                break;
            }
        }

        if( !$SkipThisPage )
        {
            if( $IsReadingAllArticlesPages && $NextYearToRead <= $FinalYearToRead )
            {
                // check if the page title is the next year we want to read
                // NOTE(review): this also matches "Less Wrong/<year> Articles/Summaries" - the first matching page in the dump wins
                if( strpos( $CurrentTitle, "Less Wrong/$NextYearToRead Articles" ) !== FALSE )
                {
                    $YearCurrentlyBeingRead = $NextYearToRead;
                    $NextYearToRead++;
                    $IsReadingOneAllArticlesPage = true;
                    fwrite( $stderr, "Processing the wikipage for $YearCurrentlyBeingRead \n\n" );
                }
            }
            else if( $IsReadingSummariesPages && $NextYearToRead <= $FinalYearToRead )
            {
                // check if the page title is the next year we want to read
                if( strpos( $CurrentTitle, "Less Wrong/$NextYearToRead Articles/Summaries" ) !== FALSE )
                {
                    $YearCurrentlyBeingRead = $NextYearToRead;
                    $NextYearToRead++;
                    $IsReadingOneSummariesPage = true;
                    fwrite( $stderr, "Processing the summaries page for $YearCurrentlyBeingRead \n\n" );
                }
            }
            else if( $IsReadingConceptPages )
            {
                // don't process the All Articles pages (or their subpages) of any year in the configured range!
                $IsAnAllArticlesPage = false;
                for( $Year = $FirstYearToRead; $Year <= $FinalYearToRead; $Year++ )
                {
                    if( strpos( $CurrentTitle, "Less Wrong/$Year Articles" ) !== FALSE )
                    {
                        $IsAnAllArticlesPage = true;
                        break;
                    }
                }
                if( !$IsAnAllArticlesPage )
                {
                    // no special processing here, just remember the title
                    $IsReadingOneConceptPage = true;
                    // progress message every 100 pages
                    if( $PagesRead % 100 == 0 )
                    {
                        fwrite( $stderr, "Pages read: $PagesRead\n" );
                    }
                    $PagesRead++;
                }
            }
            else if( $IsReadingAllArticlesPages || $IsReadingSummariesPages )
            {
                // reading All Articles or summaries pages, but already past the final year: nothing left to look for
            }
            else
            {
                fwrite( $stderr, "Error: not reading all articles, summaries, or concept pages\n\n" );
            }
        }
    }

    // if we're reading the text of a page that was selected above, then store the content of the article
    // we'll process the data in endElement()
    if
    (
        $IsReadingText &&
        ( $IsReadingOneAllArticlesPage || $IsReadingOneSummariesPage || $IsReadingOneConceptPage )
    )
    {
        $CurrentBody .= $data;
    }
}
function endElement($parser, $name)
{
global $stderr;
global $XMLfile;
global $NextAllArticleIndex;
global $ArrayAllArticleTitle ;
global $ArrayAllArticleLink ;
global $ArrayAllArticleIndexedConcepts ;
global $ArrayAllArticleFoundConcepts ;
global $ArrayAllArticleNotIndexedConcepts;
global $ArrayAllArticleAuthor ;
global $ArrayAllArticleDate ;
global $ArrayAllArticleOfficialSummaries ;
global $ArrayAllArticleUsedSummaries ;
global $ConceptFound;
global $ConceptNotInIndex;
global $ConceptThatAreRedirects;
global $ConceptSeeAlso;
global $PagesWithOvercomingBiasLinks;
global $PagesWithComments;
global $PagesWithOvercomingBiasArticlesHeader;
global $PagesWithExternalReferences;
global $PagesWithSeeAlso;
global $PagesWithExternalAuthorLinks;
global $PagesWithNewlineAfterWikiLink;
global $PagesWithSeeAlsoBeforeBlogPosts;
global $ArticleLinksWithoutEndingSlash;
global $ArticleLinksWithWrongTitle;
global $ArticleLinksWithoutAuthor;
global $ArticleLinksWithAvailableSummary;
global $ArticleLinksWithoutEndingSlashConcept;
global $ArticleLinksWithWrongTitleConcept;
global $ArticleLinksWithoutAuthorConcept;
global $ArticleLinksWithAvailableSummaryConcept;
global $ArticleLinksWithoutEndingSlashNextIndex;
global $ArticleLinksWithWrongTitleNextIndex;
global $ArticleLinksWithoutAuthorNextIndex;
global $ArticleLinksWithAvailableSummaryNextIndex;
global $CurrentTitle;
global $CurrentBody ;
global $IsReadingTitle;
global $IsReadingText ;
global $IsReadingAllArticlesPages;
global $IsReadingSummariesPages ;
global $IsReadingConceptPages ;
global $IsReadingOneAllArticlesPage;
global $IsReadingOneSummariesPage ;
global $IsReadingOneConceptPage ;
global $FirstYearToRead;
global $FinalYearToRead;
global $NextYearToRead;
global $YearCurrentlyBeingRead;
global $SuccessfullyReadOneAllArticlesPage;
global $SuccessfullyReadOneSummariesPage ;
global $FailedToReadAllArticlesPage ;
global $SuccessfullyReadOneConceptPage;
global $FailedToReadConceptPage;
global $PagesRead;
global $Debug;
// now process all the text that we read
if( $name == "TEXT" )
{
// this check is probably unnecessary
if( $IsReadingText )
{
if( $IsReadingOneAllArticlesPage )
{
//fwrite( $stderr, "-------------------------------------------reading body of year $YearCurrentlyBeingRead , $CurrentTitle \n\n" );
//fwrite( $stderr, $CurrentBody );
// now parse the data from this page and store it into the arrays
$CurrentGroupStartPos = 0;
// skip to the first '|-'
$NextGroupStartPos = strpos( $CurrentBody, "|-", $CurrentGroupStartPos+1 );
// keep going until there are no more groups
while( $NextGroupStartPos !== FALSE && $CurrentGroupStartPos != $NextGroupStartPos )
{
// find the start and end of the current group
$CurrentGroupStartPos = $NextGroupStartPos;
$NextGroupStartPos = strpos( $CurrentBody, "|-", $CurrentGroupStartPos+1 );
// if there is no next group, then set the end of this group to the end of the whole string
if( $NextGroupStartPos === FALSE )
{
$NextGroupStartPos = strlen( $CurrentBody ) - 1;
}
// now find and store the parts
// there is probably a much simpler way to do all this using regexes
$DataIsValid = true;
$CurrentArticleConceptArray = array();
$TokenStartPos = strpos($CurrentBody, "[", $CurrentGroupStartPos) + 1;
$TokenEndPos = strpos($CurrentBody, " ", $TokenStartPos);
$CurrentArticleLink = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos );
if( substr_count( $CurrentArticleLink, "http://" ) <= 0 )
{
$DataIsValid = false;
}
$TokenStartPos = $TokenEndPos + 1;
$TokenEndPos = strpos($CurrentBody, "]", $TokenStartPos);
$CurrentArticleTitle = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos );
$CurrentArticleTitle = str_replace(""", "\"", $CurrentArticleTitle);
$NextLineStartPos = strpos($CurrentBody, "|", $TokenEndPos);
$NextLineEndPos = strpos($CurrentBody, "|", $NextLineStartPos+1);
$ConceptLine = substr( $CurrentBody, $NextLineStartPos, $NextLineEndPos-$NextLineStartPos );
//fwrite( $stderr, "concept line start: $NextLineStartPos \n concept line end: $NextLineEndPos \n concept line: $ConceptLine\n\n" );
$FinishedFindingConcepts = false;
$ConceptIndex = 0;
$TokenStartPos = 0;
$TokenEndPos = 0;
while( !$FinishedFindingConcepts )
{
$TokenStartPos = strpos($ConceptLine, "[[", $TokenEndPos);
$TokenEndPos = strpos($ConceptLine, "]]", $TokenStartPos);
$CurrentConcept = substr( $ConceptLine, $TokenStartPos+2, $TokenEndPos-$TokenStartPos-2 );
if
(
$TokenEndPos > $TokenStartPos &&
substr_count($CurrentConcept, "[[") <= 0 &&
substr_count($CurrentConcept, "]]") <= 0
)
{
//fwrite( $stderr, "found concept: $CurrentConcept\n\n" );
$CurrentArticleConceptArray[$ConceptIndex] = $CurrentConcept;
$ConceptIndex++;
$FinishedFindingConcepts = false;
}
else
{
$FinishedFindingConcepts = true;
}
//fwrite( $stderr, "concept start: $TokenStartPos \n concept end: $TokenEndPos \n concept index: $ConceptIndex \n concept line: $ConceptLine\n\n" );
}
$NextLineStartPos = strpos($CurrentBody, "|", $NextLineEndPos);
$NextLineEndPos = strpos($CurrentBody, "|", $NextLineStartPos+1);
$TokenStartPos = strpos($CurrentBody, "[", $NextLineStartPos);
$TokenEndPos = strpos($CurrentBody, "]", $TokenStartPos);
$CurrentArticleAuthor = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos+1 );
if( $DataIsValid )
{
$ArrayAllArticleTitle [$NextAllArticleIndex] = $CurrentArticleTitle;
$ArrayAllArticleLink [$NextAllArticleIndex] = $CurrentArticleLink;
//$CurrentArticleConceptArray[] = "concept1";
$ArrayAllArticleIndexedConcepts [$NextAllArticleIndex] = $CurrentArticleConceptArray;
$ArrayAllArticleFoundConcepts [$NextAllArticleIndex] = array();
$ArrayAllArticleNotIndexedConcepts [$NextAllArticleIndex] = array();
$ArrayAllArticleAuthor [$NextAllArticleIndex] = $CurrentArticleAuthor;
$ArrayAllArticleDate [$NextAllArticleIndex] = "date";
$ArrayAllArticleUsedSummaries [$NextAllArticleIndex] = array();
//$TempString = print_r($ArrayAllArticleIndexedConcepts[$NextAllArticleIndex]);
//fwrite( $stderr, "start: $CurrentGroupStartPos \n end: $NextGroupStartPos \n index: $NextAllArticleIndex \n $ArrayAllArticleTitle[$NextAllArticleIndex] \n $ArrayAllArticleLink[$NextAllArticleIndex] \n $TempString \n $ArrayAllArticleAuthor[$NextAllArticleIndex] \n $ArrayAllArticleDate[$NextAllArticleIndex] \n\n" );
$NextAllArticleIndex++;
}
}
$SuccessfullyReadOneAllArticlesPage = true;
}
if( $IsReadingOneSummariesPage )
{
//fwrite( $stderr, "reading summaries page for $YearCurrentlyBeingRead \n\n" );
$NextSummaryTitleStart = 0;
$NextSummaryTitleEnd = 0;
$NextSummaryTextBlockStart = 0;
$NextSummaryTextBlockEnd = 0;
$NextSummaryTextStart = 0;
$NextSummaryTextEnd = 0;
$AlternateSummaryCount = 0;
$FinishedReadingSummaries = false;
while( !$FinishedReadingSummaries )
{
$NextSummaryTitleStart = strpos($CurrentBody, "=====[", $NextSummaryTitleEnd);
//fwrite( $stderr, "CurrentBody: $CurrentBody \n\n" );
//fwrite( $stderr, "NextSummaryTitleStart: $NextSummaryTitleStart \n\n" );
if( $NextSummaryTitleStart === FALSE )
{
$FinishedReadingSummaries = true;
}
else
{
$NextSummaryTitleStart += 6;
$NextSummaryTitleEnd = strpos($CurrentBody, "]=====", $NextSummaryTitleStart);
$FirstSpacePos = strpos($CurrentBody, " ", $NextSummaryTitleStart);
$CurrentSummaryLink = substr( $CurrentBody, $NextSummaryTitleStart, $FirstSpacePos-$NextSummaryTitleStart );
$CurrentSummaryTitle = substr( $CurrentBody, $FirstSpacePos+1, $NextSummaryTitleEnd-$FirstSpacePos-1 );
$CurrentSummaryTitle = str_replace(""", "\"", $CurrentSummaryTitle);
//fwrite( $stderr, "reading summary of $CurrentSummaryTitle \n\n" );
$NextSummaryTextBlockStart = $NextSummaryTitleEnd + 8;
$NextSummaryTextBlockEnd = strpos($CurrentBody, "=====[", $NextSummaryTitleEnd);
if( $NextSummaryTextBlockEnd === FALSE )
{
$NextSummaryTextBlockEnd = strlen($CurrentBody);
}
$NextSummaryTextBlock = substr( $CurrentBody, $NextSummaryTextBlockStart, $NextSummaryTextBlockEnd-$NextSummaryTextBlockStart );
$AlternateSummaryCount = substr_count( $NextSummaryTextBlock, "(alternate summary:)" );
$NextSummaryTextEnd = 0;
$ArrayAllArticleOfficialSummaries[$CurrentSummaryTitle] = array();
for( $AlternateSummaryNum = 0; $AlternateSummaryNum < $AlternateSummaryCount; $AlternateSummaryNum++ )
{
$NextSummaryTextStart = $NextSummaryTextEnd;
$NextSummaryTextEnd = strpos( $NextSummaryTextBlock, "\n\n(alternate summary:)\n\n", $NextSummaryTextStart );
$SummaryToAdd = substr( $NextSummaryTextBlock, $NextSummaryTextStart, $NextSummaryTextEnd-$NextSummaryTextStart );
$ArrayAllArticleOfficialSummaries[$CurrentSummaryTitle][$AlternateSummaryNum] = $SummaryToAdd;
$NextSummaryTextEnd += strlen( "\n\n(alternate summary:)\n\n" );
}
$NextSummaryTextStart = $NextSummaryTextEnd;
$SummaryToAdd = substr( $NextSummaryTextBlock, $NextSummaryTextStart );
$SummaryToAdd = trim( $SummaryToAdd, "\n" );
if( strlen( $SummaryToAdd ) > 0 )
{
if( strpos( $SummaryToAdd, "__NOTOC__" ) !== FALSE )
{
//todo - decide how to deal with this case!
}
else
{
$ArrayAllArticleOfficialSummaries[$CurrentSummaryTitle][$AlternateSummaryCount] = $SummaryToAdd;
}
}
}
}
$SuccessfullyReadOneSummariesPage = true;
}
if( $IsReadingOneConceptPage )
{
//fwrite( $stderr, "------reading body of $CurrentTitle \n\n" );
// don't bother processing the page unless it has a "Blog posts" section
$TokenStartPos = 0;
$TokenEndPos = 0;
// keep track of which pages have links to OvercomingBias.com articles
if( substr_count( $CurrentBody, "http://www.overcomingbias.com/20" ) > 0 )
{
// the following pages are "allowed" to have OvercomingBias.com articles - these pages were already checked manually for links that should point to lesswrong.com instead
if
(
$CurrentTitle == "Aumann's agreement theorem" ||
$CurrentTitle == "Bias" ||
$CurrentTitle == "Bite the bullet" ||
$CurrentTitle == "Black swan" ||
$CurrentTitle == "Catch Phrases" ||
$CurrentTitle == "Chat Logs/2009-04-11" ||
$CurrentTitle == "Cognitive style" ||
$CurrentTitle == "Coherence" ||
$CurrentTitle == "Connotation" ||
$CurrentTitle == "Consistency" ||
$CurrentTitle == "Cryonics" ||
$CurrentTitle == "Dark arts" ||
$CurrentTitle == "Disagreement" ||
$CurrentTitle == "Disagreements on Less Wrong" ||
$CurrentTitle == "Emotion" ||
$CurrentTitle == "Extraordinary evidence" ||
$CurrentTitle == "Forecast" ||
$CurrentTitle == "Hypocrisy" ||
$CurrentTitle == "Impossible world" ||
$CurrentTitle == "Intellectual roles" ||
$CurrentTitle == "Likelihood ratio" ||
$CurrentTitle == "Meme lineage" ||
$CurrentTitle == "Modesty argument" ||
$CurrentTitle == "Near/far thinking" ||
$CurrentTitle == "Overcoming Bias" ||
$CurrentTitle == "Overconfidence" ||
$CurrentTitle == "Prediction market" ||
$CurrentTitle == "Scales of justice fallacy" ||
$CurrentTitle == "Series" ||
$CurrentTitle == "Signaling" ||
$CurrentTitle == "Signalling" ||
$CurrentTitle == "Status" ||
$CurrentTitle == "Stereotype" ||
false
)
{
// do nothing
}
else
{
$PagesWithOvercomingBiasLinks[$CurrentTitle] = true;
}
}
if( substr_count( $CurrentBody, "<!--" ) > 0 )
{
// the following pages are "allowed" to have comments
if
(
$CurrentTitle == "LessWrong Wiki"
)
{
// do nothing
}
else
{
$PagesWithComments[$CurrentTitle] = true;
}
}
if( substr_count( $CurrentBody, "==Overcoming Bias Articles==" ) > 0 )
{
$PagesWithOvercomingBiasArticlesHeader[$CurrentTitle] = true;
}
if( substr_count( $CurrentBody, "External references" ) > 0 )
{
$PagesWithExternalReferences[$CurrentTitle] = true;
}
if( substr_count( $CurrentBody, "See Also" ) > 0 )
{
$PagesWithSeeAlso[$CurrentTitle] = true;
}
if( substr_count( $CurrentBody, "by [http" ) > 0 )
{
$PagesWithExternalAuthorLinks[$CurrentTitle] = true;
}
if( substr_count( $CurrentBody, "wikilink}}\n\n" ) > 0 )
{
$PagesWithNewlineAfterWikiLink[$CurrentTitle] = true;
}
$SeeAlsoPos = strpos($CurrentBody, "==See also==");
$BlogPostsPos = strpos($CurrentBody, "==Blog posts==");
if( $SeeAlsoPos !== FALSE && $BlogPostsPos !== FALSE )
{
if( $SeeAlsoPos < $BlogPostsPos )
{
$PagesWithSeeAlsoBeforeBlogPosts[$CurrentTitle] = true;
}
}
if( substr_count( $CurrentBody, "#REDIRECT" ) > 0 )
{
if( array_key_exists( $CurrentTitle, $ConceptFound ) )
{
$ConceptThatAreRedirects[$CurrentTitle] = true;
}
}
//todo - reconsider this!!!
//if( substr_count( $CurrentArticleLink, "==Blog posts==" ) > 0 )
if( substr_count( $CurrentBody, "http://lesswrong.com/lw/" ) > 0 )
{
// first check if the concept exists in the index
$ConceptIsInIndex = false;
if( array_key_exists( $CurrentTitle, $ConceptFound ) )
{
$ConceptIsInIndex = true;
}
if( $ConceptIsInIndex )
{
// if the concept exists in the index, then mark the concept as found
$ConceptFound[$CurrentTitle] = true;
//fwrite( $stderr, "++++++found: $CurrentTitle\n\n" );
}
else
{
// if the concept doesn't exist in the index, then mark the concept as not found
$ConceptNotInIndex[$CurrentTitle] = true;
}
// now keep track of the "See also" links
if( array_key_exists($CurrentTitle, $ConceptFound ) )
{
if( $ConceptFound[$CurrentTitle] == true )
{
$SeeAlsoStartPos = strpos($CurrentBody, "==See also==", 0);
if( $SeeAlsoStartPos !== FALSE )
{
$SeeAlsoStartPos += 12;
}
else
{
$SeeAlsoStartPos = strpos($CurrentBody, "==Related concepts==", 0);
if( $SeeAlsoStartPos !== FALSE )
{
$SeeAlsoStartPos += 20;
}
else
{
}
}
if( $SeeAlsoStartPos !== FALSE )
{
$SeeAlsoEndPos = strpos($CurrentBody, "==", $SeeAlsoStartPos);
if( $SeeAlsoEndPos === FALSE )
{
$SeeAlsoEndPos = strlen($CurrentBody);
}
$TokenStartPos = $SeeAlsoStartPos;
$TokenEndPos = $SeeAlsoStartPos;
$DoneSeeAlso = false;
$EntriesFound = 0;
$ConceptSeeAlso[$CurrentTitle] = array();
//fwrite( $stderr, "Concept: " . $CurrentTitle . "\n" );
while( !$DoneSeeAlso )
{
$TokenStartPos = strpos($CurrentBody, "[[", $TokenEndPos);
$TokenEndPos = strpos($CurrentBody, "]]", $TokenStartPos);
if
(
$TokenStartPos < $SeeAlsoEndPos &&
$TokenStartPos >= $SeeAlsoStartPos &&
$TokenStartPos !== FALSE &&
$TokenEndPos !== FALSE
)
{
$CurrentSeeAlso = substr( $CurrentBody, $TokenStartPos+2, $TokenEndPos-$TokenStartPos-2 );
if( substr_count( $CurrentSeeAlso, "Category:") <= 0 )
{
$ConceptSeeAlso[$CurrentTitle][$EntriesFound] = $CurrentSeeAlso;
//fwrite( $stderr, $ConceptSeeAlso[$CurrentTitle][$EntriesFound] . "\n" );
$EntriesFound++;
}
}
else
{
$DoneSeeAlso = true;
}
//fwrite( $stderr, "SeeAlsoStartPos: $SeeAlsoStartPos SeeAlsoEndPos: $SeeAlsoEndPos TokenStartPos: $TokenStartPos TokenEndPos $TokenEndPos" . "\n" );
}
}
}
}
$TokenStartPos = strpos($CurrentBody, "http://lesswrong.com/lw/", $TokenEndPos);
$TokenEndPos = strpos($CurrentBody, " ", $TokenStartPos);
$NextTokenStartPos = strpos($CurrentBody, "http://lesswrong.com/lw/", $TokenEndPos);
if( $NextTokenStartPos === FALSE )
{
$NextTokenStartPos = strlen($CurrentBody);
}
$CurrentLink = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos );
$FinishedFindingLinks = false;
while( !$FinishedFindingLinks )
{
// check if the link appears before or after the first header
$FirstHeaderPos = strpos($CurrentBody, "==", 0);
$LinkIsAfterHeader = false;
if( $FirstHeaderPos === FALSE )
{
$LinkIsAfterHeader = false;
}
else
{
if( $FirstHeaderPos < $TokenStartPos )
{
$LinkIsAfterHeader = true;
}
else
{
$LinkIsAfterHeader = false;
}
}
// ignore the following links:
if
(
substr_count( $CurrentLink, "http://lesswrong.com/lw/b1/persuasiveness_vs_soundness/789") <= 0 &&
substr_count( $CurrentLink, "http://lesswrong.com/lw/14v/the_usefulness_of_correlations/11iu") <= 0
)
{
// check if the link ends with a /
$LastCharInLink = substr( $CurrentLink, -1 );
$SlashCountBeforeAdd = substr_count( $CurrentLink, "/");
if
(
strcmp( $LastCharInLink, "/" ) != 0
)
{
$CurrentLink .= "/";
}
// check if the link's title is correct
$TitleStartPos = $TokenEndPos+1;
$TitleEndPos = strpos($CurrentBody, "]", $TitleStartPos);
$CurrentArticleTitle = substr( $CurrentBody, $TitleStartPos, $TitleEndPos-$TitleStartPos );
$CorrectArticleTitle = "(not found)";
$CorrectArticleAuthor = "(not found)";
$SearchResult = array_search( $CurrentLink, $ArrayAllArticleLink );
if( $SearchResult !== FALSE )
{
$CorrectArticleTitle = $ArrayAllArticleTitle [$SearchResult];
$CorrectArticleAuthor = $ArrayAllArticleAuthor[$SearchResult];
//don't do any processing for summaries unless the link we're looking at is after the first header
if( $LinkIsAfterHeader )
{
$LinkEndPos = strpos($CurrentBody, "]", $TokenStartPos) + 1;
$NewLinePos = strpos($CurrentBody, "\n", $LinkEndPos);
$NextArticleSummary = "";
if( $NewLinePos === FALSE )
{
$NextArticleSummary = substr( $CurrentBody, $LinkEndPos );
}
else
{
//$NextArticleSummary .= "\nsummary:\n" + substr( $CurrentBody, $TokenStartPos, $NextTokenStartPos-$TokenStartPos );
$NextArticleSummary = substr( $CurrentBody, $LinkEndPos, $NewLinePos-$LinkEndPos );
}
// search through the array of official summaries for this article, to see if any of them are found in the current article
// if a match is found, then don't add the summary to the array of used summaries
$SummaryExistsForThisArticle = false;
$FoundAMatchingSummary = false;
$FirstFoundSummary = "";
if( array_key_exists( $CorrectArticleTitle, $ArrayAllArticleOfficialSummaries ) )
{
$SummaryExistsForThisArticle = true;
foreach( $ArrayAllArticleOfficialSummaries[$CorrectArticleTitle] as $key => $val )
{
if( substr_count( $CurrentBody, $val ) > 0 )
{
if( !$FoundAMatchingSummary )
{
$FirstFoundSummary = $val;
}
$FoundAMatchingSummary = true;
}
}
}
$AddThisSummaryToTheArray = false;
if( strpos( $NextArticleSummary, "by [[" ) > 0 )
{
//todo - consider adding a check for improperly formatted author links
//todo - consider adding a check for an author link followed by a summary
//todo - consider adding a check for a summary that doesn't appear until after the newline
}
else if( strlen( $NextArticleSummary ) > 0 )
{
if( $FoundAMatchingSummary )
{
// if we found a matching summary, then we don't need to do anything here
}
else
{
$AddThisSummaryToTheArray = true;
}
}
else if( $SummaryExistsForThisArticle )
{
// a summary is available for this article, but the summary isn't on this page.
// report this to
//$ArticleLinksWithAvailableSummary[$ArticleLinksWithAvailableSummaryNextIndex] = "*[$CurrentLink $CorrectArticleTitle] - " . $ArrayAllArticleOfficialSummaries[$CorrectArticleTitle][0];
//$ArticleLinksWithAvailableSummary[$ArticleLinksWithAvailableSummaryNextIndex] = "*[$CurrentLink $CorrectArticleTitle] - " . $FirstFoundSummary;
// just accessing the array directly doesn't work, and neither did that $FirstFoundSummary trick. for some reason, I need to use a foreach
//fwrite( $stderr, "$FirstFoundSummary\n" );
//fwrite( $stderr, "$ArrayAllArticleOfficialSummaries[$CorrectArticleTitle][0]\n" );
$IsFirstIteration = true;
foreach( $ArrayAllArticleOfficialSummaries[$CorrectArticleTitle] as $key => $val )
{
if( $IsFirstIteration )
{
$ArticleLinksWithAvailableSummary[$ArticleLinksWithAvailableSummaryNextIndex] = "*[$CurrentLink $CorrectArticleTitle] - " . $val;
$IsFirstIteration = false;
}
//fwrite( $stderr, "$val\n" );
}
$ArticleLinksWithAvailableSummaryConcept[$ArticleLinksWithAvailableSummaryNextIndex] = $CurrentTitle;
$ArticleLinksWithAvailableSummaryNextIndex++;
}
if( $AddThisSummaryToTheArray )
{
// ignore known invalid summaries
if( strlen( $NextArticleSummary ) > 10 )
{
$NextArticleSummary = trim( $NextArticleSummary, " ,-—'" );
if
(
substr_count( $NextArticleSummary, "'' and ''[http://lesswrong.com/lw/hm/new_improved_lottery/ New Improved Lottery]''" ) > 0 ||
substr_count( $NextArticleSummary, "(and [http://lesswrong.com/lw/ht/beware_the_unsurprised/ Beware the Unsurprised])" ) > 0 ||
substr_count( $NextArticleSummary, "In particular, the [[Litany of Tarski]]." ) > 0 ||
substr_count( $NextArticleSummary, "(but first read: [http://lesswrong.com/lw/m2/the_litany_against_gurus/ The Litany Against Gurus])" ) > 0 ||
substr_count( $NextArticleSummary, ", [http://lesswrong.com/lw/" ) > 0 ||
substr_count( $NextArticleSummary, "and [http://lesswrong.com/lw/" ) > 0 ||
substr_count( $NextArticleSummary, "'' (prerequisite: [http://lesswrong.com/lw" ) > 0 ||
substr_count( $NextArticleSummary, "'' and ''[http://lesswrong.com/lw" ) > 0 ||
strcmp( $NextArticleSummary, "setting up the problem." ) == 0 ||
strcmp( $NextArticleSummary, "[[Eliezer Yudkowsky]]" ) == 0 ||
strcmp( $NextArticleSummary, "by Salamon and Rayhawk." ) == 0 ||
strcmp( $NextArticleSummary, "by [[Eliezer Yudkowsky]]" ) == 0 ||
strcmp( $NextArticleSummary, "by talisman" ) == 0 ||
strcmp( $NextArticleSummary, "(short story)" ) == 0 ||
strcmp( $NextArticleSummary, "of a particular study design. Debiasing [http://lesswrong.com/lw/jk/burdensome_details/ won't be as simple] as practicing specific questions, it requires certain general habits of thought." ) == 0 ||
strcmp( $NextArticleSummary, "as practicing specific questions, it requires certain general habits of thought." ) == 0 ||
strcmp( $NextArticleSummary, "'' their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 ||
strcmp( $NextArticleSummary, "all of their complicated ''other'' preferences into their choice of ''exactly'' which acts they try to ''[http://lesswrong.com/lw/kq/fake_justification/ justify using]'' their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 ||
strcmp( $NextArticleSummary, "to this post tries to explain the cognitive twists whereby people [http://lesswrong.com/lw/ld/the_hidden_complexity_of_wishes/ smuggle] all of their complicated ''other'' preferences into their choice of ''exactly'' which acts they try to ''[http://lesswrong.com/lw/kq/fake_justification/ justify using]'' their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 ||
strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/n1/allais_malaise/ followups]) - Offered choices between gambles, people make decision-theoretically inconsistent decisions." ) == 0 ||
strcmp( $NextArticleSummary, ") - Offered choices between gambles, people make decision-theoretically inconsistent decisions." ) == 0 ||
strcmp( $NextArticleSummary, "and ''[http://lesswrong.com/lw/oo/explaining_vs_explaining_away/ Explaining vs. Explaining Away]'' - elementary [[reductionism]]." ) == 0 ||
strcmp( $NextArticleSummary, "\" which essentially answered \"Not on the present state of the Art\"" ) == 0 ||
strcmp( $NextArticleSummary, "(and its [[Privileging the hypothesis | requisites]], like [[Locating the hypothesis]])" ) == 0 ||
strcmp( $NextArticleSummary, "and ''[http://lesswrong.com/lw/hm/new_improved_lottery/ New Improved Lottery]" ) == 0 ||
strcmp( $NextArticleSummary, "their single principle; but if they were ''really'' following ''only'' that single principle, they would [http://lesswrong.com/lw/kz/fake_optimization_criteria/ choose other acts to justify]." ) == 0 ||
strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/w6/recursion_magic/ ...Recursion, Magic]" ) == 0 ||
strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/wf/hard_takeoff/ Hard Takeoff]" ) == 0 ||
strcmp( $NextArticleSummary, "[http://lesswrong.com/lw/wg/permitted_possibilities_locality/ Permitted Possibilities, & Locality]" ) == 0 ||
strcmp( $NextArticleSummary, "(in the martial arts)" ) == 0 ||
strcmp( $NextArticleSummary, "(in both psychotherapy and martial arts)" ) == 0 ||
strcmp( $NextArticleSummary, "Description and account of the game." ) == 0 ||
false
)
{
// don't add the invalid summary
}
else
{
$ArrayAllArticleUsedSummaries[$SearchResult][] = $NextArticleSummary;
}
}
}
}
}
// for authors who have their own wikipages, link to the wikipage instead of the LW user page
$CorrectArticleAuthor = str_replace("[http://lesswrong.com/user/Eliezer_Yudkowsky Eliezer_Yudkowsky]", "[[Eliezer Yudkowsky]]", $CorrectArticleAuthor);
// check if the link ends with a /
if
(
strcmp( $LastCharInLink, "/" ) != 0 &&
$SlashCountBeforeAdd < 6
)
{
$ArticleLinksWithoutEndingSlash[$ArticleLinksWithoutEndingSlashNextIndex] = "*[$CurrentLink $CorrectArticleTitle] by $CorrectArticleAuthor";
$ArticleLinksWithoutEndingSlashConcept[$ArticleLinksWithoutEndingSlashNextIndex] = $CurrentTitle;
$ArticleLinksWithoutEndingSlashNextIndex++;
}
if( strcmp($CurrentArticleTitle, $CorrectArticleTitle) != 0 && $LinkIsAfterHeader )
{
// don't report an error for the following links:
if
(
substr_count( $CurrentArticleTitle, "ranges over anything, not just internal subjective experiences") <= 0 &&
substr_count( $CurrentArticleTitle, "sequence leading up") <= 0 &&
substr_count( $CurrentArticleTitle, "smuggle") <= 0 &&
substr_count( $CurrentArticleTitle, "justify using") <= 0 &&
substr_count( $CurrentArticleTitle, "choose other acts to justify") <= 0 &&
substr_count( $CurrentArticleTitle, "Timeless decision theory") <= 0 &&
substr_count( $CurrentArticleTitle, "philosophical majoritarianism") <= 0 &&
substr_count( $CurrentArticleTitle, "critical comments") <= 0 &&
substr_count( $CurrentArticleTitle, "Positive Bias") <= 0 &&
substr_count( $CurrentArticleTitle, "Hindsight Bias") <= 0 &&
substr_count( $CurrentArticleTitle, "not an isolated artifact") <= 0 &&
substr_count( $CurrentArticleTitle, "won't be as simple") <= 0 &&
substr_count( $CurrentArticleTitle, "Illusion of Transparency") <= 0 &&
substr_count( $CurrentArticleTitle, "Affect Heuristic") <= 0 &&
substr_count( $CurrentArticleTitle, "Evaluability") <= 0 &&
substr_count( $CurrentArticleTitle, "Unbounded Scales, Huge Jury Awards, and Futurism") <= 0 &&
substr_count( $CurrentArticleTitle, "subsequent") <= 0 &&
substr_count( $CurrentArticleTitle, "followups") <= 0 &&
substr_count( $CurrentArticleTitle, "Do We Believe <i>Everything</i> We're Told?") <= 0 &&
substr_count( $CurrentArticleTitle, "Quantum Physics") <= 0 &&
substr_count( $CurrentArticleTitle, "Shut Up and Do the Impossible") <= 0 &&
substr_count( $CurrentArticleTitle, "You ''Can'' Face Reality") <= 0 &&
substr_count( $CurrentArticleTitle, "Absence of Evidence ''Is'' Evidence of Absence") <= 0 &&
substr_count( $CurrentArticleTitle, "Doublethink: Choosing to be Biased") <= 0 &&
substr_count( $CurrentArticleTitle, "Anti-Epistemology") <= 0 &&
substr_count( $CurrentArticleTitle, "Is Humanism a Religion-Substitute?") <= 0 &&
substr_count( $CurrentArticleTitle, "Your Strength As A Rationalist") <= 0 &&
substr_count( $CurrentArticleTitle, "Absence of Evidence '''is''' Evidence of Absence") <= 0 &&
substr_count( $CurrentArticleTitle, "Reversed Stupidity is Not Intelligence") <= 0 &&
substr_count( $CurrentArticleTitle, "A Human's Guide to Words") <= 0 &&
substr_count( $CurrentArticleTitle, "here") <= 0 &&
true
)
{
$ArticleLinksWithWrongTitle[$ArticleLinksWithWrongTitleNextIndex] = "*[$CurrentLink $CorrectArticleTitle] by $CorrectArticleAuthor\n**(title was [$CurrentLink $CurrentArticleTitle])";
$ArticleLinksWithWrongTitleConcept[$ArticleLinksWithWrongTitleNextIndex] = $CurrentTitle;
$ArticleLinksWithWrongTitleNextIndex++;
if( $Debug )
{
//fwrite( $stderr, "article title: $CurrentArticleTitle\ncorrect title: $CorrectArticleTitle\n\n" );
}
}
}
//fwrite( $stderr, "article title: $CurrentArticleTitle\n\n" );
$ByCharacters = substr( $CurrentBody, $TitleEndPos, 6 );
$FirstHeaderPos = strpos($CurrentBody, "==", 0);
// check if the link has an author
// ignore missing authors if the link appears before any headers
if( strcmp($ByCharacters, "] by [") != 0 && $LinkIsAfterHeader )
{
$LineUpToAuthor = substr( $CurrentBody, $TokenStartPos-1, ($TitleEndPos+6)-($TokenStartPos-1) );
$LineUpToAuthor = str_replace("\n", "\\n", $LineUpToAuthor);
$ArticleLinksWithoutAuthor[$ArticleLinksWithoutAuthorNextIndex] = "*[$CurrentLink $CorrectArticleTitle] by $CorrectArticleAuthor\n**(was $LineUpToAuthor)";
$ArticleLinksWithoutAuthorConcept[$ArticleLinksWithoutAuthorNextIndex] = $CurrentTitle;
$ArticleLinksWithoutAuthorNextIndex++;
if( $Debug )
{
//fwrite( $stderr, "Concept: $CurrentTitle, by characters: <$ByCharacters>\n\n" );
}
}
//todo - change this to check if the author is correct!
}
//fwrite( $stderr, "link start: $TokenStartPos \n link end: $TokenEndPos \n current link: $CurrentLink\n\n" );
// find the current link in the array, if it exists
$SearchResult = array_search( $CurrentLink, $ArrayAllArticleLink );
if( $SearchResult !== FALSE )
{
// find if this concept exists in the array
if( in_array( $CurrentTitle, $ArrayAllArticleIndexedConcepts[$SearchResult] ) )
{
// mark the link as found
$ArrayAllArticleFoundConcepts [$SearchResult][$CurrentTitle] = true;
}
else
{
// add the concept to the array of unindexed concepts
$ArrayAllArticleNotIndexedConcepts[$SearchResult][] = $CurrentTitle;
}
}
$TokenStartPos = strpos($CurrentBody, "http://lesswrong.com/lw/", $TokenEndPos);
$TokenEndPos = strpos($CurrentBody, " ", $TokenStartPos);
$CurrentLink = substr( $CurrentBody, $TokenStartPos, $TokenEndPos-$TokenStartPos );
if
(
$TokenEndPos <= $TokenStartPos ||
$TokenStartPos === FALSE ||
substr_count( $CurrentLink, "http://lesswrong.com/lw/" ) <= 0
)
{
$FinishedFindingLinks = true;
}
}
}
//$IsReadingConceptPages = false;
//fwrite( $stderr, $CurrentBody );
//unfinished!!!
//for each page:
// first check if the page is in the $ConceptFound list
// read the list of articles in the "Blog posts" section.
// report any links that appear in the All Articles page, for that concept, but don't appear in the wiki page
// report any links that appear in the wiki page, but don't appear in the All Articles page, for that concept
$SuccessfullyReadOneConceptPage = true;
}
}
}
}
// Default XML handler; new_xml_parser() registers it with xml_set_default_handler().
// The body is intentionally empty, so whatever the parser passes here is discarded.
//   $parser - the XML parser invoking the handler
//   $data   - the text handed over by the parser
function defaultHandler($parser, $data)
{
}
// Open $file for reading and create an XML parser wired to this script's handlers
// (startElement / endElement / characterData / defaultHandler).
//
// Parameters:
//   $file - path of the XML file to open
// Returns:
//   array($xml_parser, $fp) on success, or false when the file cannot be opened.
// Side effect:
//   records $file in the global $parser_file array, keyed by the parser's integer id.
function new_xml_parser($file)
{
    global $parser_file;

    // open the file first, so that no parser is created (and then abandoned) when the file is missing
    $fp = @fopen($file, "r");
    if( $fp === false )
    {
        return false;
    }

    $xml_parser = xml_parser_create();
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");
    xml_set_default_handler($xml_parser, "defaultHandler");

    if( !is_array($parser_file) )
    {
        settype($parser_file, "array");
    }

    // The parser handle itself is not a valid array key: on PHP 8 it is an XMLParser object
    // (using it as an offset is a fatal "Illegal offset type"), and on older versions it is a
    // resource that is only implicitly cast to its integer id. Derive an integer key explicitly;
    // for resources this is the same key the implicit cast produced.
    if( is_object($xml_parser) )
    {
        $parser_key = spl_object_id($xml_parser);
    }
    else
    {
        $parser_key = (int) $xml_parser;
    }
    $parser_file[$parser_key] = $file;

    return array($xml_parser, $fp);
}
// Run one complete parse of the XML dump named by the global $XMLfile, feeding the parser
// 4096 bytes at a time. All real work happens in the handlers registered by new_xml_parser().
// Terminates the script with die() when the file cannot be opened or the XML does not parse.
function ReadOnceThroughTheWholeXMLFile()
{
    global $XMLfile;

    // create the XML parser
    $parser_and_file = new_xml_parser($XMLfile);
    if( !$parser_and_file )
    {
        die("could not open XML input");
    }
    list($xml_parser, $fp) = $parser_and_file;

    // read the XML file
    $parsed_anything = false;
    $finished = false;
    while( !$finished )
    {
        $data = fread($fp, 4096);
        if( $data === false || $data === "" )
        {
            // nothing left to read; an empty file is left alone, exactly as before
            if( !$parsed_anything )
            {
                break;
            }
            // the previous chunk was not flagged as final (end-of-file had not been reported yet),
            // so send an empty final chunk to let the parser finish the document
            $data = "";
            $finished = true;
        }
        else
        {
            // compare against "" / false explicitly above, so a chunk such as "0" is still parsed
            $finished = feof($fp);
        }
        $parsed_anything = true;

        if( !xml_parse($xml_parser, $data, $finished) )
        {
            die( sprintf("XML error: %s at line %d\n",
                xml_error_string(xml_get_error_code($xml_parser)),
                xml_get_current_line_number($xml_parser)));
        }
    }

    // we're done with the XML file now, so close it
    fclose($fp);

    // this function runs once per pass, so release legacy resource-based parsers explicitly;
    // object-based parsers (PHP 8) are reclaimed automatically when they go out of scope
    if( is_resource($xml_parser) )
    {
        xml_parser_free($xml_parser);
    }
}
// ---- Pass 1: the All Articles pages ----
fwrite( $stderr, "Reading through the All Articles pages\n\n" );
$NextYearToRead = $FirstYearToRead;
$FailedToReadAllArticlesPage = false;
$IsReadingAllArticlesPages = true;
// One full parse of the XML file is made per iteration. The loop itself never changes
// $NextYearToRead or sets $SuccessfullyReadOneAllArticlesPage; presumably the XML handlers
// do both when they find the page for the current year (confirm against the handlers).
// The loop stops once the year passes $FinalYearToRead, or as soon as a whole parse
// finishes without the success flag having been set.
while( ! $FailedToReadAllArticlesPage && $NextYearToRead <= $FinalYearToRead )
{
    fwrite( $stderr, "processing the XML file, year " . $NextYearToRead . " \n\n" );
    $SuccessfullyReadOneAllArticlesPage = false;
    ReadOnceThroughTheWholeXMLFile();
    if( $SuccessfullyReadOneAllArticlesPage )
    {
        continue;
    }
    $FailedToReadAllArticlesPage = true;
    fwrite( $stderr, "error: failed to read the next All Articles page, year " . $NextYearToRead . " \n\n" );
}
$IsReadingAllArticlesPages = false;
// ---- Pass 2: the Summaries pages ----
fwrite( $stderr, "Reading through the Summaries pages\n\n" );
$NextYearToRead = $FirstYearToRead;
$FailedToReadSummariesPage = false;
$IsReadingSummariesPages = true;
// Same scheme as the All Articles pass: one full parse of the XML file per iteration,
// stopping once the year passes $FinalYearToRead or when a parse completes without
// $SuccessfullyReadOneSummariesPage having been set (presumably by the XML handlers,
// which would also have to advance $NextYearToRead - confirm against the handlers).
while( ! $FailedToReadSummariesPage && $NextYearToRead <= $FinalYearToRead )
{
    fwrite( $stderr, "processing the XML file for summaries, year " . $NextYearToRead . " \n\n" );
    $SuccessfullyReadOneSummariesPage = false;
    ReadOnceThroughTheWholeXMLFile();
    if( $SuccessfullyReadOneSummariesPage )
    {
        continue;
    }
    $FailedToReadSummariesPage = true;
    fwrite( $stderr, "error: failed to read the next summaries page, year " . $NextYearToRead . " \n\n" );
}
$IsReadingSummariesPages = false;
fwrite( $stderr, "marking all concepts as not found yet\n\n" );
// every concept named by any indexed article starts out as "not found"
foreach( $ArrayAllArticleIndexedConcepts as $ConceptsOfOneArticle )
{
    foreach( $ConceptsOfOneArticle as $IndexedConcept )
    {
        $ConceptFound[$IndexedConcept] = false;
    }
}

// ---- Pass 3: the Concept pages ----
fwrite( $stderr, "Reading through the Concept pages\n\n" );
$FailedToReadConceptPage = false;
$IsReadingConceptPages = true;
// a single parse of the XML file is enough here: the concept pages can be read in any order
ReadOnceThroughTheWholeXMLFile();
fwrite( $stderr, "Pages read: " . $PagesRead . "\n" );
$FailedToReadConceptPage = ! $SuccessfullyReadOneConceptPage;
if( $FailedToReadConceptPage )
{
    fwrite( $stderr, "error: failed to read any Concept page \n\n" );
}
fwrite( $stderr, "Doing final processing, step 1 of 3...\n\n" );
// Step 1 begins with the per-page formatting reports: each report is a wiki heading
// followed by one "*[[page]]" bullet per key of the corresponding array.

if( false )
{
    // this report was added just to clean up the pages that use the old OB link for Eliezer's post instead of the new LW link
    // disabled now because it gives too many false positives
    fwrite( $fp, "\n\n==The following concept pages link to OvercomingBias.com articles:==\n\n" );
    foreach( $PagesWithOvercomingBiasLinks as $PageTitle => $Unused )
    {
        fwrite( $fp, "*[[" . $PageTitle . "]]\n" );
    }
}

// heading => pages to list under it, in output order
$PageListSections = array(
    "\n\n==The following concept pages have comments:==\n\n"
        => $PagesWithComments,
    "\n\n==The following concept pages have the \"Overcoming Bias Articles\" header:==\n\n"
        => $PagesWithOvercomingBiasArticlesHeader,
    "\n\n==The following concept pages have \"External references\" instead of \"References\":==\n\n"
        => $PagesWithExternalReferences,
    "\n\n==The following concept pages have a miscapitalized \"See Also\" header:==\n\n"
        => $PagesWithSeeAlso,
    "\n\n==The following concept pages have an author link that links to an external site:==\n\n"
        => $PagesWithExternalAuthorLinks,
    "\n\n==The following concept pages have an extra newline after the wikilink template:==\n\n"
        => $PagesWithNewlineAfterWikiLink,
    "\n\n==The following concept pages have the See Also section before the Blog Posts section:==\n\n"
        => $PagesWithSeeAlsoBeforeBlogPosts,
);
foreach( $PageListSections as $SectionHeading => $PagesInSection )
{
    fwrite( $fp, $SectionHeading );
    foreach( $PagesInSection as $PageTitle => $Unused )
    {
        fwrite( $fp, "*[[" . $PageTitle . "]]\n" );
    }
}
// The article-link reports group their entries by concept: each entry array has a parallel
// "...Concept" array holding the owning concept for the same index, and a "*[[concept]]"
// line is written whenever the owning concept differs from the previous entry's.

// disabled because of a weird bug that was introduced when I added the "see also" checking
/*
fwrite( $fp, "\n\n==The following article links are missing the / at the end, or aren't in the index:==\n\n" );
$PreviousConcept = "";
foreach( $ArticleLinksWithoutEndingSlash as $key => $val )
{
if( strcmp( $PreviousConcept, $ArticleLinksWithoutEndingSlashConcept[$key] ) != 0 )
{
fwrite( $fp, "\n*[[$ArticleLinksWithoutEndingSlashConcept[$key]]]\n" );
}
fwrite( $fp, "*$val\n" );
$PreviousConcept = $ArticleLinksWithoutEndingSlashConcept[$key];
}
*/

// each row: heading, entry lines, owning concept of each entry
$GroupedLinkReports = array(
    array(
        "\n\n==The following article links have a wrong or improperly formatted title:==\n\n",
        $ArticleLinksWithWrongTitle,
        $ArticleLinksWithWrongTitleConcept,
    ),
    array(
        "\n\n==The following article links have a summary available that was not added to the page:==\n\n",
        $ArticleLinksWithAvailableSummary,
        $ArticleLinksWithAvailableSummaryConcept,
    ),
);
foreach( $GroupedLinkReports as $LinkReport )
{
    fwrite( $fp, $LinkReport[0] );
    $PreviousConcept = "";
    foreach( $LinkReport[1] as $EntryIndex => $EntryLine )
    {
        $OwningConcept = $LinkReport[2][$EntryIndex];
        if( strcmp( $PreviousConcept, $OwningConcept ) != 0 )
        {
            fwrite( $fp, "\n*[[" . $OwningConcept . "]]\n" );
        }
        fwrite( $fp, "*" . $EntryLine . "\n" );
        $PreviousConcept = $OwningConcept;
    }
}

if( false )
{
    /// this report was disabled because there were way too many false positives
    fwrite( $fp, "\n\n==The following article links have a missing or improperly formatted author:==\n\n" );
    $PreviousConcept = "";
    foreach( $ArticleLinksWithoutAuthor as $EntryIndex => $EntryLine )
    {
        $OwningConcept = $ArticleLinksWithoutAuthorConcept[$EntryIndex];
        if( strcmp( $PreviousConcept, $OwningConcept ) != 0 )
        {
            fwrite( $fp, "\n*[[" . $OwningConcept . "]]\n" );
        }
        fwrite( $fp, "*" . $EntryLine . "\n" );
        $PreviousConcept = $OwningConcept;
    }
}
// concepts whose $ConceptFound flag is still false
fwrite( $fp, "\n\n==The following concepts don't have wikipages with links to LessWrong.com articles yet:==\n\n" );
foreach( $ConceptFound as $ConceptTitle => $WasFound )
{
    if( $WasFound )
    {
        continue;
    }
    fwrite( $fp, "*[[" . $ConceptTitle . "]]\n" );
}

// two plain lists of concept titles, taken from the keys of each array
$ConceptListSections = array(
    "\n\n==The following concepts are not in the All Articles pages:==\n\n"
        => $ConceptNotInIndex,
    "\n\n==The following concepts are in the All Articles page, but are redirects:==\n\n"
        => $ConceptThatAreRedirects,
);
foreach( $ConceptListSections as $SectionHeading => $ConceptsInSection )
{
    fwrite( $fp, $SectionHeading );
    foreach( $ConceptsInSection as $ConceptTitle => $Unused )
    {
        fwrite( $fp, "*[[" . $ConceptTitle . "]]\n" );
    }
}
fwrite( $stderr, "Doing final processing, step 2 of 3...\n\n" );
// Step 2: for each indexed article, list the concepts recorded in
// $ArrayAllArticleNotIndexedConcepts, i.e. the concepts its index entry is missing.
fwrite( $fp, "\n\n==The following articles in the [[Less Wrong/All Articles|All Articles]] index are missing an entry:==\n" );
foreach( $ArrayAllArticleNotIndexedConcepts as $ArticleIndex => $UnindexedConcepts )
{
    // articles with nothing missing are not mentioned at all
    if( count( $UnindexedConcepts ) <= 0 )
    {
        continue;
    }
    fwrite( $fp, "\n*[" . $ArrayAllArticleLink[$ArticleIndex] . " " . $ArrayAllArticleTitle[$ArticleIndex] . "] is missing the following concepts:\n" );
    foreach( $UnindexedConcepts as $UnindexedConcept )
    {
        fwrite( $fp, "**[[" . $UnindexedConcept . "]]\n" );
    }
}
fwrite( $stderr, "Doing final processing, step 3 of 3...\n\n" );
// Step 3: for every known concept, list the indexed articles that name the concept
// but whose link was not marked as found on the concept's page.
fwrite( $fp, "\n\n==The following article links need to be added to the concept pages:==\n" );
// for each concept
foreach( $ConceptFound as $ConceptTitle => $ConceptWasFound )
{
    // collect the missing links for this concept
    $MissingLinks = array();
    // for each article
    foreach( $ArrayAllArticleIndexedConcepts as $ArticleIndex => $IndexedConcepts )
    {
        // Check whether the article lists the concept. This must be a membership test:
        // array_search() returns the matching position, and position 0 is falsy, so testing
        // its result for truthiness skipped every article that lists the concept first.
        if( ! in_array( $ConceptTitle, $IndexedConcepts ) )
        {
            continue;
        }
        // the link counts as found only if the concept page marked it for this article;
        // an article with no found-concepts entry at all has no found links
        $LinkWasFound =
            isset( $ArrayAllArticleFoundConcepts[$ArticleIndex] ) &&
            array_key_exists( $ConceptTitle, $ArrayAllArticleFoundConcepts[$ArticleIndex] );
        if( ! $LinkWasFound )
        {
            $MissingLinks[] = "**[$ArrayAllArticleLink[$ArticleIndex] $ArrayAllArticleTitle[$ArticleIndex]] by $ArrayAllArticleAuthor[$ArticleIndex]";
        }
    }
    // only concepts with at least one missing link get a section in the output
    if( count($MissingLinks) > 0 )
    {
        fwrite( $fp, "\n*[[$ConceptTitle]] is missing the following article links:\n" );
        foreach( $MissingLinks as $MissingLink )
        {
            fwrite( $fp, "$MissingLink\n" );
        }
    }
}
// now output the See Also links that are not reciprocated:
// "A -> B" is reported when A lists B under See Also, but B does not list A
fwrite( $fp, "\n\n==The following See Also links only go one way:==\n" );
// for each concept
foreach( $ConceptSeeAlso as $Concept1 => $SeeAlsoTargets )
{
    // for each concept it links to
    foreach( $SeeAlsoTargets as $Concept2 )
    {
        // look the reverse link up directly in the target's own See Also list,
        // instead of rescanning every See Also pair of every concept for each link
        $MatchFound =
            isset( $ConceptSeeAlso[$Concept2] ) &&
            in_array( $Concept1, $ConceptSeeAlso[$Concept2] );
        if( !$MatchFound )
        {
            fwrite( $fp, "\n*[[$Concept1]] -> [[$Concept2]]" );
        }
    }
}
// alphabetical list (sorted by title) of every concept whose $ConceptFound flag is set
fwrite( $fp, "\n\n==The following is a list of all concept pages:==\n\n" );
$SortedConcepts = $ConceptFound;
ksort($SortedConcepts);
foreach( $SortedConcepts as $ConceptTitle => $WasFound )
{
    if( ! $WasFound )
    {
        continue;
    }
    fwrite( $fp, "*[[" . $ConceptTitle . "]]\n" );
}

// fixed footer: links to the yearly index pages and their summaries pages, 2006 through 2010
$FooterYears = range( 2006, 2010 );

fwrite( $fp, "\n\n==Links to the All Articles pages:==\n" );
fwrite( $fp, "*[[Less Wrong/All Articles]]\n" );
foreach( $FooterYears as $FooterYear )
{
    fwrite( $fp, "*[[Less Wrong/" . $FooterYear . " Articles]]\n" );
}

fwrite( $fp, "\n\n==Links to the Summaries pages:==\n" );
fwrite( $fp, "*[[Less Wrong/Article summaries]]\n" );
foreach( $FooterYears as $FooterYear )
{
    fwrite( $fp, "*[[Less Wrong/" . $FooterYear . " Articles/Summaries]]\n" );
}

// the sync report is complete
fclose($fp);
// Write the See Also relationships as a Graphviz DOT digraph: one node per concept that
// has a See Also list, and one edge per See Also link.
$fp = fopen('ConceptGraph.dot.txt', 'w');
//{_COPYBLOCK1
fwrite($fp, "digraph G {\n\nnode [fontsize=\"$fontsize\"]\n\n");
// output the data for each concept
foreach( $ConceptSeeAlso as $key => $val )
{
    // Inside a DOT quoted string a double quote must be written as \" - without this,
    // a concept name containing a quote would end the string early and break the file.
    $NameWithSpaces = str_replace("\"", "\\\"", $key);
    // node ids use underscores instead of spaces
    $NameWithoutSpaces = str_replace(" ", "_", $NameWithSpaces);

    // the quoted node id, padded so the attribute lists line up
    $curLine = str_pad( "\"" . $NameWithoutSpaces . "\"", $PaddingValue1 );
    // the label shows the name with its spaces; the padding is applied after the closing quote
    $curLine .= "[label=\"";
    $curLine .= str_pad( $NameWithSpaces . "\"", $PaddingValue2 );
    // (further attributes - color, shape, style, fillcolor, peripheries - are not emitted)
    $newURL = "http://wiki.lesswrong.com/wiki/" . $NameWithoutSpaces;
    $curLine .= ", URL=\"$newURL\"";
    $curLine .= "];\n";
    fwrite($fp, $curLine);
}
fwrite($fp, "\n\n");
// output the data for the edges
foreach( $ConceptSeeAlso as $key => $val )
{
    // same escaping and space replacement as for the node ids above, so the edges refer to the same nodes
    $Name1WithoutSpaces = str_replace(" ", "_", str_replace("\"", "\\\"", $key));
    foreach( $val as $key2 => $val2 )
    {
        $Name2WithoutSpaces = str_replace(" ", "_", str_replace("\"", "\\\"", $val2));
        $curLine = str_pad( "\"" . $Name1WithoutSpaces . "\"", $PaddingValue1 );
        $curLine .= "-> ";
        $curLine .= "\"" . $Name2WithoutSpaces . "\"";
        $curLine .= "\n";
        fwrite($fp, $curLine);
    }
}
fwrite($fp, "\n\n}");
//}_COPYBLOCK1
fclose($fp);
// Regenerate the All Articles table rows: one row per article, giving its link and title,
// the concepts it is indexed under plus the ones found missing from the index, and its author.
$fp = fopen('AllArticles.txt', 'w');
fwrite( $stderr, "Outputting new All Articles page...\n\n" );
$ProgressCounter = 0;
foreach( $ArrayAllArticleTitle as $ArticleIndex => $ArticleTitle )
{
    // concepts already placed in this row, so that each one is linked only once
    $TempConceptArray = array();
    $ConceptLinks = array();
    // the indexed concepts come first...
    foreach( $ArrayAllArticleIndexedConcepts[$ArticleIndex] as $Concept )
    {
        if( in_array($Concept, $TempConceptArray) )
        {
            continue;
        }
        $TempConceptArray[] = $Concept;
        $ConceptLinks[] = "[[" . $Concept . "]]";
    }
    // ...followed by the concepts that were missing from the index
    foreach( $ArrayAllArticleNotIndexedConcepts[$ArticleIndex] as $Concept )
    {
        if( in_array($Concept, $TempConceptArray) )
        {
            continue;
        }
        $TempConceptArray[] = $Concept;
        $ConceptLinks[] = "[[" . $Concept . "]]";
    }
    $ConceptString = implode( ", ", $ConceptLinks );

    fwrite( $fp, "|-valign=\"top\"\n" );
    fwrite( $fp, "| [" . $ArrayAllArticleLink[$ArticleIndex] . " " . $ArticleTitle . "]\n" );
    fwrite( $fp, "| " . $ConceptString . "\n" );
    fwrite( $fp, "| " . $ArrayAllArticleAuthor[$ArticleIndex] . "\n" );
    // (per-100 progress reporting is disabled)
    $ProgressCounter++;
}
fwrite( $stderr, "Progress: " . $ProgressCounter . "\n" );
fclose($fp);
// Write the summaries collected from the concept pages: a heading with the article link
// before an article's first summary, and an "(alternate summary:)" marker before each
// further one. Articles without any collected summary produce no output.
$fp = fopen('ArticleSummaries.txt', 'w');
fwrite( $stderr, "Outputting article summaries...\n\n" );
$ProgressCounter = 0;
foreach( $ArrayAllArticleTitle as $ArticleIndex => $ArticleTitle )
{
    // scratch variables reset on every article (not read in this loop)
    $ConceptString = "";
    $TempConceptArray = array();

    $IsFirstIteration = true;
    foreach( $ArrayAllArticleUsedSummaries[$ArticleIndex] as $UsedSummary )
    {
        if( $IsFirstIteration )
        {
            fwrite( $fp, "\n\n=====[" . $ArrayAllArticleLink[$ArticleIndex] . " " . $ArticleTitle . "]=====\n\n" );
            $IsFirstIteration = false;
        }
        else
        {
            fwrite( $fp, "\n\n(alternate summary:)\n\n" );
        }
        fwrite( $fp, $UsedSummary );
    }
    // (per-100 progress reporting is disabled)
    $ProgressCounter++;
}
fwrite( $stderr, "Progress: " . $ProgressCounter . "\n" );
fclose($fp);
// Write a second summaries file in which every article gets a heading, followed by its
// official summaries (looked up by article title) and then the summaries collected from
// the concept pages; every summary after the first is preceded by "(alternate summary:)".
$fp = fopen('ArticleSummaries2.txt', 'w');
fwrite( $stderr, "Outputting article summaries...\n\n" );
$ProgressCounter = 0;
foreach( $ArrayAllArticleTitle as $ArticleIndex => $ArticleTitle )
{
    // scratch variables reset on every article (not read in this loop)
    $ConceptString = "";
    $TempConceptArray = array();

    fwrite( $fp, "\n\n=====[" . $ArrayAllArticleLink[$ArticleIndex] . " " . $ArticleTitle . "]=====\n\n" );
    $IsFirstIteration = true;

    // official summaries are indexed by article title and may be absent
    if( array_key_exists( $ArticleTitle, $ArrayAllArticleOfficialSummaries ) )
    {
        foreach( $ArrayAllArticleOfficialSummaries[$ArticleTitle] as $OfficialSummary )
        {
            if( ! $IsFirstIteration )
            {
                fwrite( $fp, "\n\n(alternate summary:)\n\n" );
            }
            $IsFirstIteration = false;
            fwrite( $fp, $OfficialSummary );
        }
    }

    // the summaries collected from the concept pages are indexed by article position
    foreach( $ArrayAllArticleUsedSummaries[$ArticleIndex] as $UsedSummary )
    {
        if( ! $IsFirstIteration )
        {
            fwrite( $fp, "\n\n(alternate summary:)\n\n" );
        }
        $IsFirstIteration = false;
        fwrite( $fp, $UsedSummary );
    }
    // (per-100 progress reporting is disabled)
    $ProgressCounter++;
}
fwrite( $stderr, "Progress: " . $ProgressCounter . "\n" );
fclose($fp);
//*/
// Return the text that lies between the first $OpenMarker found at or after $EndPos and the
// next $CloseMarker after it. On success $StartPos is set to the start of the returned text
// and $EndPos to the position of the close marker. Returns FALSE, leaving both positions
// untouched, when either marker cannot be found.
function RecentPostsExtractBetween( $Text, $OpenMarker, $CloseMarker, &$StartPos, &$EndPos )
{
    $OpenPos = strpos( $Text, $OpenMarker, (int) $EndPos );
    if( $OpenPos === FALSE )
    {
        return FALSE;
    }
    $TextStart = $OpenPos + strlen( $OpenMarker );
    $ClosePos = strpos( $Text, $CloseMarker, $TextStart );
    if( $ClosePos === FALSE )
    {
        return FALSE;
    }
    $StartPos = $TextStart;
    $EndPos = $ClosePos;
    return substr( $Text, $TextStart, $ClosePos - $TextStart );
}

fwrite( $stderr, "Processing Recent Post List...\n\n" );
// download the recent posts page
$handle = fopen('http://lesswrong.com/recentposts', 'r');
if( $handle === FALSE )
{
    // without this check the read loop below would be handed an invalid handle
    fwrite( $stderr, "error: could not open the recent posts page\n\n" );
    return "error reading file";
}
$RecentPostRawData = "";
while( ( $buf = fread( $handle, 8192 ) ) != '' )
{
    $RecentPostRawData .= $buf;
}
if( $buf === FALSE )
{
    fclose($handle);
    return "error reading file";
}
fclose($handle);
$fp = fopen('RecentPosts.txt', 'w');
// read the data from the recent posts page, then output the data in reverse order
$ArrayRecentPostLines = array();
// scanning starts at the first <h3> and stops at the "View more:" paging paragraph
$PostLineStartPos = strpos( $RecentPostRawData, "<h3>", 0);
$PostLineEndPos = $PostLineStartPos;
$EndPos = strpos( $RecentPostRawData, "<p class=\"nextprev\">View more:", 0);
$NextArticleTitle = "";
$IterationCount = 0;
$PastTheEnd = false;
// each entry is read as: post link, post title, author link, author name;
// the iteration cap is a safety net against scanning forever
while( $IterationCount < 1000 && ! $PastTheEnd )
{
    // A marker that cannot be found ends the scan. Previously the FALSE from strpos() was
    // folded into the position arithmetic, which restarted the scan from the top of the page.
    $NextArticleLink = RecentPostsExtractBetween( $RecentPostRawData, "<a href=\"", "\"", $PostLineStartPos, $PostLineEndPos );
    if( $NextArticleLink === FALSE )
    {
        break;
    }
    $NextArticleTitle = RecentPostsExtractBetween( $RecentPostRawData, ">", "<", $PostLineStartPos, $PostLineEndPos );
    if( $NextArticleTitle === FALSE )
    {
        break;
    }
    // the page HTML-escapes double quotes in titles; turn them back into plain quotes
    $NextArticleTitle = str_replace("&quot;", "\"", $NextArticleTitle);
    $NextArticleAuthorLink = RecentPostsExtractBetween( $RecentPostRawData, "<a href=\"", "\"", $PostLineStartPos, $PostLineEndPos );
    if( $NextArticleAuthorLink === FALSE )
    {
        break;
    }
    $NextArticleAuthorName = RecentPostsExtractBetween( $RecentPostRawData, ">", "<", $PostLineStartPos, $PostLineEndPos );
    if( $NextArticleAuthorName === FALSE )
    {
        break;
    }

    // stop at the paging paragraph; if that paragraph was never found, nothing is collected
    if( $EndPos === FALSE || $PostLineStartPos > $EndPos )
    {
        $PastTheEnd = true;
    }
    else
    {
        // one table row in the same format as the All Articles page, with the concepts column left empty
        $CurrentLineString = "";
        $CurrentLineString .= "|-valign=\"top\"\n";
        $CurrentLineString .= "| [http://lesswrong.com$NextArticleLink $NextArticleTitle]\n";
        $CurrentLineString .= "| \n";
        $CurrentLineString .= "| [$NextArticleAuthorLink $NextArticleAuthorName]\n";
        $ArrayRecentPostLines[$IterationCount] = $CurrentLineString;
        $IterationCount++;
    }
}
// write the collected rows in the reverse of the order they were read from the page
for( $index = $IterationCount-1; $index >= 0; $index-- )
{
    fwrite( $fp, $ArrayRecentPostLines[$index] );
}
fwrite( $stderr, "Done" );
fclose($fp);
// With $Debug set, dump the main data arrays to Debug.txt. Each array is introduced by
// its name surrounded by blank lines, and each entry is written as its key on one line
// followed by print_r() of its value.
if( $Debug )
{
    $fp = fopen('Debug.txt', 'w');
    $BlankLines = "\n\n\n\n\n\n\n\n\n\n";

    // the flags in ConceptFound are scalars, so an extra newline is written after each value
    fwrite( $fp, $BlankLines . " ConceptFound " . $BlankLines );
    foreach( $ConceptFound as $ConceptTitle => $WasFound )
    {
        fwrite( $fp, $ConceptTitle . "\n" . print_r($WasFound, true) . "\n" );
    }

    // these arrays all use the same entry layout: key, newline, print_r of the value
    $KeyedDumps = array(
        "ArrayAllArticleIndexedConcepts"    => $ArrayAllArticleIndexedConcepts,
        "ArrayAllArticleFoundConcepts"      => $ArrayAllArticleFoundConcepts,
        "ArrayAllArticleNotIndexedConcepts" => $ArrayAllArticleNotIndexedConcepts,
        "ConceptSeeAlso"                    => $ConceptSeeAlso,
        "ArrayAllArticleOfficialSummaries"  => $ArrayAllArticleOfficialSummaries,
    );
    foreach( $KeyedDumps as $DumpName => $DumpEntries )
    {
        fwrite( $fp, $BlankLines . " " . $DumpName . " " . $BlankLines );
        foreach( $DumpEntries as $EntryKey => $EntryValue )
        {
            fwrite( $fp, $EntryKey . "\n" . print_r($EntryValue, true) );
        }
    }

    // the used summaries are labelled with the article title instead of the numeric index
    fwrite( $fp, $BlankLines . " ArrayAllArticleUsedSummaries " . $BlankLines );
    foreach( $ArrayAllArticleUsedSummaries as $ArticleIndex => $UsedSummaries )
    {
        fwrite( $fp, $ArrayAllArticleTitle[$ArticleIndex] . "\n" . print_r($UsedSummaries, true) );
    }

    fclose($fp);
}
fclose($stderr);
//
?>