|Home|Why Use?|Demos|About |Contact|Mail List |Group |Hpricot By Mr _Why |FAQ|Workbench|Workbench-RO|

hpricot_object


In my controller I use syntax like this to build my hpricot_object:


# This is a demo controller

require 'rubygems'
require 'hpricot'
require 'open-uri'

class DemoController < ActionController::Base
  # I help Hpricot: set its buffer_size to 262144 (256 * 1024).
  Hpricot.buffer_size = 262144

  # Null out the parse() method of  WebAgent::CookieManager so it cannot save cookies
  module Tst
    class WebAgent::CookieManager
      # Empty parse method which nulls out the effect of the real parse()
      def parse x, y
        @index = "nada"
      end # parse
    end # class
  end # module

  # Action: fetches the Google home page, selects the matching a-tags with
  # Hpricot and exposes their HTML to the template as @somehtml.
  def search_google
    myurl = "http://www.google.com"
    mysearch_expression = "div.gb2/a[@href^='http']"
    hpricot_object = get_my_hp_elem(myurl)
    @somehtml = hpricot_object.search(mysearch_expression).to_html
  end


  protected

  # Returns raw HTML.  Usually it gets passed to get_my_hp_elem()
  #
  # u:: the URL to fetch (a String).
  #
  # The URL is parsed first and opened through the URI object rather than
  # through Kernel#open, so a value such as "|some command" or a local file
  # path can never be executed or read; such values fall into the rescue.
  # The block form closes the connection as soon as the body has been read.
  #
  # Any failure (bad URL, network error, HTTP error) is deliberately
  # swallowed and a small placeholder page is returned instead.
  def get_my_html_from_open_uri(u)
    hdrs = {"User-Agent"=>"Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1", "Accept-Charset"=>"utf-8", "Connection"=>"Keep-Alive", "Accept"=>"text/html"}
    begin
      my_html = URI.parse(u).open(hdrs) { |page| page.read }
    rescue
      my_html = "<html><body><p /><b>hello world</b></body></html>"
    end
    return my_html
  end # get_my_html_from_open_uri()

  # Returns an Hpricot object from HTML obtained by get_my_html_from_open_uri()
  def get_my_hp_elem(u)
    h0 = Hpricot(get_my_html_from_open_uri(u))
    # remove crap
    # (h0/"script").remove
    return h0
  end # get_my_hp_elem()

end # class

The corresponding template ( /app/views/demo/search_google.rhtml ) could be quite simple:

<%= @somehtml %>

Try it out:

/demo/search_google



Source Code Which Serves This Page:

/views/layouts/application.rhtml

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
  <%= render(:partial => "layouts/head") %>
<body>
<%# Page table with one row of three cells: t2r2c2, t2r2c4 (page content) and t2r2c6. %>
<table id="t2" width="98%" border="0">
<tr id="t2r2">
<%# The layouts/t2r2c2 partial emits its own td#t2r2c2 cell, so it is rendered
    directly inside the row. Wrapping it in a second td here nested one cell
    inside another and produced two elements with the same id. %>
<%= render(:partial => "layouts/t2r2c2") %>
<td id="t2r2c4" valign="top">
<%= render(:partial => "layouts/bcrmbs") %>
<%= yield %>
<hr />
<h3> Source Code Which Serves This Page: </h3>
<%= render(:partial => "layouts/show_source") %>
<hr />
<%= render(:partial => "layouts/bcrmbs") %>
<hr />
</td> <!-- t2r2c4 end -->
<%# NOTE(review): the t2r2c6 partial is assumed NOT to emit its own td; confirm against that partial. %>
<td id="t2r2c6" class="hpricot-color" valign="top">
  <%= render(:partial => "layouts/t2r2c6") %>
</td> <!-- t2r2c6 end -->
</tr> <!-- t2r2 end -->
</table>  <!-- t2 end -->
</body>
</html>

/views/layouts/_head.haml

-# Head partial: content-type meta tag, page title, favicon link,
-# JavaScript includes, ActiveScaffold includes and the application stylesheet.
%head
  %meta{"http-equiv" => "Content-Type", :content => "text/html; charset=utf-8"}
  %title hpricot.com
  %link{:rel => "shortcut icon", :href => "/favicon.ico"}
  -# JavaScript files, included in this order.
  = javascript_include_tag("prototype")
  = javascript_include_tag("effects")
  = javascript_include_tag("dragdrop")
  = javascript_include_tag("controls")
  = javascript_include_tag("application")
  -# NOTE(review): active_scaffold_includes is presumably supplied by the ActiveScaffold plugin; confirm.
  = active_scaffold_includes
  = stylesheet_link_tag("application")

/views/layouts/_t2r2c2.haml

-# Navigation cell partial. It emits its own td#t2r2c2 element.
-# Links passed through bcrmb() are site-internal paths; the remaining
-# links are written as plain a-tags with a target window.
/ _t2r2c2.haml
%td#t2r2c2.hpricot-color{:valign => "top"}
  %div.white-color
    %hr
    -# Logo linking to the site root.
    %a{:href => "/"}
      %img{:src => "/images/hpricot-small.png", :alt => "http://hpricot.com"}
    %hr
    %h2
      = bcrmb('<a href="/">http://hpricot.com</a>')
    %hr
    -# Site pages.
    %p
      %b
        = bcrmb('<a href="/sttc/why_hpricot"> Why Learn/Use Hpricot?</a>')
    %p
      %b
        = bcrmb('<a href="/demos"> Hpricot Demonstrations</a>')
    %p
      %b
        = bcrmb('<a href="/demos/about"> About</a>')
    %p
      %b
        = bcrmb('<a href="/sttc/contact"> Contact</a>')
    %p
      %b
        = bcrmb('<a href="/sttc/maillist"> Mail List</a>')
    -# External Hpricot resources, opened in the "hp" target window.
    %p
      %b
        %a{:href => "http://groups.google.com/group/hpricot", :target => "hp"} http://groups.google.com/group/hpricot
    %p
      %b
        %a{:href => "http://code.whytheluckystiff.net/hpricot", :target => "hp"} http://code.whytheluckystiff.net/hpricot
    %hr
    %p= render(:partial => "layouts/login_status")
    %hr
    -# Workbench pages.
    %p
      %b
        = bcrmb('<a href="/sttc/workbench_faq"> FAQ for Hpricot Workbench</a>')
    %p
      %b
        = bcrmb('<a href="/frgmnts"> An Hpricot Workbench</a>')
    %p
      %b
        = bcrmb('<a href="/asls/frgmnts"> Hpricot Workbench Public (Read-Only)</a>')
    -# External development aids, opened in the "wd" target window.
    -# NOTE(review): the two "%br <text>" lines below put inline content on a br tag;
    -# verify how the installed Haml version renders that.
    %p
      %b
        %a{:href => "http://chrispederick.com/work/web-developer", :target => "wd"}
          Hpricot Development Aid:
          %br http://chrispederick.com/work/web-developer
    %p
      %b
        %a{:href => "http://www.getfirebug.com", :target => "wd"}
          Hpricot Development Aid:
          %br http://www.getfirebug.com
    -# RDoc links, opened in the "hp" target window.
    %p
      %b
        %a{:href => "/hpricot_rdoc154/index.html", :target => "hp"} RDoc of Hpricot 0.6
    %p
      %b
        %a{:href => "/app/index.html", :target => "hp"} RDoc of hpricot.com
    %p
      %b
        = bcrmb('<a href="/sttc/disclaimer"> Disclaimer!</a>')
    %hr
    = render(:partial => "/layouts/built_with")
    %hr

/views/layouts/_show_source.rhtml

<%# Prints the source files behind the current page via the show_code helper:
    shared layout files first, then the controller and helper named after
    params[:controller], then the Workbench models. %>
<%= show_code("views/layouts", "application.rhtml") -%>
<%= show_code("views/layouts", "_head.haml") -%>
<%= show_code("views/layouts", "_t2r2c2.haml") -%>
<%= show_code("views/layouts", "_show_source.rhtml") -%>
<%= show_code("../public/stylesheets/sass", "application.sass") -%>
<%= show_code("../config", "routes.rb") -%>

<%# Files specific to the controller serving this request. %>
<%= show_code("controllers", "application.rb") -%>
<%= show_code("controllers", "#{params[:controller]}_controller.rb") -%>
<%= show_code("helpers", "application_helper.rb") -%>
<%= show_code("helpers", "#{params[:controller]}_helper.rb") -%>
<p />
The Hpricot Workbench makes use of these models:
<p />
<%= show_code("models", "exprtype.rb") -%>
<%= show_code("models", "stck.rb") -%>
<%= show_code("models", "frgmnt.rb") -%>
<%= show_code("models", "usr.rb") -%>

/../public/stylesheets/sass/application.sass

// Every table cell gets a 1px solid black border.
td
  :border-style solid
  :border-width 1px
  :border-color black


body
  :background-color white
  :width 98%

// Background colors applied through the hpricot-color / white-color classes.
.hpricot-color
  :background-color #FF7700

.white-color
  :background-color white

// Links: bold, underlined, dark green; orange on white while hovered.
a
  :text-decoration underline
  :color #335500
  :font-weight bold
a:hover
  :text-decoration underline
  :color #FF7700
  :background-color #FFFFFF
// NOTE(review): a:visited declares no properties; confirm whether a style was intended here.
a:visited


// One background color class per stack name.
.redstack
  :background-color red

.greenstack
  :background-color green

.bluestack
  :background-color blue

.yellowstack
  :background-color yellow

.orangestack
  :background-color orange

.whitestack
  :background-color white

.blackstack
  :background-color black

// frgmnt-frgtxt is out of control. I constrain it with scroll bars.
div.frgmnt-frgtxt
  :overflow scroll
  :height 200px
  :width 200px


// Make text message at end of frgtxt more noticeable
span.snipmsg
  :color #1F7F00
  :font-weight bold

// Code

// Inline code: larger text on a light grey background.
code
  :font-size 120%
  :line-height 140%
  :background-color #ddd

// Preformatted blocks: light text on a dark background, scrolling when needed.
pre
  :background #333
  :color #fffed8
  :border 1px inset #aaa
  :overflow auto
  :padding 3px 5px
  :margin 5px 0

// Code inside a pre block keeps the pre background.
pre code
  :background-color transparent
// Code End

/../config/routes.rb

# Route table for the application. Routes are matched in the order they are
# declared below, so the order of the map.connect calls must be preserved.
ActionController::Routing::Routes.draw do |map|
  # The priority is based upon order of creation: first created -> highest priority.

  # Sample of regular route:
  # map.connect 'products/:id', :controller => 'catalog', :action => 'view'
  # Keep in mind you can assign values other than :controller and :action

  # Sample of named route:
  # map.purchase 'products/:id/purchase', :controller => 'catalog', :action => 'purchase'
  # This route can be invoked with purchase_url(:id => product.id)

  # You can have the root of your site routed by hooking up ''
  # -- just remember to delete public/index.html.
  # Here the site root is served by the "about" action of the "demos" controller.
  map.connect '', :controller => "demos", :action => "about"

  # Allow downloading Web Service WSDL as a file with an extension
  # instead of a file named 'wsdl'
  map.connect ':controller/service.wsdl', :action => 'wsdl'

  # Install the default route as the lowest priority.
  # The variant with a :format suffix is declared before the plain one.
  map.connect ':controller/:action/:id.:format'
  map.connect ':controller/:action/:id'
end

/controllers/application.rb

# Filters added to this controller apply to all controllers in the application.
# Likewise, all the methods added will be available for all controllers.

require 'usr_system'
require 'rubygems'
require 'hpricot'
require 'open-uri'

class ApplicationController < ActionController::Base
  # Pick a unique cookie name to distinguish our session data from others'
  session :session_key => '_hp12_session_id'
  include ERB::Util
  include UsrSystem
  helper :usr
  # Every action requires authentication unless a subclass skips this filter.
  before_filter :authenticate_usr
  # I help Hpricot: set its buffer_size to 262144 (256 * 1024).
  Hpricot.buffer_size = 262144

  # Null out the parse() method of  WebAgent::CookieManager so it cannot save cookies
  module Tst
    class WebAgent::CookieManager
      # Empty parse method which nulls out the effect of the real parse()
      def parse x, y
        @index = "nada"
      end # parse
    end # class
  end # module

  # Method for rendering the HTML in a Fragment
  # Loads the Frgmnt named by params[:id] and renders its frgtxt with the
  # layout4rndr layout.
  def rndr_frgmnt
    @somehtml = Frgmnt.find(params[:id]).frgtxt
    render(:layout => "layouts/layout4rndr")
  end # rndr_frgmnt

  protected

  # Returns raw HTML.  Usually it gets passed to get_my_hp_elem()
  #
  # u:: the URL to fetch (a String).
  #
  # The URL is parsed first and opened through the URI object rather than
  # through Kernel#open, so a value such as "|some command" or a local file
  # path can never be executed or read; such values fall into the rescue.
  # The block form closes the connection as soon as the body has been read.
  #
  # Any failure (bad URL, network error, HTTP error) is deliberately
  # swallowed and a small placeholder page is returned instead.
  def get_my_html_from_open_uri(u)
    hdrs = {"User-Agent"=>"Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1", "Accept-Charset"=>"utf-8", "Connection"=>"Keep-Alive", "Accept"=>"text/html"}
    begin
      my_html = URI.parse(u).open(hdrs) { |page| page.read }
    rescue
      my_html = "<html><body><p /><b>hello world</b></body></html>"
    end
    return my_html
  end # get_my_html_from_open_uri()

  # Returns an Hpricot object from HTML obtained by get_my_html_from_open_uri()
  def get_my_hp_elem(u)
    h0 = Hpricot(get_my_html_from_open_uri(u))
    # remove crap
    # (h0/"script").remove
    return h0
  end # get_my_hp_elem()

  # Repel users who manually tinker with id in the request URL
  # Redirects to "/" when the record named by params[:id] has a usr_id that
  # differs from session[:usr_id].
  def repel_them
    # I dont need to repel_them if id is nil
    return if params[:id].nil?

    # Get the model class loaded into an object which can also be a class
    myklass = self.active_scaffold_config.model
    # Use the object-class-beastie to run .find() against the id in the request URL
    the_obj = myklass.find(params[:id])
    # Find out who owns the_obj
    the_usr_id = the_obj.send(:usr_id)
    # Repel them if they dont own the object pointed to by the id in the request URL
    redirect_to("/") unless the_usr_id == session[:usr_id]
  end

end # class

/controllers/sttc_controller.rb

# Serves up mostly static content.
# Controller with no actions of its own in this file; it only changes the
# filter chain inherited from ApplicationController.
class SttcController < ApplicationController
  # Open this controller to the world
  # (removes the authenticate_usr before_filter declared in ApplicationController)
  skip_before_filter :authenticate_usr

end # class

/helpers/application_helper.rb

# Methods in this helper are available to all templates in the application.
module ApplicationHelper

  # An Hpricot based breadcrumb helper.
  # Usage:
  # bcrmb("<a href='/x/y'>About</a> | <a href='/z'>Contact</a>")
  # Loads html into an hpricot and then swaps out any a-tag with a span-tag
  # if the a-tag-href matches the request.path
  # Only the first matching a-tag is swapped.  Returns the resulting HTML.
  def bcrmb(h)
    # get an hpricot from the input-html
    hp =  Hpricot(h)
    # Inside the hpricot, look for an a-tag containing the request.path
    hps = hp.search("a[@href=#{request.path}]")
    if (matched_a_tag = hps.first)
      # I hooked one, get its text node
      txtnode = matched_a_tag.inner_text
      # Inside the hpricot, swap the a-tag with a span-tag
      matched_a_tag.swap("<span class='bcrmb'>#{txtnode}</span>")
    end
    # Pull html out of the hpricot.
    return hp.to_html
  end # bcrmb


  # Builds a simple a-element from URL
  # record.inputurl is HTML-escaped before it is placed in the markup, and the
  # attributes use double quotes because h() is only relied on to escape
  # double quotes, not single quotes.
  # NOTE(review): the URL scheme is not checked here (e.g. "javascript:").
  def inputurl_column(record)
    safe_url = h(record.inputurl)
    "<a target=\"inputurl\"  href=\"#{safe_url}\">#{safe_url}</a>"
  end # inputurl_column

  # Override the stck column so I can add color to it via CSS
  # The stck name is used both as the CSS class and as the visible text; it
  # is HTML-escaped first.
  def stck_column(record)
    safe_name = h(record.stck.name)
    "<span class=\"#{safe_name}\">#{safe_name}</span>"
  end

  # Override the frgtxt column so I can add links to it and maybe show a subset of the data in it.
  def frgtxt_column(record)
    link_to_rndr = link_to("Render The HTML Below:", {:id => record, :action => "rndr_frgmnt", :controller => "frgmnts"}, {:target => "l"})
    # Use the ERB::Util.h() method below to make sure the HTML is displayed rather than rendered.
    # If they want to render, they can click the link.
    # Notice that I use .slice() to limit the amount of text sent back to the browser.
    # If they want to see all of the text they can .rndr_frgmnt() and use browser-view-source.
    myfrgtxt = (record.frgtxt || "nil")
    # Decide on the raw text, because the raw text is what gets sliced below.
    # Measuring the escaped text reported "SNIPPED" for short fragments that
    # merely contain many characters which expand when escaped.
    if (myfrgtxt.length > 1024)
      snipmsg = "<span class='snipmsg'>SNIPPED at character number 1024.  Use render and then browser-view-source to see all of it.</span>"
    else
      snipmsg = ""
    end # if
    return("<div class='frgmnt-frgtxt'> <hr />#{link_to_rndr}<hr />#{h(myfrgtxt.slice(0,1024))} <hr /> #{snipmsg} <hr /></div>")
  end # frgtxt_column()

  # I found this in the AS demo.  They use it to show ruby code which corresponds to scaffold views.
  # This helper is called in a partial here: app/views/layouts/_show_source.rhtml
  # Here is a sample line from app/views/layouts/_show_source.rhtml:
  # <%= show_code("controllers", "#{params[:controller]}_controller.rb") -%>
  #
  # path::     directory below app/ (may contain "..")
  # filename:: name of the file inside that directory
  # comment::  optional text appended to the heading
  #
  # Returns a heading plus the file's contents, HTML-escaped with h() so the
  # source is displayed rather than interpreted by the browser.  File.read
  # closes the file once it has been read.  If the file cannot be read the
  # string "<filename> is missing" is returned instead.
  def show_code(path, filename, comment = "")
    begin
      source = File.read("#{File.dirname __FILE__}/../../app/#{path}/#{filename}")
      "<h4>/#{path}/#{filename} #{comment}</h4>\n" +
        "<pre><code class=\"ruby\">#{h(source).strip}</code></pre>\n"
    rescue
      "#{filename} is missing"
    end # begin, rescue
  end # show_code

end

/helpers/sttc_helper.rb

# Helper module named after SttcController; it currently defines no methods.
module SttcHelper
end

The Hpricot Workbench makes use of these models:

/models/exprtype.rb

# ActiveRecord model with a required, unique name and many Frgmnt records.
class Exprtype < ActiveRecord::Base

  # Associations should come after callbacks
  has_many :frgmnts
  # Validations come after associations
  # name must be present and must not duplicate another record's name.
  validates_presence_of :name
  validates_uniqueness_of :name, :message => " is already being used.  Pick a different name."
end

/models/stck.rb

# ActiveRecord model with a required, unique name and many Frgmnt records.
class Stck < ActiveRecord::Base

  # Associations should come after callbacks
  has_many :frgmnts
  # Validations come after associations
  # name must be present and must not duplicate another record's name.
  validates_presence_of :name
  validates_uniqueness_of :name, :message => " is already being used.  Pick a different name."
end

/models/frgmnt.rb

# ActiveRecord model arranged as a tree (acts_as_tree, ordered by name).
# Each record belongs to a usr, an exprtype and a stck, and takes its input
# either from a parent record or from an inputurl.
class Frgmnt < ActiveRecord::Base
  acts_as_tree  :order => "name"
  # Associations should come after callbacks
  belongs_to :usr
  belongs_to :exprtype
  belongs_to :stck
  # Validations come after associations
  validates_presence_of :name
  validates_uniqueness_of :name, :message => " is already being used.  Pick a different name."

  protected

  # Custom validation.  Only the first failing rule adds an error, because
  # the rules are branches of a single case statement.
  def validate
    case
    # Ensure we have some input
    when (parent.nil? && inputurl.nil?)
      errors.add(:parent, ", and Input URL are all nil.  You need one.")
    # We only want 1 input
    when (!parent.nil? && !inputurl.nil?)
      errors.add(:parent, "and Input URL are both not nil.  A fragment needs 1 (and only 1!)")
    # We should not scrape ourself
    when (parent == self)
      errors.add(:parent, " == self.  Pick a different parent.")
    # The controller will set name = "record_usr_id_ne_session_usr_id" if
    # I try to update a record I do not own.
    when name == "record_usr_id_ne_session_usr_id"
      errors.add_to_base "You can only update your records, not other's records."
    # display-enumerable needs format like this: table.some-class,[1,5]
    # The exprtype guard keeps a record without an exprtype from raising
    # NoMethodError on nil here.
    # =~ returns the offset of the match, so "!= 0" rejects both a failed
    # match (nil) and a match that does not start at the first character.
    when (exprtype && exprtype.name == 'display-enumerable()' && ((arg1 =~ /(.*)?(\,)(\[)(\d+)(,)(\d+)(\])$/) != 0))
      errors.add(:arg1, ' Problem. display-enumerable() needs format like this: table.some-class,[1,5]')
    end # case
  end # validate

end

/models/usr.rb

require 'digest/sha1'

# This model expects a certain database layout, and it is based on the name/login pattern.
# User account model: login/password authentication with a per-user salt,
# plus time-limited security tokens.
class Usr < ActiveRecord::Base
  CHANGEABLE_FIELDS = ['first_name', 'last_name', 'email']
  # When true, the password validations run and crypt_password re-hashes the
  # password after validation.
  attr_accessor :password_needs_confirmation

  after_save '@password_needs_confirmation = false'
  after_validation :crypt_password

  validates_presence_of :login, :on => :create
  validates_length_of :login, :within => 3..40, :on => :create
  validates_uniqueness_of :login, :on => :create
  validates_uniqueness_of :email, :on => :create

  validates_presence_of :password, :if => :validate_password?
  validates_confirmation_of :password, :if => :validate_password?
  validates_length_of :password, { :minimum => 5, :if => :validate_password? }
  validates_length_of :password, { :maximum => 40, :if => :validate_password? }

  # Associations should come after callbacks
  has_many :frgmnts


  def initialize(attributes = nil)
    super
    @password_needs_confirmation = false
  end

  # Returns the verified, non-deleted Usr with this login when pass matches
  # its stored salted password; returns nil otherwise.
  #
  # The hash is compared on the record that was already loaded.  The earlier
  # second query repeated the lookup without the "deleted = FALSE" condition.
  def self.authenticate(login, pass)
    u = find( :first, :conditions => ["login = ? AND verified = TRUE AND deleted = FALSE", login])
    return nil if u.nil?
    return nil unless u.salted_password == salted_password(u.salt, hashed(pass))
    return u
  end

  # Returns the Usr matching id and token, marking it verified and expiring
  # the token; returns nil when there is no match or the token has expired.
  def self.authenticate_by_token(id, token)
    # Allow logins for deleted accounts, but only via this method (and
    # not the regular authenticate call)
    # The token and the full record (salt, password hash) are credentials,
    # so only the id is written to the log.
    logger.info "Attempting token authorization of usr #{id}"
    u = find( :first, :conditions => ["id = ? AND security_token = ?", id, token])
    if u
      logger.info "Authenticated by token: usr #{u.id}"
    else
      logger.info "Not authenticated"
    end
    return nil if (u.nil? or u.token_expired?)
    u.update_attributes :verified => true, :token_expiry => Clock.now
    return u
  end

  # Truthy when a token and an expiry are both set and the expiry has passed.
  # NOTE(review): a record with a token but no token_expiry is treated as
  # not expired; confirm that is intended.
  def token_expired?
    self.security_token and self.token_expiry and (Clock.now >= self.token_expiry)
  end

  # Returns the current security token, or creates and stores a new one when
  # none exists or less than half of token_lifetime remains.
  def generate_security_token
    if self.security_token.nil? or self.token_expiry.nil? or (Clock.now.to_i + token_lifetime / 2) >= self.token_expiry.to_i
      token = new_security_token
      return token
    else
      return self.security_token
    end
  end

  # Sets a new password (confirmation defaults to the password itself) and
  # flags the record so validation and hashing happen on the next save.
  def change_password(pass, confirm = nil)
    self.password = pass
    self.password_confirmation = confirm.nil? ? pass : confirm
    @password_needs_confirmation = true
  end

  # Token lifetime in seconds, derived from the configured number of hours.
  def token_lifetime
    UsrSystem::CONFIG[:security_token_life_hours] * 60 * 60
  end

  # Help Active Scaffold display Usr objects.
  # ref: http://activescaffold.com/tutorials/to_label
  def to_label
    login
  end

  protected

  attr_accessor :password, :password_confirmation

  def validate_password?
    @password_needs_confirmation
  end

  # NOTE: "protected" above does not apply to methods defined with
  # "def self.", so hashed and salted_password remain public class methods.
  def self.hashed(str)
    return Digest::SHA1.hexdigest("change-me--#{str}--")[0..39]
  end

  # after_validation callback: writes a fresh salt and the salted password
  # hash, but only when a password change is pending.
  def crypt_password
    if @password_needs_confirmation
      write_attribute("salt", self.class.hashed("salt-#{Clock.now}"))
      write_attribute("salted_password", self.class.salted_password(salt, self.class.hashed(@password)))
    end
  end

  # Generates a token, stores it with its expiry time and returns it.
  def new_security_token
    expiry = Time.at(Clock.now.to_i + token_lifetime)
    write_attribute('security_token', self.class.hashed(self.salted_password + Clock.now.to_i.to_s + rand.to_s))
    write_attribute('token_expiry', expiry)
    # Persist without running the validation and save callbacks.
    update_without_callbacks
    return self.security_token
  end

  def self.salted_password(salt, hashed_password)
    hashed(salt + hashed_password)
  end
end

|Home|Why Use?|Demos|About |Contact|Mail List |Group |Hpricot By Mr _Why |FAQ|Workbench|Workbench-RO|